From f1a1a392dbca5a0240b26a88ea3bc470e8df62cd Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Tue, 25 Jun 2019 11:51:35 +0000 Subject: [PATCH] AMDGPU/MC: Add .amdgpu_lds directive Summary: The directive defines a symbol as an group/local memory (LDS) symbol. LDS symbols behave similar to common symbols for the purposes of ELF, using the processor-specific SHN_AMDGPU_LDS as section index. It is the linker and/or runtime loader's job to "instantiate" LDS symbols and resolve relocations that reference them. It is not possible to initialize LDS memory (not even zero-initialize as for .bss). We want to be able to link together objects -- starting with relocatable objects, but possible expanding to shared objects in the future -- that access LDS memory in a flexible way. LDS memory is in an address space that is entirely separate from the address space that contains the program image (code and normal data), so having program segments for it doesn't really make sense. Furthermore, we want to be able to compile multiple kernels in a compilation unit which have disjoint use of LDS memory. In that case, we may want to place LDS symbols differently for different kernels to save memory (LDS memory is very limited and physically private to each kernel invocation), so we can't simply place LDS symbols in a .lds section. Hence this solution where LDS symbols always stay undefined. Change-Id: I08cbc37a7c0c32f53f7b6123aa0afc91dbc1748f Reviewers: arsenm, rampitec, t-tye, b-sumner, jsjodin Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, rupprecht, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D61493 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364296 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/AMDGPUUsage.rst | 27 +++--- include/llvm/BinaryFormat/ELF.h | 5 ++ include/llvm/MC/MCSymbol.h | 26 ++++-- lib/MC/ELFObjectWriter.cpp | 10 ++- lib/ObjectYAML/ELFYAML.cpp | 1 + .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 58 +++++++++++++ .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 27 ++++++ .../MCTargetDesc/AMDGPUTargetStreamer.h | 7 ++ test/MC/AMDGPU/elf-lds-error.s | 19 +++++ test/MC/AMDGPU/elf-lds.s | 82 +++++++++++++++++++ tools/llvm-objcopy/ELF/Object.cpp | 27 +++--- tools/llvm-objcopy/ELF/Object.h | 1 + 12 files changed, 253 insertions(+), 37 deletions(-) create mode 100644 test/MC/AMDGPU/elf-lds-error.s create mode 100644 test/MC/AMDGPU/elf-lds.s diff --git a/docs/AMDGPUUsage.rst b/docs/AMDGPUUsage.rst index 5aecdfc3c22..e035c62c1c4 100644 --- a/docs/AMDGPUUsage.rst +++ b/docs/AMDGPUUsage.rst @@ -851,15 +851,16 @@ Symbols include the following: .. table:: AMDGPU ELF Symbols :name: amdgpu-elf-symbols-table - ===================== ============== ============= ================== - Name Type Section Description - ===================== ============== ============= ================== - *link-name* ``STT_OBJECT`` - ``.data`` Global variable - - ``.rodata`` - - ``.bss`` - *link-name*\ ``.kd`` ``STT_OBJECT`` - ``.rodata`` Kernel descriptor - *link-name* ``STT_FUNC`` - ``.text`` Kernel entry point - ===================== ============== ============= ================== + ===================== ================== ================ ================== + Name Type Section Description + ===================== ================== ================ ================== + *link-name* ``STT_OBJECT`` - ``.data`` Global variable + - ``.rodata`` + - ``.bss`` + *link-name*\ ``.kd`` ``STT_OBJECT`` - ``.rodata`` Kernel descriptor + *link-name* ``STT_FUNC`` - ``.text`` Kernel entry point + *link-name* ``STT_OBJECT`` - SHN_AMDGPU_LDS Global variable in LDS + ===================== ================== ================ ================== Global variable Global variables both used and defined by the compilation unit. @@ -871,10 +872,10 @@ Global variable will resolve relocations using the definition provided by another code object or explicitly defined by the runtime. - All global symbols, whether defined in the compilation unit or external, are - accessed by the machine code indirectly through a GOT table entry. This - allows them to be preemptable. The GOT table is only supported when the target - triple OS is ``amdhsa`` (see :ref:`amdgpu-target-triples`). + If the symbol resides in local/group memory (LDS) then its section is the + special processor-specific section name ``SHN_AMDGPU_LDS``, and the + ``st_value`` field describes alignment requirements as it does for common + symbols. .. TODO Add description of linked shared object symbols. Seems undefined symbols diff --git a/include/llvm/BinaryFormat/ELF.h b/include/llvm/BinaryFormat/ELF.h index 884d7543c5e..40c39805866 100644 --- a/include/llvm/BinaryFormat/ELF.h +++ b/include/llvm/BinaryFormat/ELF.h @@ -1424,6 +1424,11 @@ enum : unsigned { GNU_PROPERTY_X86_FEATURE_2_XSAVEC = 1 << 9, }; +// AMDGPU-specific section indices. +enum { + SHN_AMDGPU_LDS = 0xff00, // Variable in LDS; symbol encoded like SHN_COMMON +}; + // AMD specific notes. (Code Object V2) enum { // Note types with values between 0 and 9 (inclusive) are reserved. diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h index c5963f49eed..189484deac7 100644 --- a/include/llvm/MC/MCSymbol.h +++ b/include/llvm/MC/MCSymbol.h @@ -58,6 +58,7 @@ protected: SymContentsOffset, SymContentsVariable, SymContentsCommon, + SymContentsTargetCommon, // Index stores the section index }; // Special sentinal value for the absolute pseudo fragment. @@ -108,7 +109,7 @@ protected: /// This is actually a Contents enumerator, but is unsigned to avoid sign /// extension and achieve better bitpacking with MSVC. - unsigned SymbolContents : 2; + unsigned SymbolContents : 3; /// The alignment of the symbol, if it is 'common', or -1. /// @@ -344,10 +345,11 @@ public: /// /// \param Size - The size of the symbol. /// \param Align - The alignment of the symbol. - void setCommon(uint64_t Size, unsigned Align) { + /// \param Target - Is the symbol a target-specific common-like symbol. + void setCommon(uint64_t Size, unsigned Align, bool Target = false) { assert(getOffset() == 0); CommonSize = Size; - SymbolContents = SymContentsCommon; + SymbolContents = Target ? SymContentsTargetCommon : SymContentsCommon; assert((!Align || isPowerOf2_32(Align)) && "Alignment must be a power of 2"); @@ -367,20 +369,28 @@ public: /// /// \param Size - The size of the symbol. /// \param Align - The alignment of the symbol. + /// \param Target - Is the symbol a target-specific common-like symbol. /// \return True if symbol was already declared as a different type - bool declareCommon(uint64_t Size, unsigned Align) { + bool declareCommon(uint64_t Size, unsigned Align, bool Target = false) { assert(isCommon() || getOffset() == 0); if(isCommon()) { - if(CommonSize != Size || getCommonAlignment() != Align) - return true; + if (CommonSize != Size || getCommonAlignment() != Align || + isTargetCommon() != Target) + return true; } else - setCommon(Size, Align); + setCommon(Size, Align, Target); return false; } /// Is this a 'common' symbol. bool isCommon() const { - return SymbolContents == SymContentsCommon; + return SymbolContents == SymContentsCommon || + SymbolContents == SymContentsTargetCommon; + } + + /// Is this a target-specific common-like symbol. + bool isTargetCommon() const { + return SymbolContents == SymContentsTargetCommon; } MCFragment *getFragment(bool SetUsed = true) const { diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 99cab4e8c5d..2c68723a12f 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -463,7 +463,7 @@ void ELFWriter::writeHeader(const MCAssembler &Asm) { uint64_t ELFWriter::SymbolValue(const MCSymbol &Sym, const MCAsmLayout &Layout) { - if (Sym.isCommon() && Sym.isExternal()) + if (Sym.isCommon() && (Sym.isTargetCommon() || Sym.isExternal())) return Sym.getCommonAlignment(); uint64_t Res; @@ -660,8 +660,12 @@ void ELFWriter::computeSymbolTable( if (Symbol.isAbsolute()) { MSD.SectionIndex = ELF::SHN_ABS; } else if (Symbol.isCommon()) { - assert(!Local); - MSD.SectionIndex = ELF::SHN_COMMON; + if (Symbol.isTargetCommon()) { + MSD.SectionIndex = Symbol.getIndex(); + } else { + assert(!Local); + MSD.SectionIndex = ELF::SHN_COMMON; + } } else if (Symbol.isUndefined()) { if (isSignature && !Used) { MSD.SectionIndex = RevGroupMap.lookup(&Symbol); diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp index ac3e3e577ea..ef46ca73ec6 100644 --- a/lib/ObjectYAML/ELFYAML.cpp +++ b/lib/ObjectYAML/ELFYAML.cpp @@ -555,6 +555,7 @@ void ScalarEnumerationTraits::enumeration( ECase(SHN_COMMON); ECase(SHN_XINDEX); ECase(SHN_HIRESERVE); + ECase(SHN_AMDGPU_LDS); ECase(SHN_HEXAGON_SCOMMON); ECase(SHN_HEXAGON_SCOMMON_1); ECase(SHN_HEXAGON_SCOMMON_2); diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 02a15692e51..27093076a22 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -915,6 +915,7 @@ private: bool ParseDirectiveHSAMetadata(); bool ParseDirectivePALMetadataBegin(); bool ParseDirectivePALMetadata(); + bool ParseDirectiveAMDGPULDS(); /// Common code to parse out a block of text (typically YAML) between start and /// end directives. @@ -3918,6 +3919,60 @@ bool AMDGPUAsmParser::ParseDirectivePALMetadata() { return false; } +/// ParseDirectiveAMDGPULDS +/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] +bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { + if (getParser().checkForValidSection()) + return true; + + StringRef Name; + SMLoc NameLoc = getLexer().getLoc(); + if (getParser().parseIdentifier(Name)) + return TokError("expected identifier in directive"); + + MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); + if (parseToken(AsmToken::Comma, "expected ','")) + return true; + + unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (getParser().parseAbsoluteExpression(Size)) + return true; + if (Size < 0) + return Error(SizeLoc, "size must be non-negative"); + if (Size > LocalMemorySize) + return Error(SizeLoc, "size is too large"); + + int64_t Align = 4; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + SMLoc AlignLoc = getLexer().getLoc(); + if (getParser().parseAbsoluteExpression(Align)) + return true; + if (Align < 0 || !isPowerOf2_64(Align)) + return Error(AlignLoc, "alignment must be a power of two"); + + // Alignment larger than the size of LDS is possible in theory, as long + // as the linker manages to place to symbol at address 0, but we do want + // to make sure the alignment fits nicely into a 32-bit integer. + if (Align >= 1u << 31) + return Error(AlignLoc, "alignment is too large"); + } + + if (parseToken(AsmToken::EndOfStatement, + "unexpected token in '.amdgpu_lds' directive")) + return true; + + Symbol->redefineIfPossible(); + if (!Symbol->isUndefined()) + return Error(NameLoc, "invalid symbol redefinition"); + + getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); + return false; +} + bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); @@ -3951,6 +4006,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { return ParseDirectiveHSAMetadata(); } + if (IDVal == ".amdgpu_lds") + return ParseDirectiveAMDGPULDS(); + if (IDVal == PALMD::AssemblerDirectiveBegin) return ParseDirectivePALMetadataBegin(); diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index b675004b59f..39996cd28ae 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -206,6 +206,12 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, } } +void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size, + unsigned Align) { + OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", " << Align + << '\n'; +} + bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) { OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n"; return true; @@ -497,6 +503,27 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, Symbol->setType(Type); } +void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size, + unsigned Align) { + assert(isPowerOf2_32(Align)); + + MCSymbolELF *SymbolELF = cast(Symbol); + SymbolELF->setType(ELF::STT_OBJECT); + + if (!SymbolELF->isBindingSet()) { + SymbolELF->setBinding(ELF::STB_GLOBAL); + SymbolELF->setExternal(true); + } + + if (SymbolELF->declareCommon(Size, Align, true)) { + report_fatal_error("Symbol: " + Symbol->getName() + + " redeclared as different type"); + } + + SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS); + SymbolELF->setSize(MCConstantExpr::create(Size, getContext())); +} + bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) { // Create two labels to mark the beginning and end of the desc field // and a MCExpr to calculate the size of the desc field. diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 9c52199bf6d..683b3e363b9 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -53,6 +53,9 @@ public: virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0; + virtual void emitAMDGPULDS(MCSymbol *Symbol, unsigned Size, + unsigned Align) = 0; + /// \returns True on success, false on failure. virtual bool EmitISAVersion(StringRef IsaVersionString) = 0; @@ -107,6 +110,8 @@ public: void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; + void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, unsigned Align) override; + /// \returns True on success, false on failure. bool EmitISAVersion(StringRef IsaVersionString) override; @@ -152,6 +157,8 @@ public: void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; + void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, unsigned Align) override; + /// \returns True on success, false on failure. bool EmitISAVersion(StringRef IsaVersionString) override; diff --git a/test/MC/AMDGPU/elf-lds-error.s b/test/MC/AMDGPU/elf-lds-error.s new file mode 100644 index 00000000000..4e0ec6a5778 --- /dev/null +++ b/test/MC/AMDGPU/elf-lds-error.s @@ -0,0 +1,19 @@ +// RUN: not llvm-mc -triple amdgcn-- -mcpu gfx900 %s -o - 2>&1 | FileCheck %s + +// CHECK: :[[@LINE+1]]:27: error: size is too large + .amdgpu_lds huge, 200000 + +// CHECK: :[[@LINE+1]]:30: error: size must be non-negative + .amdgpu_lds negsize, -4 + +// CHECK: :[[@LINE+1]]:36: error: alignment must be a power of two + .amdgpu_lds zero_align, 5, 0 + +// CHECK: :[[@LINE+1]]:39: error: alignment must be a power of two + .amdgpu_lds non_pot_align, 0, 12 + +// CHECK: :[[@LINE+1]]:36: error: alignment is too large + .amdgpu_lds huge_align, 0, 1099511627776 + +// CHECK: :[[@LINE+1]]:9: error: unknown directive + .amdgpu_ldsnowhitespace, 8 diff --git a/test/MC/AMDGPU/elf-lds.s b/test/MC/AMDGPU/elf-lds.s new file mode 100644 index 00000000000..e03f8a29583 --- /dev/null +++ b/test/MC/AMDGPU/elf-lds.s @@ -0,0 +1,82 @@ +// RUN: llvm-mc -filetype=obj -triple amdgcn-- -mcpu gfx900 %s -o - | llvm-readobj -t -r | FileCheck %s + + .text + .globl test_kernel + .p2align 8 + .type test_kernel,@function +test_kernel: + s_mov_b32 s0, lds0@abs32@lo + v_lshl_add_u32 v3, v0, 2, s0 + ds_read2_b32 v[1:2], v3 offset1:1 + + s_mov_b32 s0, lds4@abs32@lo + v_lshl_add_u32 v3, v0, 2, s0 + ds_write_b32 v3, v1 + s_endpgm +.Lfunc_end: + .size test_kernel, .Lfunc_end-test_kernel + + .globl lds0 + .amdgpu_lds lds0, 192, 16 + + .globl lds1 + .amdgpu_lds lds1,387,8 + + ; Weird whitespace cases + .globl lds2 + .amdgpu_lds lds2, 12 + + ; No alignment or .globl directive, not mentioned anywhere + .amdgpu_lds lds3, 16 + + ; No alignment or .globl directive, size 0, but mentioned in .text + .amdgpu_lds lds4, 0 + +// CHECK: Relocations [ +// CHECK: Section (3) .rel.text { +// CHECK-NEXT: 0x4 R_AMDGPU_ABS32 lds0 0x0 +// CHECK-NEXT: 0x1C R_AMDGPU_ABS32 lds4 0x0 +// CHECK-NEXT: } +// CHECK: ] + +// CHECK: Symbol { +// CHECK: Name: lds0 (54) +// CHECK-NEXT: Value: 0x10 +// CHECK-NEXT: Size: 192 +// CHECK-NEXT: Binding: Global (0x1) +// CHECK-NEXT: Type: Object (0x1) +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: Processor Specific (0xFF00) +// CHECK-NEXT: } + +// CHECK: Symbol { +// CHECK: Name: lds1 (49) +// CHECK-NEXT: Value: 0x8 +// CHECK-NEXT: Size: 387 +// CHECK-NEXT: Binding: Global (0x1) +// CHECK-NEXT: Type: Object (0x1) +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: Processor Specific (0xFF00) +// CHECK-NEXT: } + +// CHECK: Symbol { +// CHECK: Name: lds2 (44) +// CHECK-NEXT: Value: 0x4 +// CHECK-NEXT: Size: 12 +// CHECK-NEXT: Binding: Global (0x1) +// CHECK-NEXT: Type: Object (0x1) +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: Processor Specific (0xFF00) +// CHECK-NEXT: } + +// CHECK-NOT: Name: lds3 + +// CHECK: Symbol { +// CHECK: Name: lds4 (39) +// CHECK-NEXT: Value: 0x4 +// CHECK-NEXT: Size: 0 +// CHECK-NEXT: Binding: Global (0x1) +// CHECK-NEXT: Type: Object (0x1) +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: Processor Specific (0xFF00) +// CHECK-NEXT: } diff --git a/tools/llvm-objcopy/ELF/Object.cpp b/tools/llvm-objcopy/ELF/Object.cpp index c8c06f4173c..7042492ff6d 100644 --- a/tools/llvm-objcopy/ELF/Object.cpp +++ b/tools/llvm-objcopy/ELF/Object.cpp @@ -597,6 +597,11 @@ static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) { case SHN_COMMON: return true; } + + if (Machine == EM_AMDGPU) { + return Index == SHN_AMDGPU_LDS; + } + if (Machine == EM_HEXAGON) { switch (Index) { case SHN_HEXAGON_SCOMMON: @@ -618,21 +623,17 @@ uint16_t Symbol::getShndx() const { return SHN_XINDEX; return DefinedIn->Index; } - switch (ShndxType) { - // This means that we don't have a defined section but we do need to - // output a legitimate section index. - case SYMBOL_SIMPLE_INDEX: + + if (ShndxType == SYMBOL_SIMPLE_INDEX) { + // This means that we don't have a defined section but we do need to + // output a legitimate section index. return SHN_UNDEF; - case SYMBOL_ABS: - case SYMBOL_COMMON: - case SYMBOL_HEXAGON_SCOMMON: - case SYMBOL_HEXAGON_SCOMMON_2: - case SYMBOL_HEXAGON_SCOMMON_4: - case SYMBOL_HEXAGON_SCOMMON_8: - case SYMBOL_XINDEX: - return static_cast(ShndxType); } - llvm_unreachable("Symbol with invalid ShndxType encountered"); + + assert(ShndxType == SYMBOL_ABS || ShndxType == SYMBOL_COMMON || + (ShndxType >= ELF::SHN_LOPROC && ShndxType <= ELF::SHN_HIPROC) || + (ShndxType >= ELF::SHN_LOOS && ShndxType <= ELF::SHN_HIOS)); + return static_cast(ShndxType); } bool Symbol::isCommon() const { return getShndx() == SHN_COMMON; } diff --git a/tools/llvm-objcopy/ELF/Object.h b/tools/llvm-objcopy/ELF/Object.h index 69f999ccddd..4936fd6dc32 100644 --- a/tools/llvm-objcopy/ELF/Object.h +++ b/tools/llvm-objcopy/ELF/Object.h @@ -591,6 +591,7 @@ enum SymbolShndxType { SYMBOL_SIMPLE_INDEX = 0, SYMBOL_ABS = ELF::SHN_ABS, SYMBOL_COMMON = ELF::SHN_COMMON, + SYMBOL_AMDGPU_LDS = ELF::SHN_AMDGPU_LDS, SYMBOL_HEXAGON_SCOMMON = ELF::SHN_HEXAGON_SCOMMON, SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2, SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4, -- 2.50.1