granicus.if.org Git - llvm/commitdiff
[AMDGPU] gfx1010 s_code_end generation
author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Fri, 3 May 2019 21:26:39 +0000 (21:26 +0000)
committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Fri, 3 May 2019 21:26:39 +0000 (21:26 +0000)
Also add some missing metadata in the streamer.

Differential Revision: https://reviews.llvm.org/D61531

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359937 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
lib/Target/AMDGPU/SIProgramInfo.h
test/CodeGen/AMDGPU/s_code_end.ll [new file with mode: 0644]

index 95302fff9905cc76e5f61413010f78f8efbfbf95..76f70349e2dd6ea5e4c3e89a55f69bfa0458ee6a 100644 (file)
@@ -295,6 +295,12 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
 
 bool AMDGPUAsmPrinter::doFinalization(Module &M) {
   CallGraphResourceInfo.clear();
+
+  if (AMDGPU::isGFX10(*getGlobalSTI())) {
+    OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+    getTargetStreamer()->EmitCodeEnd();
+  }
+
   return AsmPrinter::doFinalization(M);
 }
 
@@ -928,6 +934,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
               1ULL << ScratchAlignShift) >>
       ScratchAlignShift;
 
+  if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
+    ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
+    ProgInfo.MemOrdered = 1;
+  }
+
   ProgInfo.ComputePGMRSrc1 =
       S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
       S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
@@ -936,7 +947,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
       S_00B848_PRIV(ProgInfo.Priv) |
       S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
       S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
-      S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
+      S_00B848_IEEE_MODE(ProgInfo.IEEEMode) |
+      S_00B848_WGP_MODE(ProgInfo.WgpMode) |
+      S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
 
   // 0 = X, 1 = XY, 2 = XYZ
   unsigned TIDIGCompCnt = 0;
@@ -1077,7 +1090,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
   Out.compute_pgm_resource_registers =
       CurrentProgramInfo.ComputePGMRSrc1 |
       (CurrentProgramInfo.ComputePGMRSrc2 << 32);
-  Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
+  Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
 
   if (CurrentProgramInfo.DynamicCallStack)
     Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
index b40bda94ae66b18d067945d783b6f3dbecc7fdaf..bab9f4df53b814ad89ec750fd18efcfac22a9a13 100644 (file)
@@ -235,6 +235,13 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
   return true;
 }
 
+bool AMDGPUTargetAsmStreamer::EmitCodeEnd() {
+  const uint32_t Encoded_s_code_end = 0xbf9f0000;
+  OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n';
+  OS << "\t.fill 32, 4, " << Encoded_s_code_end << '\n';
+  return true;
+}
+
 void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
     const MCSubtargetInfo &STI, StringRef KernelName,
     const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
@@ -552,6 +559,18 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
   return true;
 }
 
+bool AMDGPUTargetELFStreamer::EmitCodeEnd() {
+  const uint32_t Encoded_s_code_end = 0xbf9f0000;
+
+  MCStreamer &OS = getStreamer();
+  OS.PushSection();
+  OS.EmitValueToAlignment(64, Encoded_s_code_end, 4);
+  for (unsigned I = 0; I < 32; ++I)
+    OS.EmitIntValue(Encoded_s_code_end, 4);
+  OS.PopSection();
+  return true;
+}
+
 void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
     const MCSubtargetInfo &STI, StringRef KernelName,
     const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
index c1436b3585fc592c652f75d38865509505a0c8ef..9c52199bf6dc0ca8beeb70d94d1c110f20e28987 100644 (file)
@@ -74,6 +74,9 @@ public:
   /// \returns True on success, false on failure.
   virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0;
 
+  /// \returns True on success, false on failure.
+  virtual bool EmitCodeEnd() = 0;
+
   virtual void EmitAmdhsaKernelDescriptor(
       const MCSubtargetInfo &STI, StringRef KernelName,
       const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
@@ -113,6 +116,9 @@ public:
   /// \returns True on success, false on failure.
   bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
 
+  /// \returns True on success, false on failure.
+  bool EmitCodeEnd() override;
+
   void EmitAmdhsaKernelDescriptor(
       const MCSubtargetInfo &STI, StringRef KernelName,
       const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
@@ -155,6 +161,9 @@ public:
   /// \returns True on success, false on failure.
   bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
 
+  /// \returns True on success, false on failure.
+  bool EmitCodeEnd() override;
+
   void EmitAmdhsaKernelDescriptor(
       const MCSubtargetInfo &STI, StringRef KernelName,
       const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
index 0b475919e6ddc6f765d65579d3190c1bf9669f46..168f05f8fdd65b463f49318118bc4ce771daee34 100644 (file)
@@ -28,6 +28,8 @@ struct SIProgramInfo {
     uint32_t DX10Clamp = 0;
     uint32_t DebugMode = 0;
     uint32_t IEEEMode = 0;
+    uint32_t WgpMode = 0; // GFX10+
+    uint32_t MemOrdered = 0; // GFX10+
     uint64_t ScratchSize = 0;
 
     uint64_t ComputePGMRSrc1 = 0;
diff --git a/test/CodeGen/AMDGPU/s_code_end.ll b/test/CodeGen/AMDGPU/s_code_end.ll
new file mode 100644 (file)
index 0000000..80f566b
--- /dev/null
@@ -0,0 +1,80 @@
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10,GFX10-ASM %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump -arch=amdgcn -mcpu=gfx1010 -disassemble - | FileCheck -check-prefixes=GCN,GCN-OBJ,GFX10,GFX10-OBJ %s
+
+; GCN:            a_kernel1:
+; GCN-NEXT:               s_endpgm
+; GCN-ASM-NEXT:   [[END_LABEL1:\.Lfunc_end.*]]:
+; GCN-ASM-NEXT:           .size   a_kernel1, [[END_LABEL1]]-a_kernel1
+; GCN-ASM:                .section        .AMDGPU.config
+
+; GCN-OBJ-NEXT:           s_nop 0
+
+define amdgpu_kernel void @a_kernel1() {
+  ret void
+}
+
+; GCN:            a_kernel2:
+; GCN-NEXT:               s_endpgm
+; GCN-ASM-NEXT:   [[END_LABEL2:\.Lfunc_end.*]]:
+; GCN-ASM-NEXT:           .size   a_kernel2, [[END_LABEL2]]-a_kernel2
+; GCN-ASM:                .section        .AMDGPU.config
+
+; GCN-OBJ-NEXT:   {{^$}}
+
+define amdgpu_kernel void @a_kernel2() {
+  ret void
+}
+
+; GCN-ASM:                .text
+; GCN-ASM-NEXT:           .globl  a_function
+; GCN-ASM-NEXT:           .p2align        2
+; GCN-ASM-NEXT:           .type   a_function,@function
+
+; GCN-NEXT:       a_function:
+; GCN:                    s_setpc_b64
+; GCN-ASM-NEXT:   [[END_LABEL3:\.Lfunc_end.*]]:
+; GCN-ASM-NEXT:           .size   a_function, [[END_LABEL3]]-a_function
+; GFX10-ASM:              .p2alignl 6, 3214868480
+; GFX10-ASM-NEXT:         .fill 32, 4, 3214868480
+
+; GFX10-OBJ-NEXT:         s_code_end
+
+; GFX10-OBJ:              s_code_end // 000000000140:
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+; GFX10-OBJ-NEXT:         s_code_end
+
+define void @a_function() {
+  ret void
+}