granicus.if.org Git - llvm/commitdiff
AMDGPU: Don't align callable functions to 256
authorMatt Arsenault <Matthew.Arsenault@amd.com>
Wed, 19 Apr 2017 17:42:39 +0000 (17:42 +0000)
committerMatt Arsenault <Matthew.Arsenault@amd.com>
Wed, 19 Apr 2017 17:42:39 +0000 (17:42 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300720 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
test/CodeGen/AMDGPU/hsa-func-align.ll [new file with mode: 0644]
test/CodeGen/AMDGPU/hsa-func.ll

index c35a0912adbb1e4062de1ec22a4db83786c19570..7ee4bcb86fe2ab65ec05faaa71624dc08e5445ef 100644 (file)
@@ -184,9 +184,11 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
 }
 
 bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
 
   // The starting address of all shader programs must be 256 bytes aligned.
-  MF.setAlignment(8);
+  // Regular functions just need the basic required instruction alignment.
+  MF.setAlignment(MFI->isEntryFunction() ? 8 : 2);
 
   SetupMachineFunction(MF);
 
diff --git a/test/CodeGen/AMDGPU/hsa-func-align.ll b/test/CodeGen/AMDGPU/hsa-func-align.ll
new file mode 100644 (file)
index 0000000..a00f5e2
--- /dev/null
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=HSA %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj < %s | llvm-readobj -symbols -s -sd | FileCheck -check-prefix=ELF %s
+
+; ELF: Section {
+; ELF: Name: .text
+; ELF: SHF_ALLOC (0x2)
+; ELF: SHF_EXECINSTR (0x4)
+; ELF: AddressAlignment: 32
+; ELF: }
+
+; HSA: .globl simple_align16
+; HSA: .p2align 5
+define void @simple_align16(i32 addrspace(1)* addrspace(2)* %ptr.out) align 32 {
+entry:
+  %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
+  store i32 0, i32 addrspace(1)* %out
+  ret void
+}
index b4cdd4030d86a79d8d91747f6b13bbd0ca28ff08..147cf9bbe6c01c52f04bba821e443a02270712df 100644 (file)
@@ -14,6 +14,7 @@
 ; ELF: Flags [ (0x6)
 ; ELF: SHF_ALLOC (0x2)
 ; ELF: SHF_EXECINSTR (0x4)
+; ELF: AddressAlignment: 4
 ; ELF: }
 
 ; ELF: SHT_NOTE
@@ -36,6 +37,8 @@
 ; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
 
 ; HSA-NOT: .amdgpu_hsa_kernel simple
+; HSA: .globl simple
+; HSA: .p2align 2
 ; HSA: {{^}}simple:
 ; HSA: .amd_kernel_code_t
 ; HSA: enable_sgpr_private_segment_buffer = 1
@@ -58,3 +61,13 @@ entry:
   store i32 0, i32 addrspace(1)* %out
   ret void
 }
+
+; Ignore explicit alignment that is too low.
+; HSA: .globl simple_align2
+; HSA: .p2align 2
+define void @simple_align2(i32 addrspace(1)* addrspace(2)* %ptr.out) align 2 {
+entry:
+  %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
+  store i32 0, i32 addrspace(1)* %out
+  ret void
+}