From 902e7e59d192dff97b799f4e81bbee71bfde74f5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Wed, 19 Apr 2017 17:42:39 +0000
Subject: [PATCH] AMDGPU: Don't align callable functions to 256

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300720 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp |  4 +++-
 test/CodeGen/AMDGPU/hsa-func-align.ll  | 18 ++++++++++++++++++
 test/CodeGen/AMDGPU/hsa-func.ll        | 13 +++++++++++++
 3 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/AMDGPU/hsa-func-align.ll

diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index c35a0912adb..7ee4bcb86fe 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -184,9 +184,11 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
 }
 
 bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
 
   // The starting address of all shader programs must be 256 bytes aligned.
-  MF.setAlignment(8);
+  // Regular functions just need the basic required instruction alignment.
+  MF.setAlignment(MFI->isEntryFunction() ? 8 : 2);
 
   SetupMachineFunction(MF);
 
diff --git a/test/CodeGen/AMDGPU/hsa-func-align.ll b/test/CodeGen/AMDGPU/hsa-func-align.ll
new file mode 100644
index 00000000000..a00f5e2669d
--- /dev/null
+++ b/test/CodeGen/AMDGPU/hsa-func-align.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=HSA %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj < %s | llvm-readobj -symbols -s -sd | FileCheck -check-prefix=ELF %s
+
+; ELF: Section {
+; ELF: Name: .text
+; ELF: SHF_ALLOC (0x2)
+; ELF: SHF_EXECINSTR (0x4)
+; ELF: AddressAlignment: 32
+; ELF: }
+
+; HSA: .globl simple_align16
+; HSA: .p2align 5
+define void @simple_align16(i32 addrspace(1)* addrspace(2)* %ptr.out) align 32 {
+entry:
+  %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
+  store i32 0, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/AMDGPU/hsa-func.ll b/test/CodeGen/AMDGPU/hsa-func.ll
index b4cdd4030d8..147cf9bbe6c 100644
--- a/test/CodeGen/AMDGPU/hsa-func.ll
+++ b/test/CodeGen/AMDGPU/hsa-func.ll
@@ -14,6 +14,7 @@
 ; ELF: Flags [ (0x6)
 ; ELF: SHF_ALLOC (0x2)
 ; ELF: SHF_EXECINSTR (0x4)
+; ELF: AddressAlignment: 4
 ; ELF: }
 
 ; ELF: SHT_NOTE
@@ -36,6 +37,8 @@
 ; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
 
 ; HSA-NOT: .amdgpu_hsa_kernel simple
+; HSA: .globl simple
+; HSA: .p2align 2
 ; HSA: {{^}}simple:
 ; HSA: .amd_kernel_code_t
 ; HSA: enable_sgpr_private_segment_buffer = 1
@@ -58,3 +61,13 @@ entry:
   store i32 0, i32 addrspace(1)* %out
   ret void
 }
+
+; Ignore explicit alignment that is too low.
+; HSA: .globl simple_align2
+; HSA: .p2align 2
+define void @simple_align2(i32 addrspace(1)* addrspace(2)* %ptr.out) align 2 {
+entry:
+  %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out
+  store i32 0, i32 addrspace(1)* %out
+  ret void
+}
-- 
2.50.1