AMDGPU: Fix kernel argument alignment impacting stack size

author Matt Arsenault <Matthew.Arsenault@amd.com>

Sat, 18 Jun 2016 05:15:53 +0000 (05:15 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Sat, 18 Jun 2016 05:15:53 +0000 (05:15 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Sat, 18 Jun 2016 05:15:53 +0000 (05:15 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Sat, 18 Jun 2016 05:15:53 +0000 (05:15 +0000)
diff --git a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td

index 4386c78966bf83400c230176785c7cfffe7c0429..47dfa499206811a6841c0241a08535f01f75818a 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -110,7 +110,7 @@ def CC_R600 : CallingConv<[
  
  // Calling convention for compute kernels
  def CC_AMDGPU_Kernel : CallingConv<[
-  CCCustom<"allocateStack">
+  CCCustom<"allocateKernArg">
  ]>;
  
  def CC_AMDGPU : CallingConv<[
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

index 37f6efa315f760885655fb3d778685a49b8bedcb..3329e3bafdabafc1da68e39cfef77978d77051f1 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -31,13 +31,15 @@
  #include "SIInstrInfo.h"
  using namespace llvm;
  
-static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
-                      CCValAssign::LocInfo LocInfo,
-                      ISD::ArgFlagsTy ArgFlags, CCState &State) {
-  unsigned Offset = State.AllocateStack(ValVT.getStoreSize(),
-                                        ArgFlags.getOrigAlign());
-  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
-
+static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+                            CCValAssign::LocInfo LocInfo,
+                            ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  MachineFunction &MF = State.getMachineFunction();
+  AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
+
+  uint64_t Offset = MFI->allocateKernArg(ValVT.getStoreSize(),
+                                         ArgFlags.getOrigAlign());
+  State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
    return true;
  }
  
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp

index c824ab83ad8eb288ddbcb148e59e5a586d2dd084..d19eb655e463c655990ad8c28483c868c7d8d91b 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -1,8 +1,5 @@
  #include "AMDGPUMachineFunction.h"
-#include "AMDGPU.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Function.h"
+
  using namespace llvm;
  
  // Pin the vtable to this file.
@@ -10,8 +7,9 @@ void AMDGPUMachineFunction::anchor() {}
  
  AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
    MachineFunctionInfo(),
+  KernArgSize(0),
+  MaxKernArgAlign(0),
    LDSSize(0),
    ABIArgOffset(0),
    ScratchSize(0),
-  IsKernel(true) {
-}
+  IsKernel(true) {}
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h

index 2ef3bf5f2fb0e44a4a464e5aff5f3451dcc96a6d..a534024dc2e141805c42a22a8e9a5970997154d8 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -16,10 +16,25 @@
  namespace llvm {
  
  class AMDGPUMachineFunction : public MachineFunctionInfo {
+  uint64_t KernArgSize;
+  unsigned MaxKernArgAlign;
+
    virtual void anchor();
  
  public:
    AMDGPUMachineFunction(const MachineFunction &MF);
+
+  uint64_t allocateKernArg(uint64_t Size, unsigned Align) {
+    assert(isPowerOf2_32(Align));
+    KernArgSize = alignTo(KernArgSize, Align);
+
+    uint64_t Result = KernArgSize;
+    KernArgSize += Size;
+
+    MaxKernArgAlign = std::max(Align, MaxKernArgAlign);
+    return Result;
+  }
+
    /// A map to keep track of local memory objects and their offsets within
    /// the local memory space.
    std::map<const GlobalValue *, unsigned> LocalMemoryObjects;
diff --git a/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll b/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll

new file mode 100644 (file)

index 0000000..21c92db
--- /dev/null
+++ b/test/CodeGen/AMDGPU/kernarg-stack-alignment.ll
@@ -0,0 +1,44 @@
+; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+
+; Test that the alignment of kernel arguments does not impact the
+; alignment of the stack
+
+; CHECK-LABEL: {{^}}no_args:
+; CHECK: ScratchSize: 8{{$}}
+define void @no_args() {
+  %alloca = alloca i8
+  store volatile i8 0, i8* %alloca
+  ret void
+}
+
+; CHECK-LABEL: {{^}}force_align32:
+; CHECK: ScratchSize: 8{{$}}
+define void @force_align32(<8 x i32>) {
+  %alloca = alloca i8
+  store volatile i8 0, i8* %alloca
+  ret void
+}
+
+; CHECK-LABEL: {{^}}force_align64:
+; CHECK: ScratchSize: 8{{$}}
+define void @force_align64(<16 x i32>) {
+  %alloca = alloca i8
+  store volatile i8 0, i8* %alloca
+  ret void
+}
+
+; CHECK-LABEL: {{^}}force_align128:
+; CHECK: ScratchSize: 8{{$}}
+define void @force_align128(<32 x i32>) {
+  %alloca = alloca i8
+  store volatile i8 0, i8* %alloca
+  ret void
+}
+
+; CHECK-LABEL: {{^}}force_align256:
+; CHECK: ScratchSize: 8{{$}}
+define void @force_align256(<64 x i32>) {
+  %alloca = alloca i8
+  store volatile i8 0, i8* %alloca
+  ret void
+}
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Sat, 18 Jun 2016 05:15:53 +0000 (05:15 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Sat, 18 Jun 2016 05:15:53 +0000 (05:15 +0000)
lib/Target/AMDGPU/AMDGPUCallingConv.td		patch | blob | history
lib/Target/AMDGPU/AMDGPUISelLowering.cpp		patch | blob | history
lib/Target/AMDGPU/AMDGPUMachineFunction.cpp		patch | blob | history
lib/Target/AMDGPU/AMDGPUMachineFunction.h		patch | blob | history
test/CodeGen/AMDGPU/kernarg-stack-alignment.ll	[new file with mode: 0644]	patch | blob