AMDGPU: Refactor kernel argument lowering

author Tom Stellard <thomas.stellard@amd.com>

Fri, 16 Sep 2016 21:53:00 +0000 (21:53 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Fri, 16 Sep 2016 21:53:00 +0000 (21:53 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Fri, 16 Sep 2016 21:53:00 +0000 (21:53 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Fri, 16 Sep 2016 21:53:00 +0000 (21:53 +0000)
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

index 8196cfdcc089d84bfc84f7777ee732544d2dfc00..030166539c7f96fadedec7a77a470954c625404d 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -37,7 +37,7 @@ static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
    MachineFunction &MF = State.getMachineFunction();
    AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
  
-  uint64_t Offset = MFI->allocateKernArg(ValVT.getStoreSize(),
+  uint64_t Offset = MFI->allocateKernArg(LocVT.getStoreSize(),
                                           ArgFlags.getOrigAlign());
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
    return true;
@@ -626,9 +626,104 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
  // TargetLowering Callbacks
  //===---------------------------------------------------------------------===//
  
-void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
+/// The SelectionDAGBuilder will automatically promote function arguments
+/// with illegal types.  However, this does not work for the AMDGPU targets
+/// since the function arguments are stored in memory as these illegal types.
+/// In order to handle this properly we need to get the original type sizes
+/// from the LLVM IR Function and fix up the ISD::InputArg values before
+/// passing them to AnalyzeFormalArguments().
+
+/// When the SelectionDAGBuilder computes the Ins, it takes care of splitting
+/// input values across multiple registers.  Each item in the Ins array
+/// represents a single value that will be stored in registers.  Ins[x].VT is
+/// the value type of the value that will be stored in the register, so
+/// whatever SDNode we lower the argument to needs to be this type.
+///
+/// In order to correctly lower the arguments we need to know the size of each
+/// argument.  Since Ins[x].VT gives us the size of the register that will
+/// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
+/// for the original function argument so that we can deduce the correct memory
+/// type to use for Ins[x].  In most cases the correct memory type will be
+/// Ins[x].ArgVT.  However, this will not always be the case.  If, for example,
+/// we have a kernel argument of type v8i8, this argument will be split into
+/// 8 parts and each part will be represented by its own item in the Ins array.
+/// For each part the Ins[x].ArgVT will be the v8i8, which is the full type of
+/// the argument before it was split.  From this, we deduce that the memory type
+/// for each individual part is i8.  We pass the memory type as LocVT to the
+/// calling convention analysis function and the register type (Ins[x].VT) as
+/// the ValVT.
+void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(CCState &State,
                               const SmallVectorImpl<ISD::InputArg> &Ins) const {
+  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+    const ISD::InputArg &In = Ins[i];
+    EVT MemVT;
+
+    unsigned NumRegs = getNumRegisters(State.getContext(), In.ArgVT);
+
+    if (In.ArgVT == MVT::i16 || In.ArgVT == MVT::i8 || In.ArgVT == MVT::f16) {
+      // The ABI says the caller will extend these values to 32-bits.
+      MemVT = In.ArgVT.isInteger() ? MVT::i32 : MVT::f32;
+    } else if (NumRegs == 1) {
+      // This argument is not split, so the IR type is the memory type.
+      assert(!In.Flags.isSplit());
+      if (In.ArgVT.isExtended()) {
+        // We have an extended type, like i24, so we should just use the register type
+        MemVT = In.VT;
+      } else {
+        MemVT = In.ArgVT;
+      }
+    } else if (In.ArgVT.isVector() && In.VT.isVector() &&
+               In.ArgVT.getScalarType() == In.VT.getScalarType()) {
+      assert(In.ArgVT.getVectorNumElements() > In.VT.getVectorNumElements());
+      // We have a vector value which has been split into a vector with
+      // the same scalar type, but fewer elements.  This should handle
+      // all the floating-point vector types.
+      MemVT = In.VT;
+    } else if (In.ArgVT.isVector() &&
+               In.ArgVT.getVectorNumElements() == NumRegs) {
+      // This arg has been split so that each element is stored in a separate
+      // register.
+      MemVT = In.ArgVT.getScalarType();
+    } else if (In.ArgVT.isExtended()) {
+      // We have an extended type, like i65.
+      MemVT = In.VT;
+    } else {
+      unsigned MemoryBits = In.ArgVT.getStoreSizeInBits() / NumRegs;
+      assert(In.ArgVT.getStoreSizeInBits() % NumRegs == 0);
+      if (In.VT.isInteger()) {
+        MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits);
+      } else if (In.VT.isVector()) {
+        assert(!In.VT.getScalarType().isFloatingPoint());
+        unsigned NumElements = In.VT.getVectorNumElements();
+        assert(MemoryBits % NumElements == 0);
+        // This vector type has been split into another vector type with
+        // a different elements size.
+        EVT ScalarVT = EVT::getIntegerVT(State.getContext(),
+                                         MemoryBits / NumElements);
+        MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);
+      } else {
+        llvm_unreachable("cannot deduce memory type.");
+      }
+    }
+
+    // Convert one element vectors to scalar.
+    if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
+      MemVT = MemVT.getScalarType();
+
+    if (MemVT.isExtended()) {
+      // This should really only happen if we have vec3 arguments
+      assert(MemVT.isVector() && MemVT.getVectorNumElements() == 3);
+      MemVT = MemVT.getPow2VectorType(State.getContext());
+    }
+
+    assert(MemVT.isSimple());
+    allocateKernArg(i, In.VT, MemVT.getSimpleVT(), CCValAssign::Full, In.Flags,
+                    State);
+  }
+}
  
+void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
+                              const SmallVectorImpl<ISD::InputArg> &Ins) const {
    State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
  }
  
@@ -2617,38 +2712,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
  // Helper functions
  //===----------------------------------------------------------------------===//
  
-void AMDGPUTargetLowering::getOriginalFunctionArgs(
-                               SelectionDAG &DAG,
-                               const Function *F,
-                               const SmallVectorImpl<ISD::InputArg> &Ins,
-                               SmallVectorImpl<ISD::InputArg> &OrigIns) const {
-
-  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
-    if (Ins[i].ArgVT == Ins[i].VT) {
-      OrigIns.push_back(Ins[i]);
-      continue;
-    }
-
-    EVT VT;
-    if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
-      // Vector has been split into scalars.
-      VT = Ins[i].ArgVT.getVectorElementType();
-    } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
-               Ins[i].ArgVT.getVectorElementType() !=
-               Ins[i].VT.getVectorElementType()) {
-      // Vector elements have been promoted
-      VT = Ins[i].ArgVT;
-    } else {
-      // Vector has been spilt into smaller vectors.
-      VT = Ins[i].VT;
-    }
-
-    ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
-                      Ins[i].OrigArgIndex, Ins[i].PartOffset);
-    OrigIns.push_back(Arg);
-  }
-}
-
  SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
                                                    const TargetRegisterClass *RC,
                                                     unsigned Reg, EVT VT) const {
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h

index 382a91e053676dc64091f67e44eef2c71f910d1a..fc042b281803049110771a36df8fcd7295363099 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -100,16 +100,8 @@ protected:
    SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
    void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &Results) const;
-  /// The SelectionDAGBuilder will automatically promote function arguments
-  /// with illegal types.  However, this does not work for the AMDGPU targets
-  /// since the function arguments are stored in memory as these illegal types.
-  /// In order to handle this properly we need to get the origianl types sizes
-  /// from the LLVM IR Function and fixup the ISD:InputArg values before
-  /// passing them to AnalyzeFormalArguments()
-  void getOriginalFunctionArgs(SelectionDAG &DAG,
-                               const Function *F,
-                               const SmallVectorImpl<ISD::InputArg> &Ins,
-                               SmallVectorImpl<ISD::InputArg> &OrigIns) const;
+  void analyzeFormalArgumentsCompute(CCState &State,
+                              const SmallVectorImpl<ISD::InputArg> &Ins) const;
    void AnalyzeFormalArguments(CCState &State,
                                const SmallVectorImpl<ISD::InputArg> &Ins) const;
    void AnalyzeReturn(CCState &State,
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp

index 6b391de00d1aa103df89f9a5bb8eb1afbe0b6a0d..35e6c9d036b159e2a5df7087e72a65a288cbb3fe 100644 (file)
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1512,9 +1512,11 @@ SDValue R600TargetLowering::LowerFormalArguments(
  
    SmallVector<ISD::InputArg, 8> LocalIns;
  
-  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
-
-  AnalyzeFormalArguments(CCInfo, LocalIns);
+  if (AMDGPU::isShader(CallConv)) {
+    AnalyzeFormalArguments(CCInfo, Ins);
+  } else {
+    analyzeFormalArgumentsCompute(CCInfo, Ins);
+  }
  
    for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
      CCValAssign &VA = ArgLocs[i];
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp

index 6b94333bd41ebd74dc11d35caa1d3d2d5dbd2206..92c832f7a4624a24545d65981143dcf48a5c207d 100644 (file)
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -685,9 +685,6 @@ SDValue SITargetLowering::LowerFormalArguments(
    }
  
    if (!AMDGPU::isShader(CallConv)) {
-    getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
-                            Splits);
-
      assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
    } else {
      assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() &&
@@ -735,7 +732,10 @@ SDValue SITargetLowering::LowerFormalArguments(
      CCInfo.AllocateReg(FlatScratchInitReg);
    }
  
-  AnalyzeFormalArguments(CCInfo, Splits);
+  if (!AMDGPU::isShader(CallConv))
+    analyzeFormalArgumentsCompute(CCInfo, Ins);
+  else
+    AnalyzeFormalArguments(CCInfo, Splits);
  
    SmallVector<SDValue, 16> Chains;
  
@@ -752,7 +752,7 @@ SDValue SITargetLowering::LowerFormalArguments(
  
      if (VA.isMemLoc()) {
        VT = Ins[i].VT;
-      EVT MemVT = Splits[i].VT;
+      EVT MemVT = VA.getLocVT();
        const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
                                VA.getLocMemOffset();
        // The first 36 bytes of the input buffer contains information about
diff --git a/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll b/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll

index 31755125c03b950d65441be11bab6fe44d7cb340..a48e7acd4cf375e0e61e4cef9b7180c6daaa691b 100644 (file)
--- a/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll
+++ b/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll
@@ -1,5 +1,3 @@
-; REQUIRES: asserts
-; XFAIL: *
  ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s
  
  ; CHECK-LABEL: {{^}}kernel_arg_i64:
author	Tom Stellard <thomas.stellard@amd.com>
	Fri, 16 Sep 2016 21:53:00 +0000 (21:53 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Fri, 16 Sep 2016 21:53:00 +0000 (21:53 +0000)
lib/Target/AMDGPU/AMDGPUISelLowering.cpp		patch \| blob \| history
lib/Target/AMDGPU/AMDGPUISelLowering.h		patch \| blob \| history
lib/Target/AMDGPU/R600ISelLowering.cpp		patch \| blob \| history
lib/Target/AMDGPU/SIISelLowering.cpp		patch \| blob \| history
test/CodeGen/AMDGPU/v1i64-kernel-arg.ll		patch \| blob \| history