From: Matt Arsenault
Date: Thu, 11 Jul 2019 14:18:25 +0000 (+0000)
Subject: AMDGPU/GlobalISel: Move kernel argument handling to separate function
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5ab8f336c4eb0035ab9d05572615494dc1873a77;p=llvm

AMDGPU/GlobalISel: Move kernel argument handling to separate function

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365782 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index adcc904f35c..b107c357196 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -304,16 +304,71 @@ static void allocateSystemSGPRs(CCState &CCInfo,
   }
 }
 
+bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
+    MachineIRBuilder &MIRBuilder, const Function &F,
+    ArrayRef<ArrayRef<Register>> VRegs) const {
+  MachineFunction &MF = MIRBuilder.getMF();
+  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+  const DataLayout &DL = F.getParent()->getDataLayout();
+
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+
+  allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);
+
+  unsigned i = 0;
+  const unsigned KernArgBaseAlign = 16;
+  const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
+  uint64_t ExplicitArgOffset = 0;
+
+  // TODO: Align down to dword alignment and extract bits for extending loads.
+  for (auto &Arg : F.args()) {
+    Type *ArgTy = Arg.getType();
+    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
+    if (AllocSize == 0)
+      continue;
+
+    unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
+
+    uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
+    ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
+
+    ArrayRef<Register> OrigArgRegs = VRegs[i];
+    Register ArgReg =
+        OrigArgRegs.size() == 1
+            ? OrigArgRegs[0]
+            : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
+    unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
+    ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
+    lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
+    if (OrigArgRegs.size() > 1)
+      unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
+    ++i;
+  }
+
+  allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
+  allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
+  return true;
+}
+
 bool AMDGPUCallLowering::lowerFormalArguments(
     MachineIRBuilder &MIRBuilder, const Function &F,
     ArrayRef<ArrayRef<Register>> VRegs) const {
+  // The infrastructure for normal calling convention lowering is essentially
+  // useless for kernels. We want to avoid any kind of legalization or argument
+  // splitting.
+  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
+    return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);
+
   // AMDGPU_GS and AMDGPU_HS are not supported yet.
   if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
       F.getCallingConv() == CallingConv::AMDGPU_HS)
     return false;
 
   MachineFunction &MF = MIRBuilder.getMF();
-  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
   const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
@@ -324,47 +379,6 @@ bool AMDGPUCallLowering::lowerFormalArguments(
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
 
-  // The infrastructure for normal calling convention lowering is essentially
-  // useless for kernels. We want to avoid any kind of legalization or argument
-  // splitting.
-  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
-    allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);
-
-    unsigned i = 0;
-    const unsigned KernArgBaseAlign = 16;
-    const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
-    uint64_t ExplicitArgOffset = 0;
-
-    // TODO: Align down to dword alignment and extract bits for extending loads.
-    for (auto &Arg : F.args()) {
-      Type *ArgTy = Arg.getType();
-      unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
-      if (AllocSize == 0)
-        continue;
-
-      unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
-
-      uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
-      ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
-
-      ArrayRef<Register> OrigArgRegs = VRegs[i];
-      Register ArgReg =
-          OrigArgRegs.size() == 1
-              ? OrigArgRegs[0]
-              : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
-      unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
-      ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
-      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
-      if (OrigArgRegs.size() > 1)
-        unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
-      ++i;
-    }
-
-    allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
-    allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
-    return true;
-  }
-
   if (Info->hasImplicitBufferPtr()) {
     unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
     MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.h b/lib/Target/AMDGPU/AMDGPUCallLowering.h
index f659fa17f26..3599659cac6 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -34,6 +34,11 @@ class AMDGPUCallLowering: public CallLowering {
 
   bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
                    ArrayRef<Register> VRegs) const override;
+
+  bool lowerFormalArgumentsKernel(MachineIRBuilder &MIRBuilder,
+                                  const Function &F,
+                                  ArrayRef<ArrayRef<Register>> VRegs) const;
+
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
                             ArrayRef<ArrayRef<Register>> VRegs) const override;
   static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
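
Note on the kernarg offset arithmetic: the loop in lowerFormalArgumentsKernel
places each argument at the next ABI-aligned slot in the explicit kernarg
segment, and clamps the load alignment to what the offset guarantees relative
to the 16-byte segment base. Below is a minimal standalone C++ sketch of that
computation; alignTo and MinAlign are local re-implementations with the same
semantics as the LLVM helpers, and the argument sizes and ABI alignments are
illustrative assumptions rather than values taken from a real DataLayout.

#include <cstdint>
#include <cstdio>

// Stand-in for llvm::alignTo: round Value up to a multiple of Align.
static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

// Stand-in for llvm::MinAlign: largest power of two dividing both A and B.
static uint64_t MinAlign(uint64_t A, uint64_t B) {
  return (A | B) & (1 + ~(A | B));
}

int main() {
  // Hypothetical kernel signature (i32, <2 x i64>, i8) with made-up
  // store sizes and ABI alignments.
  struct ArgInfo { const char *Name; uint64_t AllocSize, ABIAlign; };
  const ArgInfo Args[] = {{"i32", 4, 4}, {"<2 x i64>", 16, 16}, {"i8", 1, 1}};

  const uint64_t KernArgBaseAlign = 16; // kernarg segment base alignment
  const uint64_t BaseOffset = 0;        // getExplicitKernelArgOffset stand-in
  uint64_t ExplicitArgOffset = 0;

  for (const ArgInfo &Arg : Args) {
    // Next ABI-aligned slot for this argument, then advance past it.
    uint64_t ArgOffset = alignTo(ExplicitArgOffset, Arg.ABIAlign) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, Arg.ABIAlign) + Arg.AllocSize;

    // Alignment usable for the load: limited by how the offset divides the
    // 16-byte base (e.g. offset 4 from a 16-byte base gives only align 4).
    uint64_t Align = MinAlign(KernArgBaseAlign, ArgOffset);

    std::printf("%-10s offset=%2llu align=%2llu\n", Arg.Name,
                (unsigned long long)ArgOffset, (unsigned long long)Align);
  }
  return 0;
}

With these assumed sizes the sketch prints offsets 0, 16, and 32, each with
alignment 16; prepending an i8 argument would instead push the i32 to offset 4
with alignment 4, which is why the load alignment has to be computed per
argument rather than assumed to equal the segment alignment.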