From: Matt Arsenault Date: Fri, 19 Jul 2019 14:29:30 +0000 (+0000) Subject: AMDGPU/GlobalISel: Support arguments with multiple registers X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e7d49e218ea8d7b0b65ac5d236c353afee284cdf;p=llvm AMDGPU/GlobalISel: Support arguments with multiple registers Handles structs used directly in argument lists. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366584 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index a118743f4d8..8fc55cba494 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -152,33 +152,45 @@ void AMDGPUCallLowering::splitToValueTypes( SmallVector<EVT, 4> SplitVTs; ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs); - EVT VT = SplitVTs[0]; - unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT); - - if (NumParts == 1) { - // No splitting to do, but we want to replace the original type (e.g. [1 x - // double] -> double). - SplitArgs.emplace_back(OrigArg.Regs[0], VT.getTypeForEVT(Ctx), - OrigArg.Flags, OrigArg.IsFixed); - return; - } + assert(OrigArg.Regs.size() == SplitVTs.size()); - LLT LLTy = getLLTForType(*OrigArg.Ty, DL); - SmallVector<Register, 8> SplitRegs; + int SplitIdx = 0; + for (EVT VT : SplitVTs) { + unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT); + Type *Ty = VT.getTypeForEVT(Ctx); - EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT); - Type *PartTy = PartVT.getTypeForEVT(Ctx); - LLT PartLLT = getLLTForType(*PartTy, DL); - // FIXME: Should we be reporting all of the part registers for a single - // argument, and let handleAssignments take care of the repacking? 
- for (unsigned i = 0; i < NumParts; ++i) { - Register PartReg = MRI.createGenericVirtualRegister(PartLLT); - SplitRegs.push_back(PartReg); - SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags); - } - PerformArgSplit(SplitRegs, LLTy, PartLLT); + if (NumParts == 1) { + // No splitting to do, but we want to replace the original type (e.g. [1 x + // double] -> double). + SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty, + OrigArg.Flags, OrigArg.IsFixed); + + ++SplitIdx; + continue; + } + + LLT LLTy = getLLTForType(*Ty, DL); + + SmallVector<Register, 8> SplitRegs; + + EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT); + Type *PartTy = PartVT.getTypeForEVT(Ctx); + LLT PartLLT = getLLTForType(*PartTy, DL); + + // FIXME: Should we be reporting all of the part registers for a single + // argument, and let handleAssignments take care of the repacking? + for (unsigned i = 0; i < NumParts; ++i) { + Register PartReg = MRI.createGenericVirtualRegister(PartLLT); + SplitRegs.push_back(PartReg); + SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags); + } + + PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx); + + ++SplitIdx; + } } bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, @@ -485,11 +497,11 @@ bool AMDGPUCallLowering::lowerFormalArguments( if (!IsShader && InReg) return false; - // TODO: Handle multiple registers and sret. + // TODO: Handle sret. 
if (Arg.hasAttribute(Attribute::StructRet) || Arg.hasAttribute(Attribute::SwiftSelf) || Arg.hasAttribute(Attribute::SwiftError) || - Arg.hasAttribute(Attribute::Nest) || VRegs[Idx].size() > 1) + Arg.hasAttribute(Attribute::Nest)) return false; if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) { @@ -505,7 +517,9 @@ bool AMDGPUCallLowering::lowerFormalArguments( ++PSInputNum; if (SkipArg) { - MIRBuilder.buildUndef(VRegs[Idx][0]); + for (int I = 0, E = VRegs[Idx].size(); I != E; ++I) + MIRBuilder.buildUndef(VRegs[Idx][I]); + ++Idx; continue; } @@ -513,11 +527,14 @@ bool AMDGPUCallLowering::lowerFormalArguments( ArgInfo OrigArg(VRegs[Idx], Arg.getType()); setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F); - splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CC, + + splitToValueTypes( + OrigArg, SplitArgs, DL, MRI, CC, // FIXME: We should probably be passing multiple registers to // handleAssignments to do this - [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT) { - packSplitRegsToOrigType(MIRBuilder, VRegs[Idx], Regs, LLTy, PartLLT); + [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) { + packSplitRegsToOrigType(MIRBuilder, VRegs[Idx][VTSplitIdx], Regs, + LLTy, PartLLT); }); ++Idx; diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.h b/lib/Target/AMDGPU/AMDGPUCallLowering.h index 97f8c85b9e7..bc345b6c659 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -30,7 +30,7 @@ class AMDGPUCallLowering: public CallLowering { Register DstReg) const; /// A function of this type is used to perform value split action. 
- using SplitArgTy = std::function<void(ArrayRef<Register>, LLT, LLT)>; + using SplitArgTy = std::function<void(ArrayRef<Register>, LLT, LLT, int)>; void splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl<ArgInfo> &SplitArgs, diff --git a/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll b/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll index 0d11aa99395..e5e8b6840d2 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll +++ b/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll @@ -1,10 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -stop-after=irtranslator -global-isel %s -o - | FileCheck %s ; Check that we correctly skip over disabled inputs -; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2 -; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0 -; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]](s32), [[S0]](s32), [[S0]](s32), [[V0]](s32) -define amdgpu_ps void @ps0(float inreg %arg0, float %psinput0, float %psinput1) #1 { +define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float %psinput1) #1 { + ; CHECK-LABEL: name: disabled_input + ; CHECK: bb.1.main_body: + ; CHECK: liveins: $sgpr2, $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[C2]](s1), [[C2]](s1) + ; CHECK: S_ENDPGM 0 +main_body: + call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0 + ret void +} + +define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float } 
%psinput0, float %psinput1) #1 { + ; CHECK-LABEL: name: disabled_input_struct + ; CHECK: bb.1.main_body: + ; CHECK: liveins: $sgpr2, $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[C2]](s1), [[C2]](s1) + ; CHECK: S_ENDPGM 0 main_body: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0 ret void diff --git a/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index cf08f695c72..dcee8eaf799 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -1,10 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -march=amdgcn -mcpu=tahiti -O0 -stop-after=irtranslator -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs -o - %s 2> %t | FileCheck %s -; RUN: FileCheck -check-prefix=ERR %s < %t - -; ERR-NOT: remark -; ERR: remark: <unknown>:0:0: unable to lower arguments: void ({ i8, i32 })* (in function: void_func_struct_i8_i32) -; ERR-NOT: remark +; RUN: llc -march=amdgcn -mcpu=tahiti -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs -o - %s | FileCheck %s define void @void_func_i1(i1 %arg0) #0 { ; CHECK-LABEL: name: void_func_i1 @@ -1138,9 +1133,17 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 { define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { ; CHECK-LABEL: name: 
void_func_struct_i8_i32 - ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x80000000) ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: G_STORE [[TRUNC]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C]](s64) + ; CHECK: G_STORE [[COPY1]](s32), [[GEP]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK: S_ENDPGM 0 store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef ret void }