From 9c21e67b4cbd6e8ab29f61e95a393b2a4ae60da7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 13 Jul 2018 16:40:37 +0000 Subject: [PATCH] AMDGPU: Properly handle shader inputs with split arguments This needs to refer to arguments by their original argument index, not the argument split index, which depends on what the type splitting decides to do. Also avoid incrementing PSInputNum for each split piece. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337022 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIISelLowering.cpp | 39 +++++++++++++++++++--------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 177cec982f3..ea8578fb19d 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1265,36 +1265,51 @@ static void processShaderInputArgs(SmallVectorImpl &Splits, FunctionType *FType, SIMachineFunctionInfo *Info) { for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) { - const ISD::InputArg &Arg = Ins[I]; + const ISD::InputArg *Arg = &Ins[I]; // First check if it's a PS input addr. - if (CallConv == CallingConv::AMDGPU_PS && !Arg.Flags.isInReg() && - !Arg.Flags.isByVal() && PSInputNum <= 15) { + if (CallConv == CallingConv::AMDGPU_PS && + !Arg->Flags.isInReg() && !Arg->Flags.isByVal() && PSInputNum <= 15) { + + bool SkipArg = !Arg->Used && !Info->isPSInputAllocated(PSInputNum); + + // Inconveniently only the first part of the split is marked as isSplit, + // so skip to the end. We only want to increment PSInputNum once for the + // entire split argument. + if (Arg->Flags.isSplit()) { + while (!Arg->Flags.isSplitEnd()) { + assert(!Arg->VT.isVector() && + "unexpected vector split in ps argument type"); + if (!SkipArg) + Splits.push_back(*Arg); + Arg = &Ins[++I]; + } + } - if (!Arg.Used && !Info->isPSInputAllocated(PSInputNum)) { + if (SkipArg) { // We can safely skip PS inputs. 
- Skipped.set(I); + Skipped.set(Arg->getOrigArgIndex()); ++PSInputNum; continue; } Info->markPSInputAllocated(PSInputNum); - if (Arg.Used) + if (Arg->Used) Info->markPSInputEnabled(PSInputNum); ++PSInputNum; } // Second split vertices into their elements. - if (Arg.VT.isVector()) { - ISD::InputArg NewArg = Arg; + if (Arg->VT.isVector()) { + ISD::InputArg NewArg = *Arg; NewArg.Flags.setSplit(); - NewArg.VT = Arg.VT.getVectorElementType(); + NewArg.VT = Arg->VT.getVectorElementType(); // We REALLY want the ORIGINAL number of vertex elements here, e.g. a // three or five element vertex only needs three or five registers, // NOT four or eight. - Type *ParamType = FType->getParamType(Arg.getOrigArgIndex()); + Type *ParamType = FType->getParamType(Arg->getOrigArgIndex()); unsigned NumElements = ParamType->getVectorNumElements(); for (unsigned J = 0; J != NumElements; ++J) { @@ -1302,7 +1317,7 @@ static void processShaderInputArgs(SmallVectorImpl &Splits, NewArg.PartOffset += NewArg.VT.getStoreSize(); } } else { - Splits.push_back(Arg); + Splits.push_back(*Arg); } } } @@ -1784,7 +1799,7 @@ SDValue SITargetLowering::LowerFormalArguments( for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) { const ISD::InputArg &Arg = Ins[i]; - if (Skipped[i]) { + if (Arg.isOrigArg() && Skipped[Arg.getOrigArgIndex()]) { InVals.push_back(DAG.getUNDEF(Arg.VT)); continue; } -- 2.50.1