From: Diana Picus Date: Thu, 27 Jun 2019 08:54:17 +0000 (+0000) Subject: [GlobalISel] Accept multiple vregs in lowerFormalArgs X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4776c1ff9754c848bc75c0892b583f67a05211ea;p=llvm [GlobalISel] Accept multiple vregs in lowerFormalArgs Change the interface of CallLowering::lowerFormalArguments to accept several virtual registers for each formal argument, instead of just one. This is a follow-up to D46018. CallLowering::lowerReturn was similarly refactored in D49660. lowerCall will be refactored in the same way in follow-up patches. With this change, we forward the virtual registers generated for aggregates to CallLowering. Therefore, the target can decide itself whether it wants to handle them as separate pieces or use one big register. We also copy the pack/unpackRegs helpers to CallLowering to facilitate this. ARM and AArch64 have been updated to use the passed in virtual registers directly, which means we no longer need to generate so many merge/extract instructions. AArch64 seems to have had a bug when lowering e.g. [1 x i8*], which was put into a s64 instead of a p0. Added a test-case which illustrates the problem more clearly (it crashes without this patch) and fixed the existing test-case to expect p0. AMDGPU has been updated to unpack into the virtual registers for kernels. I think the other code paths fall back for aggregates, so this should be NFC. Mips doesn't support aggregates yet, so it's also NFC. x86 seems to have code for dealing with aggregates, but I couldn't find the tests for it, so I just added a fallback to DAGISel if we get more than one virtual register for an argument. Differential Revision: https://reviews.llvm.org/D63549 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364510 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/GlobalISel/CallLowering.h b/include/llvm/CodeGen/GlobalISel/CallLowering.h index 8734fb455f1..b0a7444af5e 100644 --- a/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -52,8 +52,8 @@ public: ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy{}, bool IsFixed = true) : Regs(Regs.begin(), Regs.end()), Ty(Ty), Flags(Flags), IsFixed(IsFixed) { - assert(Regs.size() == 1 && "Can't handle multiple regs yet"); - assert((Ty->isVoidTy() == (Regs[0] == 0)) && + // FIXME: We should have just one way of saying "no register". + assert((Ty->isVoidTy() == (Regs.empty() || Regs[0] == 0)) && "only void types should have no register"); } }; @@ -139,6 +139,24 @@ protected: void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const; + /// Generate instructions for packing \p SrcRegs into one big register + /// corresponding to the aggregate type \p PackedTy. + /// + /// \param SrcRegs should contain one virtual register for each base type in + /// \p PackedTy, as returned by computeValueLLTs. + /// + /// \return The packed register. + Register packRegs(ArrayRef SrcRegs, Type *PackedTy, + MachineIRBuilder &MIRBuilder) const; + + /// Generate instructions for unpacking \p SrcReg into the \p DstRegs + /// corresponding to the aggregate type \p PackedTy. + /// + /// \param DstRegs should contain one virtual register for each base type in + /// \p PackedTy, as returned by computeValueLLTs. 
+ void unpackRegs(ArrayRef DstRegs, Register SrcReg, Type *PackedTy, + MachineIRBuilder &MIRBuilder) const; + /// Invoke Handler::assignArg on each of the given \p Args and then use /// \p Callback to move them to the assigned locations. /// @@ -182,19 +200,19 @@ public: return false; } - /// This hook must be implemented to lower the incoming (formal) - /// arguments, described by \p Args, for GlobalISel. Each argument - /// must end up in the related virtual register described by VRegs. - /// In other words, the first argument should end up in VRegs[0], - /// the second in VRegs[1], and so on. + /// arguments, described by \p VRegs, for GlobalISel. Each argument + /// must end up in the related virtual registers described by \p VRegs. + /// In other words, the first argument should end up in \c VRegs[0], + /// the second in \c VRegs[1], and so on. For each argument, there will be one + /// register for each non-aggregate type, as returned by \c computeValueLLTs. /// \p MIRBuilder is set to the proper insertion for the argument /// lowering. /// /// \return True if the lowering succeeded, false otherwise. virtual bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef VRegs) const { + ArrayRef> VRegs) const { return false; } diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp index e8ffd713ddd..b2838d863a6 100644 --- a/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -115,6 +116,47 @@ CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const CallInst &FuncInfo) const; +Register CallLowering::packRegs(ArrayRef SrcRegs, Type *PackedTy, + MachineIRBuilder &MIRBuilder) const { + assert(SrcRegs.size() > 1 && "Nothing to pack"); + + const DataLayout &DL = MIRBuilder.getMF().getDataLayout(); + MachineRegisterInfo *MRI = MIRBuilder.getMRI(); + + LLT PackedLLT = getLLTForType(*PackedTy, DL); + + SmallVector LLTs; + SmallVector Offsets; + computeValueLLTs(DL, *PackedTy, LLTs, &Offsets); + assert(LLTs.size() == SrcRegs.size() && "Regs / types mismatch"); + + Register Dst = MRI->createGenericVirtualRegister(PackedLLT); + MIRBuilder.buildUndef(Dst); + for (unsigned i = 0; i < SrcRegs.size(); ++i) { + Register NewDst = MRI->createGenericVirtualRegister(PackedLLT); + MIRBuilder.buildInsert(NewDst, Dst, SrcRegs[i], Offsets[i]); + Dst = NewDst; + } + + return Dst; +} + +void CallLowering::unpackRegs(ArrayRef DstRegs, Register SrcReg, + Type *PackedTy, + MachineIRBuilder &MIRBuilder) const { + assert(DstRegs.size() > 1 && "Nothing to unpack"); + + const DataLayout &DL = MIRBuilder.getMF().getDataLayout(); + + SmallVector LLTs; + SmallVector Offsets; + computeValueLLTs(DL, *PackedTy, LLTs, &Offsets); + assert(LLTs.size() == DstRegs.size() && "Regs / types mismatch"); + + for (unsigned i = 0; i < DstRegs.size(); ++i) + MIRBuilder.buildExtract(DstRegs[i], SrcReg, Offsets[i]); +} + bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef Args, ValueHandler &Handler) const { diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 3a060b29c1b..5e5e1bedad0 100644 --- 
a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2274,16 +2274,17 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { EntryBB->addSuccessor(&getMBB(F.front())); // Lower the actual args into this basic block. - SmallVector VRegArgs; + SmallVector, 8> VRegArgs; for (const Argument &Arg: F.args()) { if (DL->getTypeStoreSize(Arg.getType()) == 0) continue; // Don't handle zero sized types. - VRegArgs.push_back( - MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL))); + ArrayRef VRegs = getOrCreateVRegs(Arg); + VRegArgs.push_back(VRegs); - if (Arg.hasSwiftErrorAttr()) - SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), - VRegArgs.back()); + if (Arg.hasSwiftErrorAttr()) { + assert(VRegs.size() == 1 && "Too many vregs for Swift error"); + SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]); + } } // We don't currently support translating swifterror or swiftself functions. @@ -2306,20 +2307,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { return false; } - auto ArgIt = F.arg_begin(); - for (auto &VArg : VRegArgs) { - // If the argument is an unsplit scalar then don't use unpackRegs to avoid - // creating redundant copies. - if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) { - auto &VRegs = *VMap.getVRegs(cast(*ArgIt)); - assert(VRegs.empty() && "VRegs already populated?"); - VRegs.push_back(VArg); - } else { - unpackRegs(*ArgIt, VArg, *EntryBuilder.get()); - } - ArgIt++; - } - // Need to visit defs before uses when translating instructions. GISelObserverWrapper WrapperObserver; if (EnableCSE && CSEInfo) diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp index f596d63ed0b..effca68b09b 100644 --- a/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/lib/Target/AArch64/AArch64CallLowering.cpp @@ -203,7 +203,6 @@ void AArch64CallLowering::splitToValueTypes( SmallVector SplitVTs; SmallVector Offsets; ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0); - assert(OrigArg.Regs.size() == 1 && "Can't handle multple regs yet"); if (SplitVTs.size() == 1) { // No splitting to do, but we want to replace the original type (e.g. [1 x @@ -213,6 +212,24 @@ void AArch64CallLowering::splitToValueTypes( return; } + if (OrigArg.Regs.size() > 1) { + // Create one ArgInfo for each virtual register in the original ArgInfo. 
+ assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch"); + + bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( + OrigArg.Ty, CallConv, false); + for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) { + Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx); + SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags, + OrigArg.IsFixed); + if (NeedsRegBlock) + SplitArgs.back().Flags.setInConsecutiveRegs(); + } + + SplitArgs.back().Flags.setInConsecutiveRegsLast(); + return; + } + unsigned FirstRegIdx = SplitArgs.size(); bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( OrigArg.Ty, CallConv, false); @@ -351,9 +368,9 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, return Success; } -bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, - const Function &F, - ArrayRef VRegs) const { +bool AArch64CallLowering::lowerFormalArguments( + MachineIRBuilder &MIRBuilder, const Function &F, + ArrayRef> VRegs) const { MachineFunction &MF = MIRBuilder.getMF(); MachineBasicBlock &MBB = MIRBuilder.getMBB(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -364,26 +381,14 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, for (auto &Arg : F.args()) { if (DL.getTypeStoreSize(Arg.getType()) == 0) continue; + ArgInfo OrigArg{VRegs[i], Arg.getType()}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F); - bool Split = false; - LLT Ty = MRI.getType(VRegs[i]); - Register Dst = VRegs[i]; splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(), - [&](unsigned Reg, uint64_t Offset) { - if (!Split) { - Split = true; - Dst = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildUndef(Dst); - } - unsigned Tmp = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset); - Dst = Tmp; + [&](Register Reg, uint64_t Offset) { + llvm_unreachable("Args should already be split"); }); - - if (Dst != VRegs[i]) - MIRBuilder.buildCopy(VRegs[i], Dst); ++i; } diff --git a/lib/Target/AArch64/AArch64CallLowering.h b/lib/Target/AArch64/AArch64CallLowering.h index 67a58501fb8..316209ea8df 100644 --- a/lib/Target/AArch64/AArch64CallLowering.h +++ b/lib/Target/AArch64/AArch64CallLowering.h @@ -38,7 +38,7 @@ public: Register SwiftErrorVReg) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef VRegs) const override; + ArrayRef> VRegs) const override; bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, const MachineOperand &Callee, const ArgInfo &OrigRet, diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index c2340d29f8e..04a26323f46 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -193,9 +193,9 @@ static void allocateSystemSGPRs(CCState &CCInfo, } } -bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, - const Function &F, - ArrayRef VRegs) const { +bool AMDGPUCallLowering::lowerFormalArguments( + MachineIRBuilder &MIRBuilder, const Function &F, + ArrayRef> VRegs) const { // AMDGPU_GS and AMDGP_HS are not supported yet. 
if (F.getCallingConv() == CallingConv::AMDGPU_GS || F.getCallingConv() == CallingConv::AMDGPU_HS) @@ -275,9 +275,16 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset; ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize; + ArrayRef OrigArgRegs = VRegs[i]; + Register ArgReg = + OrigArgRegs.size() == 1 + ? OrigArgRegs[0] + : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL)); unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset); ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy)); - lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]); + lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg); + if (OrigArgRegs.size() > 1) + unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder); ++i; } @@ -295,7 +302,8 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, // We can only hanlde simple value types at the moment. ISD::ArgFlagsTy Flags; - ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()}; + assert(VRegs[i].size() == 1 && "Can't lower into more than one register"); + ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()}; setArgFlags(OrigArg, i + 1, DL, F); Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType())); @@ -348,10 +356,12 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) { if (Skipped.test(OrigArgIdx)) continue; - CCValAssign &VA = ArgLocs[i++]; - MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]); - MIRBuilder.getMBB().addLiveIn(VA.getLocReg()); - MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg()); + assert(VRegs[OrigArgIdx].size() == 1 && + "Can't lower into more than 1 reg"); + CCValAssign &VA = ArgLocs[i++]; + MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]); + MIRBuilder.getMBB().addLiveIn(VA.getLocReg()); + MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg()); } allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader); diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.h b/lib/Target/AMDGPU/AMDGPUCallLowering.h index b46e54b66c7..f659fa17f26 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -35,7 +35,7 @@ class AMDGPUCallLowering: public CallLowering { bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef VRegs) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef VRegs) const override; + ArrayRef> VRegs) const override; static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg); static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg); }; diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index 9d2c93be90c..ea5bfb595a9 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -195,11 +195,11 @@ void ARMCallLowering::splitToValueTypes( SmallVector SplitVTs; ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, nullptr, nullptr, 0); - assert(OrigArg.Regs.size() == 1 && "Can't handle multple regs yet"); if (SplitVTs.size() == 1) { // Even if there is no splitting to do, we still want to replace the // original type (e.g. pointer type -> integer). 
+ assert(OrigArg.Regs.size() == 1 && "Regs / types mismatch"); auto Flags = OrigArg.Flags; unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty); Flags.setOrigAlign(OriginalAlignment); @@ -208,6 +208,34 @@ void ARMCallLowering::splitToValueTypes( return; } + if (OrigArg.Regs.size() > 1) { + // Create one ArgInfo for each virtual register. + assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch"); + for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) { + EVT SplitVT = SplitVTs[i]; + Type *SplitTy = SplitVT.getTypeForEVT(Ctx); + auto Flags = OrigArg.Flags; + + unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy); + Flags.setOrigAlign(OriginalAlignment); + + bool NeedsConsecutiveRegisters = + TLI.functionArgumentNeedsConsecutiveRegisters( + SplitTy, F.getCallingConv(), F.isVarArg()); + if (NeedsConsecutiveRegisters) { + Flags.setInConsecutiveRegs(); + if (i == e - 1) + Flags.setInConsecutiveRegsLast(); + } + + // FIXME: We also want to split SplitTy further. + Register PartReg = OrigArg.Regs[i]; + SplitArgs.emplace_back(PartReg, SplitTy, Flags, OrigArg.IsFixed); + } + + return; + } + for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) { EVT SplitVT = SplitVTs[i]; Type *SplitTy = SplitVT.getTypeForEVT(Ctx); @@ -424,9 +452,9 @@ struct FormalArgHandler : public IncomingValueHandler { } // end anonymous namespace -bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, - const Function &F, - ArrayRef VRegs) const { +bool ARMCallLowering::lowerFormalArguments( + MachineIRBuilder &MIRBuilder, const Function &F, + ArrayRef> VRegs) const { auto &TLI = *getTLI(); auto Subtarget = TLI.getSubtarget(); @@ -457,20 +485,15 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(), AssignFn); - SmallVector ArgInfos; - SmallVector SplitRegs; + SmallVector SplitArgInfos; unsigned Idx = 0; for (auto &Arg : F.args()) { - ArgInfo AInfo(VRegs[Idx], Arg.getType()); - setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F); - - SplitRegs.clear(); + ArgInfo OrigArgInfo(VRegs[Idx], Arg.getType()); + setArgFlags(OrigArgInfo, Idx + AttributeList::FirstArgIndex, DL, F); - splitToValueTypes(AInfo, ArgInfos, MF, - [&](Register Reg) { SplitRegs.push_back(Reg); }); - - if (!SplitRegs.empty()) - MIRBuilder.buildMerge(VRegs[Idx], SplitRegs); + splitToValueTypes(OrigArgInfo, SplitArgInfos, MF, [&](Register Reg) { + llvm_unreachable("Args should already be split"); + }); Idx++; } @@ -478,7 +501,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, if (!MBB.empty()) MIRBuilder.setInstr(*MBB.begin()); - if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler)) + if (!handleAssignments(MIRBuilder, SplitArgInfos, ArgHandler)) return false; // Move back to the end of the basic block. 
diff --git a/lib/Target/ARM/ARMCallLowering.h b/lib/Target/ARM/ARMCallLowering.h index a563e179347..d0f20449192 100644 --- a/lib/Target/ARM/ARMCallLowering.h +++ b/lib/Target/ARM/ARMCallLowering.h @@ -36,7 +36,7 @@ public: ArrayRef VRegs) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef VRegs) const override; + ArrayRef> VRegs) const override; bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, const MachineOperand &Callee, const ArgInfo &OrigRet, diff --git a/lib/Target/Mips/MipsCallLowering.cpp b/lib/Target/Mips/MipsCallLowering.cpp index de235d4744c..8911ae39988 100644 --- a/lib/Target/Mips/MipsCallLowering.cpp +++ b/lib/Target/Mips/MipsCallLowering.cpp @@ -445,9 +445,9 @@ bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, return true; } -bool MipsCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, - const Function &F, - ArrayRef VRegs) const { +bool MipsCallLowering::lowerFormalArguments( + MachineIRBuilder &MIRBuilder, const Function &F, + ArrayRef> VRegs) const { // Quick exit if there aren't any args. if (F.arg_empty()) diff --git a/lib/Target/Mips/MipsCallLowering.h b/lib/Target/Mips/MipsCallLowering.h index 4ed75a3c83d..11c2d53ad35 100644 --- a/lib/Target/Mips/MipsCallLowering.h +++ b/lib/Target/Mips/MipsCallLowering.h @@ -66,7 +66,7 @@ public: ArrayRef VRegs) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef VRegs) const override; + ArrayRef> VRegs) const override; bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, const MachineOperand &Callee, const ArgInfo &OrigRet, diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp index 6403d968941..e41bcfac577 100644 --- a/lib/Target/X86/X86CallLowering.cpp +++ b/lib/Target/X86/X86CallLowering.cpp @@ -320,9 +320,9 @@ protected: } // end anonymous namespace -bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, - const Function &F, - ArrayRef VRegs) const { +bool X86CallLowering::lowerFormalArguments( + MachineIRBuilder &MIRBuilder, const Function &F, + ArrayRef> VRegs) const { if (F.arg_empty()) return true; @@ -344,14 +344,14 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, Arg.hasAttribute(Attribute::StructRet) || Arg.hasAttribute(Attribute::SwiftSelf) || Arg.hasAttribute(Attribute::SwiftError) || - Arg.hasAttribute(Attribute::Nest)) + Arg.hasAttribute(Attribute::Nest) || VRegs[Idx].size() > 1) return false; ArgInfo OrigArg(VRegs[Idx], Arg.getType()); setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F); if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI, [&](ArrayRef Regs) { - MIRBuilder.buildMerge(VRegs[Idx], Regs); + MIRBuilder.buildMerge(VRegs[Idx][0], Regs); })) return false; Idx++; diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h index fba151199dc..0445331bc3f 100644 --- a/lib/Target/X86/X86CallLowering.h +++ b/lib/Target/X86/X86CallLowering.h @@ -32,7 +32,7 @@ public: ArrayRef VRegs) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef VRegs) const override; + ArrayRef> VRegs) const override; bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, const MachineOperand &Callee, const ArgInfo &OrigRet, diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index eea2d12851d..6891adc5493 100644 --- 
a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -860,6 +860,17 @@ define void @test_extractvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { ret void } +; CHECK-LABEL: name: test_trivial_extract_ptr +; CHECK: [[STRUCT:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[VAL32:%[0-9]+]]:_(s32) = COPY $w1 +; CHECK: [[VAL:%[0-9]+]]:_(s8) = G_TRUNC [[VAL32]] +; CHECK: G_STORE [[VAL]](s8), [[STRUCT]](p0) +define void @test_trivial_extract_ptr([1 x i8*] %s, i8 %val) { + %addr = extractvalue [1 x i8*] %s, 0 + store i8 %val, i8* %addr + ret void +} + ; CHECK-LABEL: name: test_insertvalue ; CHECK: %0:_(p0) = COPY $x0 ; CHECK: %1:_(s32) = COPY $w1 @@ -898,7 +909,7 @@ define [1 x i64] @test_trivial_insert([1 x i64] %s, i64 %val) { define [1 x i8*] @test_trivial_insert_ptr([1 x i8*] %s, i8* %val) { ; CHECK-LABEL: name: test_trivial_insert_ptr -; CHECK: [[STRUCT:%[0-9]+]]:_(s64) = COPY $x0 +; CHECK: [[STRUCT:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[VAL:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: $x0 = COPY [[VAL]] %res = insertvalue [1 x i8*] %s, i8* %val, 0 diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/test/CodeGen/AArch64/GlobalISel/call-translator.ll index 8cea80caa9b..6ab4fea4649 100644 --- a/test/CodeGen/AArch64/GlobalISel/call-translator.ll +++ b/test/CodeGen/AArch64/GlobalISel/call-translator.ll @@ -65,21 +65,13 @@ define void @test_multiple_args(i64 %in) { ; CHECK: [[I8:%[0-9]+]]:_(s8) = G_TRUNC [[I8_C]] ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2 -; CHECK: [[UNDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF -; CHECK: [[ARG0:%[0-9]+]]:_(s192) = G_INSERT [[UNDEF]], [[DBL]](s64), 0 -; CHECK: [[ARG1:%[0-9]+]]:_(s192) = G_INSERT [[ARG0]], [[I64]](s64), 64 -; CHECK: [[ARG2:%[0-9]+]]:_(s192) = G_INSERT [[ARG1]], [[I8]](s8), 128 -; CHECK: [[ARG:%[0-9]+]]:_(s192) = COPY [[ARG2]] -; CHECK: [[EXTA0:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s192), 0 -; CHECK: [[EXTA1:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s192), 64 -; CHECK: [[EXTA2:%[0-9]+]]:_(s8) = G_EXTRACT [[ARG]](s192), 128 -; CHECK: G_STORE [[EXTA0]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr) +; CHECK: G_STORE [[DBL]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr) ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST1]](s64) -; CHECK: G_STORE [[EXTA1]](s64), [[GEP1]](p0) :: (store 8 into %ir.addr + 8) +; CHECK: G_STORE [[I64]](s64), [[GEP1]](p0) :: (store 8 into %ir.addr + 8) ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST2]](s64) -; CHECK: G_STORE [[EXTA2]](s8), [[GEP2]](p0) :: (store 1 into %ir.addr + 16, align 8) +; CHECK: G_STORE [[I8]](s8), [[GEP2]](p0) :: (store 1 into %ir.addr + 16, align 8) ; CHECK: RET_ReallyLR define void @test_struct_formal({double, i64, i8} %in, {double, i64, i8}* %addr) { store {double, i64, i8} %in, {double, i64, i8}* %addr diff --git a/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll b/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll index 72f27e68b41..85102491af1 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll +++ b/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll @@ -675,14 +675,14 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32, ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; HSA-VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 16 from `{ i32, i64 } 
addrspace(4)* undef`, addrspace 4) + ; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0 + ; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 1 from `i8 addrspace(4)* undef`, align 16, addrspace 4) ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 ; HSA-VI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p4) :: (non-temporal invariant load 16 from `{ i32, i64 } addrspace(4)* undef`, align 8, addrspace 4) - ; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0 - ; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64 ; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s128), 0 ; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64 ; HSA-VI: S_ENDPGM @@ -707,14 +707,14 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, ; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; HSA-VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64) ; HSA-VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 12 from `<{ i32, i64 }> addrspace(4)* undef`, align 16, addrspace 4) + ; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0 + ; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32 ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; HSA-VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64) ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 1 from `i8 addrspace(4)* undef`, align 4, addrspace 4) ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13 ; HSA-VI: [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64) ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s96) = G_LOAD [[GEP2]](p4) :: (non-temporal invariant load 12 from `<{ i32, i64 }> addrspace(4)* undef`, align 1, addrspace 4) - ; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0 - ; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32 ; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s96), 0 ; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s96), 32 ; HSA-VI: S_ENDPGM diff --git a/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll b/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll index 0ce23be65bd..f5c2cb40f11 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll @@ -202,12 +202,9 @@ define arm_aapcscc [3 x i32] @test_tiny_int_arrays([2 x i32] %arr) { ; CHECK: liveins: $r0, $r1 ; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY $r0 ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY $r1 -; CHECK: [[ARG_ARR:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) -; CHECK: [[EXT1:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR]](s64), 0 -; CHECK: [[EXT2:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR]](s64), 32 ; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF -; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](s32), 0 -; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[EXT2]](s32), 32 +; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[R0]](s32), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[R1]](s32), 32 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS2]](s64) ; CHECK: $r0 = COPY [[R0]] @@ -244,18 
+241,12 @@ define arm_aapcscc void @test_multiple_int_arrays([2 x i32] %arr0, [2 x i32] %ar ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY $r1 ; CHECK: [[R2:%[0-9]+]]:_(s32) = COPY $r2 ; CHECK: [[R3:%[0-9]+]]:_(s32) = COPY $r3 -; CHECK: [[ARG_ARR0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) -; CHECK: [[ARG_ARR1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R2]](s32), [[R3]](s32) -; CHECK: [[EXT1:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR0]](s64), 0 -; CHECK: [[EXT2:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR0]](s64), 32 -; CHECK: [[EXT3:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR1]](s64), 0 -; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR1]](s64), 32 ; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF -; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](s32), 0 -; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[EXT2]](s32), 32 +; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[R0]](s32), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[R1]](s32), 32 ; CHECK: [[IMPDEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF -; CHECK: [[INS3:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF2]], [[EXT3]](s32), 0 -; CHECK: [[INS4:%[0-9]+]]:_(s64) = G_INSERT [[INS3]], [[EXT4]](s32), 32 +; CHECK: [[INS3:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF2]], [[R2]](s32), 0 +; CHECK: [[INS4:%[0-9]+]]:_(s64) = G_INSERT [[INS3]], [[R3]](s32), 32 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS2]](s64) ; CHECK: [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS4]](s64) @@ -291,8 +282,9 @@ define arm_aapcscc void @test_large_int_arrays([20 x i32] %arr) { ; CHECK: [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[FIRST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[FIRST_STACK_ID]] ; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]] ; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]] -; CHECK: [[ARG_ARR:%[0-9]+]]:_(s640) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32) -; CHECK: [[INS:%[0-9]+]]:_(s640) = G_INSERT {{.*}}, {{.*}}(s32), 608 +; CHECK: [[IMPDEF:%[0-9]+]]:_(s640) = G_IMPLICIT_DEF +; CHECK: [[INS1:%[0-9]+]]:_(s640) = G_INSERT [[IMPDEF]], [[R0]](s32), 0 +; CHECK: [[INS:%[0-9]+]]:_(s640) = G_INSERT {{.*}}, [[LAST_STACK_ELEMENT]](s32), 608 ; CHECK: ADJCALLSTACKDOWN 64, 0, 14, $noreg, implicit-def $sp, implicit $sp ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS]](s640) ; CHECK: $r0 = COPY [[R0]] @@ -336,14 +328,10 @@ define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) { ; BIG: [[ARR1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ARR1_1]](s32), [[ARR1_0]](s32) ; CHECK: [[ARR2_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[ARR2_ID]] ; CHECK: [[ARR2:%[0-9]+]]:_(s64) = G_LOAD [[ARR2_FI]]{{.*}}load 8 from %fixed-stack.[[ARR2_ID]] -; CHECK: [[ARR_MERGED:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[ARR0]](s64), [[ARR1]](s64), [[ARR2]](s64) -; CHECK: [[EXT1:%[0-9]+]]:_(s64) = G_EXTRACT [[ARR_MERGED]](s192), 0 -; CHECK: [[EXT2:%[0-9]+]]:_(s64) = G_EXTRACT [[ARR_MERGED]](s192), 64 -; CHECK: [[EXT3:%[0-9]+]]:_(s64) = G_EXTRACT [[ARR_MERGED]](s192), 128 ; CHECK: [[IMPDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF -; CHECK: 
[[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[EXT1]](s64), 0 -; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[EXT2]](s64), 64 -; CHECK: [[INS3:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[EXT3]](s64), 128 +; CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[ARR0]](s64), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[ARR1]](s64), 64 +; CHECK: [[INS3:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[ARR2]](s64), 128 ; CHECK: ADJCALLSTACKDOWN 8, 0, 14, $noreg, implicit-def $sp, implicit $sp ; CHECK: [[ARR0:%[0-9]+]]:_(s64), [[ARR1:%[0-9]+]]:_(s64), [[ARR2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[INS3]](s192) ; CHECK: [[ARR0_0:%[0-9]+]]:_(s32), [[ARR0_1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARR0]](s64) @@ -401,32 +389,19 @@ define arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 ; CHECK: [[Z2:%[0-9]+]]:_(s64) = G_LOAD [[Z2_FI]]{{.*}}load 8 ; CHECK: [[Z3_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Z3_ID]] ; CHECK: [[Z3:%[0-9]+]]:_(s64) = G_LOAD [[Z3_FI]]{{.*}}load 8 -; CHECK: [[X_ARR:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[X0]](s64), [[X1]](s64), [[X2]](s64) -; CHECK: [[Y_ARR:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32), [[Y2]](s32) -; CHECK: [[Z_ARR:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[Z0]](s64), [[Z1]](s64), [[Z2]](s64), [[Z3]](s64) -; CHECK: [[EXT1:%[0-9]+]]:_(s64) = G_EXTRACT [[X_ARR]](s192), 0 -; CHECK: [[EXT2:%[0-9]+]]:_(s64) = G_EXTRACT [[X_ARR]](s192), 64 -; CHECK: [[EXT3:%[0-9]+]]:_(s64) = G_EXTRACT [[X_ARR]](s192), 128 -; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[Y_ARR]](s96), 0 -; CHECK: [[EXT5:%[0-9]+]]:_(s32) = G_EXTRACT [[Y_ARR]](s96), 32 -; CHECK: [[EXT6:%[0-9]+]]:_(s32) = G_EXTRACT [[Y_ARR]](s96), 64 -; CHECK: [[EXT7:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 0 -; CHECK: [[EXT8:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 64 -; CHECK: [[EXT9:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 128 -; CHECK: [[EXT10:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 192 ; CHECK: [[IMPDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF -; CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[EXT1]](s64), 0 -; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[EXT2]](s64), 64 -; CHECK: [[INS3:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[EXT3]](s64), 128 +; CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[X0]](s64), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[X1]](s64), 64 +; CHECK: [[INS3:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[X2]](s64), 128 ; CHECK: [[IMPDEF2:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF -; CHECK: [[INS4:%[0-9]+]]:_(s96) = G_INSERT [[IMPDEF2]], [[EXT4]](s32), 0 -; CHECK: [[INS5:%[0-9]+]]:_(s96) = G_INSERT [[INS4]], [[EXT5]](s32), 32 -; CHECK: [[INS6:%[0-9]+]]:_(s96) = G_INSERT [[INS5]], [[EXT6]](s32), 64 +; CHECK: [[INS4:%[0-9]+]]:_(s96) = G_INSERT [[IMPDEF2]], [[Y0]](s32), 0 +; CHECK: [[INS5:%[0-9]+]]:_(s96) = G_INSERT [[INS4]], [[Y1]](s32), 32 +; CHECK: [[INS6:%[0-9]+]]:_(s96) = G_INSERT [[INS5]], [[Y2]](s32), 64 ; CHECK: [[IMPDEF3:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF -; CHECK: [[INS7:%[0-9]+]]:_(s256) = G_INSERT [[IMPDEF3]], [[EXT7]](s64), 0 -; CHECK: [[INS8:%[0-9]+]]:_(s256) = G_INSERT [[INS7]], [[EXT8]](s64), 64 -; CHECK: [[INS9:%[0-9]+]]:_(s256) = G_INSERT [[INS8]], [[EXT9]](s64), 128 -; CHECK: [[INS10:%[0-9]+]]:_(s256) = G_INSERT [[INS9]], [[EXT10]](s64), 192 +; CHECK: [[INS7:%[0-9]+]]:_(s256) = G_INSERT [[IMPDEF3]], [[Z0]](s64), 0 +; CHECK: [[INS8:%[0-9]+]]:_(s256) = G_INSERT [[INS7]], [[Z1]](s64), 64 +; CHECK: [[INS9:%[0-9]+]]:_(s256) = G_INSERT [[INS8]], [[Z2]](s64), 128 +; CHECK: 
[[INS10:%[0-9]+]]:_(s256) = G_INSERT [[INS9]], [[Z3]](s64), 192 ; CHECK: ADJCALLSTACKDOWN 32, 0, 14, $noreg, implicit-def $sp, implicit $sp ; CHECK: [[X0:%[0-9]+]]:_(s64), [[X1:%[0-9]+]]:_(s64), [[X2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[INS3]](s192) ; CHECK: [[Y0:%[0-9]+]]:_(s32), [[Y1:%[0-9]+]]:_(s32), [[Y2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS6]](s96) @@ -494,8 +469,7 @@ define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) { ; CHECK: [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[FIRST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[FIRST_STACK_ID]] ; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]] ; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]] -; CHECK: [[ARG_ARR:%[0-9]+]]:_(s768) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32) -; CHECK: [[INS:%[0-9]+]]:_(s768) = G_INSERT {{.*}}, {{.*}}(s32), 736 +; CHECK: [[INS:%[0-9]+]]:_(s768) = G_INSERT {{.*}}, [[LAST_STACK_ELEMENT]](s32), 736 ; CHECK: ADJCALLSTACKDOWN 80, 0, 14, $noreg, implicit-def $sp, implicit $sp ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS]](s768) ; CHECK: $r0 = COPY [[R0]] @@ -536,12 +510,9 @@ define arm_aapcscc {i32, i32} @test_structs({i32, i32} %x) { ; CHECK: liveins: $r0, $r1 ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY $r0 ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY $r1 -; CHECK: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) -; CHECK: [[EXT1:%[0-9]+]]:_(s32) = G_EXTRACT [[X]](s64), 0 -; CHECK: [[EXT2:%[0-9]+]]:_(s32) = G_EXTRACT [[X]](s64), 32 ; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF -; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](s32), 0 -; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[EXT2]](s32), 32 +; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[X0]](s32), 0 +; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[X1]](s32), 32 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp ; CHECK: [[X0:%[0-9]+]]:_(s32), [[X1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS2]](s64) ; CHECK-DAG: $r0 = COPY [[X0]](s32)
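
---

Illustrative sketch (not part of this patch): the commit message above describes the new shape of CallLowering::lowerFormalArguments, where each formal argument arrives as a list of virtual registers, one per base type returned by computeValueLLTs. The C++ below is a hypothetical target override written only to show how that ArrayRef<ArrayRef<Register>> is consumed; "MyTargetCallLowering" and its trivial forwarding of ArgInfos are assumptions, not code from this revision, and a real target would split and assign the arguments per its calling convention as the ARM/AArch64 changes do.

// Hypothetical target override, shown only to illustrate the interface
// introduced by this patch. Assumes the GlobalISel headers from this tree.
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/Function.h"

using namespace llvm;

namespace {
class MyTargetCallLowering : public CallLowering {
public:
  using CallLowering::CallLowering;

  bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
                            ArrayRef<ArrayRef<Register>> VRegs) const override {
    const DataLayout &DL = MIRBuilder.getMF().getDataLayout();
    SmallVector<ArgInfo, 8> SplitArgs;

    unsigned Idx = 0;
    for (const Argument &Arg : F.args()) {
      // Zero-sized types get no registers, matching the IRTranslator change.
      if (DL.getTypeStoreSize(Arg.getType()) == 0)
        continue;

      // VRegs[Idx] holds one virtual register per base type of the argument
      // (as computed by computeValueLLTs), so aggregates arrive already
      // split into their pieces.
      ArgInfo OrigArg{VRegs[Idx], Arg.getType()};
      setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);

      // A real target would split this further for its calling convention
      // (see splitToValueTypes in the ARM/AArch64 hunks above); here the
      // ArgInfo is forwarded untouched.
      SplitArgs.push_back(OrigArg);
      ++Idx;
    }

    // A real implementation would now run handleAssignments() with a
    // target-specific ValueHandler; returning false requests a fallback.
    return false;
  }
};
} // end anonymous namespace

As in the AArch64 and ARM changes in this commit, a target that keeps the per-register ArgInfos avoids the merge/extract instructions that were previously generated to pack aggregates into one big register; a target that still wants a single register can call the packRegs/unpackRegs helpers added to CallLowering.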