From: Reid Kleckner Date: Tue, 11 Oct 2016 20:54:41 +0000 (+0000) Subject: Revert "[Thumb] Save/restore high registers in Thumb1 pro/epilogues" X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=63d2a1d96b9fafcdc99a86653fc20ceccf2ac146;p=llvm Revert "[Thumb] Save/restore high registers in Thumb1 pro/epilogues" This reverts r283867. This appears to be an infinite loop: while (HiRegToSave != AllHighRegs.end() && CopyReg != AllCopyRegs.end()) { if (HiRegsToSave.count(*HiRegToSave)) { ... CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegs.end()); HiRegToSave = findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegs.end()); } } git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@283938 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 29f4c099f47..61be18c2b54 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -30,8 +30,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" -#define DEBUG_TYPE "arm-frame-lowering" - using namespace llvm; static cl::opt @@ -1487,8 +1485,6 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, ARMFunctionInfo *AFI = MF.getInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - (void)TRI; // Silence unused warning in non-assert builds. unsigned FramePtr = RegInfo->getFrameRegister(MF); // Spill R4 if Thumb2 function requires stack realignment - it will be used as @@ -1644,9 +1640,6 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(ARM::LR); LRSpilled = true; NumGPRSpills++; - auto LRPos = find(UnspilledCS1GPRs, ARM::LR); - if (LRPos != UnspilledCS1GPRs.end()) - UnspilledCS1GPRs.erase(LRPos); } auto FPPos = find(UnspilledCS1GPRs, FramePtr); if (FPPos != UnspilledCS1GPRs.end()) @@ -1656,116 +1649,6 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, CS1Spilled = true; } - if (AFI->isThumb1OnlyFunction()) { - // For Thumb1-only targets, we need some low registers when we save and - // restore the high registers (which aren't allocatable, but could be - // used by inline assembly) because the push/pop instructions can not - // access high registers. If necessary, we might need to push more low - // registers to ensure that there is at least one free that can be used - // for the saving & restoring, and preferably we should ensure that as - // many as are needed are available so that fewer push/pop instructions - // are required. - - // Low registers which are not currently pushed, but could be (r4-r7). - SmallVector AvailableRegs; - - // Unused argument registers (r0-r3) can be clobbered in the prologue for - // free. - int EntryRegDeficit = 0; - for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) { - if (!MF.getRegInfo().isLiveIn(Reg)) { - --EntryRegDeficit; - DEBUG(dbgs() << PrintReg(Reg, TRI) - << " is unused argument register, EntryRegDeficit = " - << EntryRegDeficit << "\n"); - } - } - - // Unused return registers can be clobbered in the epilogue for free. - int ExitRegDeficit = AFI->getReturnRegsCount() - 4; - DEBUG(dbgs() << AFI->getReturnRegsCount() - << " return regs used, ExitRegDeficit = " << ExitRegDeficit - << "\n"); - - int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit); - DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n"); - - // r4-r6 can be used in the prologue if they are pushed by the first push - // instruction. - for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) { - if (SavedRegs.test(Reg)) { - --RegDeficit; - DEBUG(dbgs() << PrintReg(Reg, TRI) - << " is saved low register, RegDeficit = " << RegDeficit - << "\n"); - } else { - AvailableRegs.push_back(Reg); - DEBUG(dbgs() - << PrintReg(Reg, TRI) - << " is non-saved low register, adding to AvailableRegs\n"); - } - } - - // r7 can be used if it is not being used as the frame pointer. - if (!hasFP(MF)) { - if (SavedRegs.test(ARM::R7)) { - --RegDeficit; - DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = " - << RegDeficit << "\n"); - } else { - AvailableRegs.push_back(ARM::R7); - DEBUG(dbgs() - << "%R7 is non-saved low register, adding to AvailableRegs\n"); - } - } - - // Each of r8-r11 needs to be copied to a low register, then pushed. - for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) { - if (SavedRegs.test(Reg)) { - ++RegDeficit; - DEBUG(dbgs() << PrintReg(Reg, TRI) - << " is saved high register, RegDeficit = " << RegDeficit - << "\n"); - } - } - - // LR can only be used by PUSH, not POP, and can't be used at all if the - // llvm.returnaddress intrinsic is used. This is only worth doing if we - // are more limited at function entry than exit. - if ((EntryRegDeficit > ExitRegDeficit) && - !(MF.getRegInfo().isLiveIn(ARM::LR) && - MF.getFrameInfo().isReturnAddressTaken())) { - if (SavedRegs.test(ARM::LR)) { - --RegDeficit; - DEBUG(dbgs() << "%LR is saved register, RegDeficit = " << RegDeficit - << "\n"); - } else { - AvailableRegs.push_back(ARM::LR); - DEBUG(dbgs() << "%LR is not saved, adding to AvailableRegs\n"); - } - } - - // If there are more high registers that need pushing than low registers - // available, push some more low registers so that we can use fewer push - // instructions. This might not reduce RegDeficit all the way to zero, - // because we can only guarantee that r4-r6 are available, but r8-r11 may - // need saving. - DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n"); - for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) { - unsigned Reg = AvailableRegs.pop_back_val(); - DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI) - << " to make up reg deficit\n"); - SavedRegs.set(Reg); - NumGPRSpills++; - CS1Spilled = true; - ExtraCSSpill = true; - UnspilledCS1GPRs.erase(find(UnspilledCS1GPRs, Reg)); - if (Reg == ARM::LR) - LRSpilled = true; - } - DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit << "\n"); - } - // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. // Spill LR as well so we can fold BX_RET to the registers restore (LDM). if (!LRSpilled && CS1Spilled) { @@ -1783,7 +1666,6 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // If stack and double are 8-byte aligned and we are spilling an odd number // of GPRs, spill one extra callee save GPR so we won't have to pad between // the integer and double callee save areas. - DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n"); unsigned TargetAlign = getStackAlignment(); if (TargetAlign >= 8 && (NumGPRSpills & 1)) { if (CS1Spilled && !UnspilledCS1GPRs.empty()) { @@ -1795,8 +1677,6 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, (STI.isTargetWindows() && Reg == ARM::R11) || isARMLowRegister(Reg) || Reg == ARM::LR) { SavedRegs.set(Reg); - DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI) - << " to make up alignment\n"); if (!MRI.isReserved(Reg)) ExtraCSSpill = true; break; @@ -1805,8 +1685,6 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { unsigned Reg = UnspilledCS2GPRs.front(); SavedRegs.set(Reg); - DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI) - << " to make up alignment\n"); if (!MRI.isReserved(Reg)) ExtraCSSpill = true; } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 04e5ff288ba..6c056547eee 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -568,12 +568,10 @@ public: } /// Returns true if the frame setup is split into two separate pushes (first /// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent - /// to lr. This is always required on Thumb1-only targets, as the push and - /// pop instructions can't access the high registers. + /// to lr. bool splitFramePushPop(const MachineFunction &MF) const { - return (useR7AsFramePointer() && - MF.getTarget().Options.DisableFramePointerElim(MF)) || - isThumb1Only(); + return useR7AsFramePointer() && + MF.getTarget().Options.DisableFramePointerElim(MF); } bool useStride4VFPs(const MachineFunction &MF) const; diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index a23b85c1622..b3f26165a87 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -188,8 +188,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, int FramePtrOffsetInBlock = 0; unsigned adjustedGPRCS1Size = GPRCS1Size; - if (GPRCS1Size > 0 && GPRCS2Size == 0 && - tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) { + if (tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) { FramePtrOffsetInBlock = NumBytes; adjustedGPRCS1Size += NumBytes; NumBytes = 0; @@ -262,48 +261,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, AFI->setShouldRestoreSPFromFP(true); } - // Skip past the spilling of r8-r11, which could consist of multiple tPUSH - // and tMOVr instructions. We don't need to add any call frame information - // in-between these instructions, because they do not modify the high - // registers. - while (true) { - MachineBasicBlock::iterator OldMBBI = MBBI; - // Skip a run of tMOVr instructions - while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr) - MBBI++; - if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { - MBBI++; - } else { - // We have reached an instruction which is not a push, so the previous - // run of tMOVr instructions (which may have been empty) was not part of - // the prologue. Reset MBBI back to the last PUSH of the prologue. - MBBI = OldMBBI; - break; - } - } - - // Emit call frame information for the callee-saved high registers. - for (auto &I : CSI) { - unsigned Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: { - unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - break; - } - default: - break; - } - } - if (NumBytes) { // Insert it after all the callee-save spills. emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, @@ -351,12 +308,12 @@ static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) { isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs)) return true; else if (MI.getOpcode() == ARM::tPOP) { + // The first two operands are predicates. The last two are + // imp-def and imp-use of SP. Check everything in between. + for (int i = 2, e = MI.getNumOperands() - 2; i != e; ++i) + if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs)) + return false; return true; - } else if (MI.getOpcode() == ARM::tMOVr) { - unsigned Dst = MI.getOperand(0).getReg(); - unsigned Src = MI.getOperand(1).getReg(); - return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) && - ARM::hGPRRegClass.contains(Dst)); } return false; } @@ -611,19 +568,6 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, return true; } -// Return the first iteraror after CurrentReg which is present in EnabledRegs, -// or OrderEnd if no further registers are in that set. This does not advance -// the iterator fiorst, so returns CurrentReg if it is in EnabledRegs. -template -static ArrayRef::const_iterator -findNextOrderedReg(ArrayRef::const_iterator CurrentReg, - SmallSet &EnabledRegs, - ArrayRef::const_iterator OrderEnd) { - while (CurrentReg != OrderEnd && !EnabledRegs.count(*CurrentReg)) - ++CurrentReg; - return CurrentReg; -} - bool Thumb1FrameLowering:: spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -634,111 +578,29 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, DebugLoc DL; const TargetInstrInfo &TII = *STI.getInstrInfo(); - MachineFunction &MF = *MBB.getParent(); - const ARMBaseRegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); - - SmallSet LoRegsToSave; // r0-r7, lr - SmallSet HiRegsToSave; // r8-r11 - SmallSet CopyRegs; // Registers which can be used after pushing - // LoRegs for saving HiRegs. + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)); + AddDefaultPred(MIB); for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); + bool isKill = true; - if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { - LoRegsToSave.insert(Reg); - } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { - HiRegsToSave.insert(Reg); - } else { - llvm_unreachable("callee-saved register of unexpected class"); - } - - if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && - !MF.getRegInfo().isLiveIn(Reg) && - !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) - CopyRegs.insert(Reg); - } - - // Unused argument registers can be used for the high register saving. - for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) - if (!MF.getRegInfo().isLiveIn(ArgReg)) - CopyRegs.insert(ArgReg); - - // Push the low registers and lr - if (!LoRegsToSave.empty()) { - MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)); - AddDefaultPred(MIB); - for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) { - if (LoRegsToSave.count(Reg)) { - bool isKill = !MF.getRegInfo().isLiveIn(Reg); - if (isKill) - MBB.addLiveIn(Reg); - - MIB.addReg(Reg, getKillRegState(isKill)); - } - } - MIB.setMIFlags(MachineInstr::FrameSetup); - } - - // Push the high registers. There are no store instructions that can access - // these registers directly, so we have to move them to low registers, and - // push them. This might take multiple pushes, as it is possible for there to - // be fewer low registers available than high registers which need saving. - - // These are in reverse order so that in the case where we need to use - // multiple PUSH instructions, the order of the registers on the stack still - // matches the unwind info. They need to be swicthed back to ascending order - // before adding to the PUSH instruction. - ArrayRef AllCopyRegs({ARM::LR, - ARM::R7, ARM::R6, ARM::R5, ARM::R4, - ARM::R3, ARM::R2, ARM::R1, ARM::R0}); - ArrayRef AllHighRegs({ARM::R11, ARM::R10, ARM::R9, ARM::R8}); - - // Find the first register to save. - auto HiRegToSave = - findNextOrderedReg(AllHighRegs.begin(), HiRegsToSave, AllHighRegs.end()); - - while (HiRegToSave != AllHighRegs.end()) { - // Find the first low register to use. - auto CopyReg = - findNextOrderedReg(AllCopyRegs.begin(), CopyRegs, AllCopyRegs.end()); - - // Create the PUSH, but don't insert it yet (the MOVs need to come first). - MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)); - AddDefaultPred(PushMIB); - - SmallVector RegsToPush; - while (HiRegToSave != AllHighRegs.end() && CopyReg != AllCopyRegs.end()) { - if (HiRegsToSave.count(*HiRegToSave)) { - bool isKill = !MF.getRegInfo().isLiveIn(*HiRegToSave); - if (isKill) - MBB.addLiveIn(*HiRegToSave); - - // Emit a MOV from the high reg to the low reg. - MachineInstrBuilder MIB = - BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)); - MIB.addReg(*CopyReg, RegState::Define); - MIB.addReg(*HiRegToSave, getKillRegState(isKill)); - AddDefaultPred(MIB); - - // Record the register that must be added to the PUSH. - RegsToPush.push_back(*CopyReg); - - CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegs.end()); - HiRegToSave = - findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegs.end()); - } + // Add the callee-saved register as live-in unless it's LR and + // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress + // then it's already added to the function and entry block live-in sets. + if (Reg == ARM::LR) { + MachineFunction &MF = *MBB.getParent(); + if (MF.getFrameInfo().isReturnAddressTaken() && + MF.getRegInfo().isLiveIn(Reg)) + isKill = false; } - // Add the low registers to the PUSH, in ascending order. - for (unsigned Reg : reverse(RegsToPush)) - PushMIB.addReg(Reg, RegState::Kill); + if (isKill) + MBB.addLiveIn(Reg); - // Insert the PUSH instruction after the MOVs. - MBB.insert(MI, PushMIB); + MIB.addReg(Reg, getKillRegState(isKill)); } - + MIB.setMIFlags(MachineInstr::FrameSetup); return true; } @@ -753,98 +615,15 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo(); const TargetInstrInfo &TII = *STI.getInstrInfo(); - const ARMBaseRegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); - - SmallSet LoRegsToRestore; - SmallSet HiRegsToRestore; - // Low registers (r0-r7) which can be used to restore the high registers. - SmallSet CopyRegs; - - for (CalleeSavedInfo I : CSI) { - unsigned Reg = I.getReg(); - - if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { - LoRegsToRestore.insert(Reg); - } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { - HiRegsToRestore.insert(Reg); - } else { - llvm_unreachable("callee-saved register of unexpected class"); - } - - // If this is a low register not used as the frame pointer, we may want to - // use it for restoring the high registers. - if ((ARM::tGPRRegClass.contains(Reg)) && - !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) - CopyRegs.insert(Reg); - } - - // If this is a return block, we may be able to use some unused return value - // registers for restoring the high regs. - auto Terminator = MBB.getFirstTerminator(); - if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { - CopyRegs.insert(ARM::R0); - CopyRegs.insert(ARM::R1); - CopyRegs.insert(ARM::R2); - CopyRegs.insert(ARM::R3); - for (auto Op : Terminator->implicit_operands()) { - if (Op.isReg()) - CopyRegs.erase(Op.getReg()); - } - } - - ArrayRef AllCopyRegs({ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R4, ARM::R5, ARM::R6, ARM::R7}); - ArrayRef AllHighRegs({ARM::R8, ARM::R9, ARM::R10, ARM::R11}); - - // Find the first register to restore. - auto HiRegToRestore = findNextOrderedReg(AllHighRegs.begin(), HiRegsToRestore, - AllHighRegs.end()); - - while (HiRegToRestore != AllHighRegs.end()) { - assert(!CopyRegs.empty()); - // Find the first low register to use. - auto CopyReg = - findNextOrderedReg(AllCopyRegs.begin(), CopyRegs, AllCopyRegs.end()); - - // Create the POP instruction. - MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)); - AddDefaultPred(PopMIB); - - while (HiRegToRestore != AllHighRegs.end() && CopyReg != AllCopyRegs.end()) { - // Add the low register to the POP. - PopMIB.addReg(*CopyReg, RegState::Define); - - // Create the MOV from low to high register. - MachineInstrBuilder MIB = - BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)); - MIB.addReg(*HiRegToRestore, RegState::Define); - MIB.addReg(*CopyReg, RegState::Kill); - AddDefaultPred(MIB); - - CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegs.end()); - HiRegToRestore = findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, - AllHighRegs.end()); - } - } - - - - MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)); AddDefaultPred(MIB); bool NeedsPop = false; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - - // High registers (excluding lr) have already been dealt with - if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR)) - continue; - if (Reg == ARM::LR) { if (MBB.succ_empty()) { // Special epilogue for vararg functions. See emitEpilogue diff --git a/test/CodeGen/Thumb/callee_save.ll b/test/CodeGen/Thumb/callee_save.ll deleted file mode 100644 index b9216e74429..00000000000 --- a/test/CodeGen/Thumb/callee_save.ll +++ /dev/null @@ -1,236 +0,0 @@ -; RUN: llc -mtriple=thumbv6m-none-eabi < %s | FileCheck %s - -declare i8* @llvm.returnaddress(i32) - -; We don't allocate high registers, so any function not using inline asm will -; only need to save the low registers. -define void @low_regs_only() { -; CHECK-LABEL: low_regs_only: -entry: -; CHECK: push {r4, r5, r6, r7, lr} - tail call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7}"() -; CHECK: pop {r4, r5, r6, r7, pc} - ret void -} - -; One high reg clobbered, but no low regs, args or returns. We can use an -; argument/return register to help save/restore it. -define void @one_high() { -; CHECK-LABEL: one_high: -entry: -; CHECK: mov [[SAVEREG:r[0-3]]], r8 -; CHECK: push {[[SAVEREG]]} - tail call void asm sideeffect "", "~{r8}"() -; CHECK: pop {[[RESTOREREG:r[0-3]]]} -; CHECK: mov r8, [[RESTOREREG]] - ret void -} - -; 4 high regs clobbered, but still no low regs, args or returns. We can use all -; 4 arg/return regs for the save/restore. -define void @four_high() { -; CHECK-LABEL: four_high: -entry: -; CHECK: mov r3, r11 -; CHECK: mov r2, r10 -; CHECK: mov r1, r9 -; CHECK: mov r0, r8 -; CHECK: push {r0, r1, r2, r3} - tail call void asm sideeffect "", "~{r8},~{r9},~{r10},~{r11}"() -; CHECK: pop {r0, r1, r2, r3} -; CHECK: mov r8, r0 -; CHECK: mov r9, r1 -; CHECK: mov r10, r2 -; CHECK: mov r11, r3 - ret void -} - -; One high and one low register clobbered. lr also gets pushed to simplify the -; return, and r7 to keep the stack aligned. Here, we could use r0-r3, r4, r7 or -; lr to save/restore r8. -define void @one_high_one_low() { -; CHECK-LABEL: one_high_one_low: -entry: -; CHECK: push {r4, r7, lr} -; CHECK: mov [[SAVEREG:r0|r1|r2|r3|r4|r7|lr]], r8 -; CHECK: push {[[SAVEREG]]} - tail call void asm sideeffect "", "~{r4},~{r8}"() -; CHECK: pop {[[RESTOREREG:r0|r1|r2|r3|r4|r7]]} -; CHECK: mov r8, [[RESTOREREG]] -; CHECK: pop {r4, r7, pc} - ret void -} - -; All callee-saved registers clobbered, r4-r7 and lr are not live after the -; first push so can be used for pushing the high registers. -define void @four_high_four_low() { -; CHECK-LABEL: four_high_four_low: -entry: -; CHECK: push {r4, r5, r6, r7, lr} -; CHECK: mov lr, r11 -; CHECK: mov r7, r10 -; CHECK: mov r6, r9 -; CHECK: mov r5, r8 -; CHECK: push {r5, r6, r7, lr} - tail call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"() -; CHECK: pop {r0, r1, r2, r3} -; CHECK: mov r8, r0 -; CHECK: mov r9, r1 -; CHECK: mov r10, r2 -; CHECK: mov r11, r3 -; CHECK: pop {r4, r5, r6, r7, pc} - ret void -} - - -; All callee-saved registers clobbered, and frame pointer is requested. r7 now -; cannot be used while saving/restoring the high regs. -define void @four_high_four_low_frame_ptr() "no-frame-pointer-elim"="true" { -; CHECK-LABEL: four_high_four_low_frame_ptr: -entry: -; CHECK: push {r4, r5, r6, r7, lr} -; CHECK: add r7, sp, #12 -; CHECK: mov lr, r11 -; CHECK: mov r6, r10 -; CHECK: mov r5, r9 -; CHECK: mov r4, r8 -; CHECK: push {r4, r5, r6, lr} - tail call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"() -; CHECK: pop {r0, r1, r2, r3} -; CHECK: mov r8, r0 -; CHECK: mov r9, r1 -; CHECK: mov r10, r2 -; CHECK: mov r11, r3 -; CHECK: pop {r4, r5, r6, r7, pc} - ret void -} - -; All callee-saved registers clobbered, frame pointer is requested and -; llvm.returnaddress used. r7 and lr now cannot be used while saving/restoring -; the high regs. -define void @four_high_four_low_frame_ptr_ret_addr() "no-frame-pointer-elim"="true" { -; CHECK-LABEL: four_high_four_low_frame_ptr_ret_addr: -entry: -; CHECK: push {r4, r5, r6, r7, lr} -; CHECK: mov r6, r11 -; CHECK: mov r5, r10 -; CHECK: mov r4, r9 -; CHECK: mov r3, r8 -; CHECK: push {r3, r4, r5, r6} - %a = tail call i8* @llvm.returnaddress(i32 0) - tail call void asm sideeffect "", "r,~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"(i8* %a) -; CHECK: pop {r0, r1, r2, r3} -; CHECK: mov r8, r0 -; CHECK: mov r9, r1 -; CHECK: mov r10, r2 -; CHECK: mov r11, r3 -; CHECK: pop {r4, r5, r6, r7, pc} - ret void -} - -; 4 high regs clobbered, all 4 argument registers used. We push an extra 4 low -; registers, so that we can use them for saving the high regs. -define void @four_high_four_arg(i32 %a, i32 %b, i32 %c, i32 %d) { -; CHECK-LABEL: four_high_four_arg: -entry: -; CHECK: push {r5, r6, r7, lr} -; CHECK: mov lr, r11 -; CHECK: mov r7, r10 -; CHECK: mov r6, r9 -; CHECK: mov r5, r8 -; CHECK: push {r5, r6, r7, lr} - tail call void asm sideeffect "", "r,r,r,r,~{r8},~{r9},~{r10},~{r11}"(i32 %a, i32 %b, i32 %c, i32 %d) -; CHECK: pop {r0, r1, r2, r3} -; CHECK: mov r8, r0 -; CHECK: mov r9, r1 -; CHECK: mov r10, r2 -; CHECK: mov r11, r3 -; CHECK: pop {r5, r6, r7, pc} - ret void -} - -; 4 high regs clobbered, all 4 return registers used. We push an extra 4 low -; registers, so that we can use them for restoring the high regs. -define <4 x i32> @four_high_four_return() { -; CHECK-LABEL: four_high_four_return: -entry: -; CHECK: push {r4, r5, r6, r7, lr} -; CHECK: mov lr, r11 -; CHECK: mov r7, r10 -; CHECK: mov r6, r9 -; CHECK: mov r5, r8 -; CHECK: push {r5, r6, r7, lr} - tail call void asm sideeffect "", "~{r8},~{r9},~{r10},~{r11}"() - %vecinit = insertelement <4 x i32> undef, i32 1, i32 0 - %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1 - %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2 - %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3 -; CHECK: pop {r4, r5, r6, r7} -; CHECK: mov r8, r4 -; CHECK: mov r9, r5 -; CHECK: mov r10, r6 -; CHECK: mov r11, r7 -; CHECK: pop {r4, r5, r6, r7, pc} - ret <4 x i32> %vecinit13 -} - -; 4 high regs clobbered, all args & returns used, frame pointer requested and -; llvm.returnaddress called. This leaves us with 3 low registers available (r4, -; r5, r6), with which to save 4 high registers, so we have to use two pushes -; and pops. -define <4 x i32> @all_of_the_above(i32 %a, i32 %b, i32 %c, i32 %d) "no-frame-pointer-elim"="true" { -; CHECK-LABEL: all_of_the_above -entry: -; CHECK: push {r4, r5, r6, r7, lr} -; CHECK: add r7, sp, #12 -; CHECK: mov r6, r11 -; CHECK: mov r5, r10 -; CHECK: mov r4, r9 -; CHECK: push {r4, r5, r6} -; CHECK: mov r6, r8 -; CHECK: push {r6} - tail call void asm sideeffect "", "r,r,r,r,~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"(i32 %a, i32 %b, i32 %c, i32 %d) - %e = tail call i8* @llvm.returnaddress(i32 0) - %f = ptrtoint i8* %e to i32 - %vecinit = insertelement <4 x i32> undef, i32 %f, i32 0 - %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1 - %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2 - %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3 -; CHECK: pop {r4, r5, r6} -; CHECK: mov r8, r4 -; CHECK: mov r9, r5 -; CHECK: mov r10, r6 -; CHECK: pop {r4} -; CHECK: mov r11, r4 -; CHECK: pop {r4, r5, r6, r7, pc} - ret <4 x i32> %vecinit13 -} - -; When a base pointer is being used, we can safely use it for saving/restoring -; the high regs because it is set after the last push, and not used at all in the -; epliogue. We can also use r4 for restoring the registers despite it also being -; used when restoring sp from fp, as that happens before the first pop. -define <4 x i32> @base_pointer(i32 %a) { -; CHECK-LABEL: base_pointer: -entry: -; CHECK: push {r4, r6, r7, lr} -; CHECK: add r7, sp, #8 -; CHECK: mov lr, r9 -; CHECK: mov r6, r8 -; CHECK: push {r6, lr} -; CHECK: mov r6, sp - %b = alloca i32, i32 %a - call void asm sideeffect "", "r,~{r8},~{r9}"(i32* %b) - %vecinit = insertelement <4 x i32> undef, i32 1, i32 0 - %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1 - %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2 - %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3 -; CHECK: subs r4, r7, #7 -; CHECK: subs r4, #9 -; CHECK: mov sp, r4 -; CHECK: pop {r4, r6} -; CHECK: mov r8, r4 -; CHECK: mov r9, r6 -; CHECK: pop {r4, r6, r7, pc} - ret <4 x i32> %vecinit13 -} diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll index 938dadce23d..66d0f4b8b18 100644 --- a/test/CodeGen/Thumb/large-stack.ll +++ b/test/CodeGen/Thumb/large-stack.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s --check-prefix=CHECK --check-prefix=ALIGN4 -; RUN: llc < %s -mtriple=thumb-none-eabi | FileCheck %s --check-prefix=CHECK --check-prefix=ALIGN8 +; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=thumb-none-eabi | FileCheck %s ; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-apple-ios -; RUN: llvm-objdump -triple=thumbv6-apple-ios -d %t | FileCheck %s --check-prefix=CHECK --check-prefix=ALIGN4 +; RUN: llvm-objdump -triple=thumbv6-apple-ios -d %t | FileCheck %s ; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-none-eabi -; RUN: llvm-objdump -triple=thumbv6-none-eabi -d %t | FileCheck %s --check-prefix=CHECK --check-prefix=ALIGN8 +; RUN: llvm-objdump -triple=thumbv6-none-eabi -d %t | FileCheck %s ; Largest stack for which a single tADDspi/tSUBspi is enough define void @test1() { @@ -33,9 +33,7 @@ define void @test100_nofpelim() "no-frame-pointer-elim"="true" { ; CHECK: sub sp, #508 ; CHECK: sub sp, #508 ; CHECK: sub sp, #508 -; ALIGN4: subs r4, r7, #4 -; ALIGN8: subs r4, r7, #7 -; ALIGN8: subs r4, #1 +; CHECK: subs r4, r7, #4 ; CHECK: mov sp, r4 %tmp = alloca [ 1524 x i8 ] , align 4 ret void @@ -57,9 +55,7 @@ define void @test2_nofpelim() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: test2_nofpelim: ; CHECK: ldr [[TEMP:r[0-7]]], ; CHECK: add sp, [[TEMP]] -; ALIGN4: subs r4, r7, #4 -; ALIGN8: subs r4, r7, #7 -; ALIGN8: subs r4, #1 +; CHECK: subs r4, r7, #4 ; CHECK: mov sp, r4 %tmp = alloca [ 1528 x i8 ] , align 4 ret void diff --git a/test/CodeGen/Thumb2/frame-pointer.ll b/test/CodeGen/Thumb2/frame-pointer.ll index 832cebe3252..f6e18603b5f 100644 --- a/test/CodeGen/Thumb2/frame-pointer.ll +++ b/test/CodeGen/Thumb2/frame-pointer.ll @@ -27,9 +27,9 @@ define void @leaf_nofpelim() "no-frame-pointer-elim"="true" { ; need to use a frame pointer. define void @leaf_lowreg_nofpelim() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: leaf_lowreg_nofpelim: -; CHECK: push {r4, r6, r7, lr} -; CHECK: add r7, sp, #8 -; CHECK: pop {r4, r6, r7, pc} +; CHECK: push {r4, r7, lr} +; CHECK: add r7, sp, #4 +; CHECK: pop {r4, r7, pc} call void asm sideeffect "", "~{r4}" () ret void } @@ -40,11 +40,11 @@ define void @leaf_lowreg_nofpelim() "no-frame-pointer-elim"="true" { ; the stack. define void @leaf_highreg_nofpelim() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: leaf_highreg_nofpelim: -; CHECK: push {r6, r7, lr} -; CHECK: add r7, sp, #4 +; CHECK: push {r7, lr} +; CHECK: mov r7, sp ; CHECK: str r8, [sp, #-4]! ; CHECK: ldr r8, [sp], #4 -; CHECK: pop {r6, r7, pc} +; CHECK: pop {r7, pc} call void asm sideeffect "", "~{r8}" () ret void }