return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}
+/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
+/// call frame size. Update the MachineFunction object with the stack size and
+/// maximum call frame size.
+unsigned
+PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
+ bool UseEstimate) const {
+ unsigned NewMaxCallFrameSize = 0;
+ unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
+ &NewMaxCallFrameSize);
+ MF.getFrameInfo().setStackSize(FrameSize);
+ MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
+ return FrameSize;
+}
+
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
-unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
- bool UpdateMF,
- bool UseEstimate) const {
- MachineFrameInfo &MFI = MF.getFrameInfo();
+unsigned
+PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
+ bool UseEstimate,
+ unsigned *NewMaxCallFrameSize) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// Get the number of bytes to allocate from the FrameInfo
unsigned FrameSize =
// Check whether we can skip adjusting the stack pointer (by using red zone)
if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
- NumNoNeedForFrame++;
// No need for frame
- if (UpdateMF)
- MFI.setStackSize(0);
return 0;
}
if (MFI.hasVarSizedObjects())
maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
- // Update maximum call frame size.
- if (UpdateMF)
- MFI.setMaxCallFrameSize(maxCallFrameSize);
+ // Update the new max call frame size if the caller passes in a valid pointer.
+ if (NewMaxCallFrameSize)
+ *NewMaxCallFrameSize = maxCallFrameSize;
// Include call frame size in total.
FrameSize += maxCallFrameSize;
// Make sure the frame is aligned.
FrameSize = (FrameSize + AlignMask) & ~AlignMask;
- // Update frame info.
- if (UpdateMF)
- MFI.setStackSize(FrameSize);
-
return FrameSize;
}
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MachineFunction &MF = *(MBB->getParent());
bool HasBP = RegInfo->hasBasePointer(MF);
- unsigned FrameSize = determineFrameLayout(MF, false);
+ unsigned FrameSize = determineFrameLayout(MF);
int NegFrameSize = -FrameSize;
bool IsLargeFrame = !isInt<16>(NegFrameSize);
MachineFrameInfo &MFI = MF.getFrameInfo();
return findScratchRegister(TmpMBB, true);
}
+bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+
+ // Abort if there is no register info or function info.
+ if (!RegInfo || !FI)
+ return false;
+
+ // Only move the stack update on ELFv2 ABI and PPC64.
+ if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
+ return false;
+
+ // Check the frame size first and return false if it does not fit the
+ // requirements.
+ // We need a non-zero frame size as well as a frame that will fit in the red
+ // zone. This is because by moving the stack pointer update we are now storing
+ // to the red zone until the stack pointer is updated. If we get an interrupt
+ // inside the prologue but before the stack update we now have a number of
+ // stores to the red zone and those stores must all fit.
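+ // (Note: getRedZoneSize() is currently 288 bytes for 64-bit ELF targets, so
+ // only fairly small frames qualify here.)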
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned FrameSize = MFI.getStackSize();
+ if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
+ return false;
+
+ // Frame pointers and base pointers complicate matters so don't do anything
+ // if we have them. For example having a frame pointer will sometimes require
+ // a copy of r1 into r31 and that makes keeping track of updates to r1 more
+ // difficult.
+ if (hasFP(MF) || RegInfo->hasBasePointer(MF))
+ return false;
+
+ // Calls to fast_cc functions use different rules for passing parameters on
+ // the stack from the ABI and using PIC base in the function imposes
+ // similar restrictions to using the base pointer. It is not generally safe
+ // to move the stack pointer update in these situations.
+ if (FI->hasFastCall() || FI->usesPICBase())
+ return false;
+
+ // Finally we can move the stack update if we do not require register
+ // scavenging. Register scavenging can introduce more spills and so
+ // may make the frame size larger than we have computed.
+ return !RegInfo->requiresFrameIndexScavenging(MF);
+}
+
void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
MBBI = MBB.begin();
// Work out frame sizes.
- unsigned FrameSize = determineFrameLayout(MF);
+ unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
int NegFrameSize = -FrameSize;
if (!isInt<32>(NegFrameSize))
llvm_unreachable("Unhandled stack size!");
assert((isPPC64 || !MustSaveCR) &&
"Prologue CR saving supported only in 64-bit mode");
+ // Check if we can move the stack update instruction (stdu) down the prologue
+ // past the callee saves. This avoids the situation where the callee saves
+ // must wait for the store-with-update that defines the new stack pointer to
+ // complete.
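+ // StackUpdateLoc starts at the normal insertion point and is advanced below,
+ // one instruction per callee-saved spill to a fixed (negative offset) slot,
+ // so that the stack update ends up being emitted after those spills.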
+ MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+ bool MovingStackUpdateDown = false;
+
+ // Check if we can move the stack update.
+ if (stackUpdateCanBeMoved(MF)) {
+ const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+ for (CalleeSavedInfo CSI : Info) {
+ int FrIdx = CSI.getFrameIdx();
+ // If the frame index is not negative the callee saved info belongs to a
+ // stack object that is not a fixed stack object. We ignore non-fixed
+ // stack objects because we won't move the stack pointer update past them.
+ if (FrIdx >= 0)
+ continue;
+
+ if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
+ StackUpdateLoc++;
+ MovingStackUpdateDown = true;
+ } else {
+ // We need all of the Frame Indices to meet these conditions.
+ // If they do not, abort the whole operation.
+ StackUpdateLoc = MBBI;
+ MovingStackUpdateDown = false;
+ break;
+ }
+ }
+
+ // If the operation was not aborted then update the object offset.
+ if (MovingStackUpdateDown) {
+ for (CalleeSavedInfo CSI : Info) {
+ int FrIdx = CSI.getFrameIdx();
+ if (FrIdx < 0)
+ MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
+ }
+ }
+ }
+
// If we need to spill the CR and the LR but we don't have two separate
// registers available, we must spill them one at a time
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
}
if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, StoreInst)
+ BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
.addReg(ScratchReg, getKillRegState(true))
.addImm(LROffset)
.addReg(SPReg);
HasSTUX = true;
} else if (!isLargeFrame) {
- BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
+ BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
.addReg(SPReg)
.addImm(NegFrameSize)
.addReg(SPReg);
.addCFIIndex(CFIRegister);
} else {
int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ // We have changed the object offset above but we do not want to change
+ // the actual offsets in the CFI instruction so we have to undo the
+ // offset change here.
+ if (MovingStackUpdateDown)
+ Offset -= NegFrameSize;
+
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
unsigned RBReg = SPReg;
unsigned SPAdd = 0;
+ // Check if we can move the stack update instruction up the epilogue
+ // past the callee saves. This will allow the move-to-LR instruction to be
+ // executed before the restores of the callee saves, which means that the
+ // callee saves can hide the latency of the MTLR instruction.
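+ // As in the prologue, StackUpdateLoc starts at the normal insertion point;
+ // here it is moved one instruction earlier for each callee-saved restore
+ // from a fixed (negative offset) slot.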
+ MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+ if (stackUpdateCanBeMoved(MF)) {
+ const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+ for (CalleeSavedInfo CSI : Info) {
+ int FrIdx = CSI.getFrameIdx();
+ // If the frame index is not negative the callee saved info belongs to a
+ // stack object that is not a fixed stack object. We ignore non-fixed
+ // stack objects because we won't move the update of the stack pointer
+ // past them.
+ if (FrIdx >= 0)
+ continue;
+
+ if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
+ StackUpdateLoc--;
+ else {
+ // Abort the operation as we can't update all CSR restores.
+ StackUpdateLoc = MBBI;
+ break;
+ }
+ }
+ }
+
if (FrameSize) {
// In the prologue, the loaded (or persistent) stack pointer value is
// offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
}
} else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
if (HasRedZone) {
- BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
.addReg(SPReg)
.addImm(FrameSize);
} else {
.addReg(FPReg);
RBReg = FPReg;
}
- BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
+ BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
.addImm(0)
.addReg(SPReg);
}
// a base register anyway, because it may happen to be R0.
bool LoadedLR = false;
if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
- BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+ BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
.addImm(LROffset+SPAdd)
.addReg(RBReg);
LoadedLR = true;
.addReg(TempReg, getKillRegState(i == e-1));
if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
+ BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
// the 16-bit immediate. We don't know the complete frame size here
// because we've not yet computed callee-saved register spills or the
// needed alignment padding.
- unsigned StackSize = determineFrameLayout(MF, false, true);
+ unsigned StackSize = determineFrameLayout(MF, true);
MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
*/
void createTailCallBranchInstr(MachineBasicBlock &MBB) const;
+ /**
+ * Check if the conditions are correct to allow for the stack update
+ * to be moved past the CSR save/restore code.
+ */
+ bool stackUpdateCanBeMoved(MachineFunction &MF) const;
+
public:
PPCFrameLowering(const PPCSubtarget &STI);
- unsigned determineFrameLayout(MachineFunction &MF,
- bool UpdateMF = true,
- bool UseEstimate = false) const;
+ /**
+ * Determine the frame layout and update the machine function.
+ */
+ unsigned determineFrameLayoutAndUpdate(MachineFunction &MF,
+ bool UseEstimate = false) const;
+
+ /**
+ * Determine the frame layout but do not update the machine function.
+ * The MachineFunction object can be const in this case as it is not
+ * modified.
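+ * If NewMaxCallFrameSize is non-null, the computed maximum call frame size
+ * is returned through it.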
+ */
+ unsigned determineFrameLayout(const MachineFunction &MF,
+ bool UseEstimate = false,
+ unsigned *NewMaxCallFrameSize = nullptr) const;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
OpcodeIndex = SOK_Float8Spill;
} else if (PPC::F4RCRegClass.contains(Reg)) {
OpcodeIndex = SOK_Float4Spill;
+ } else if (PPC::SPERCRegClass.contains(Reg)) {
+ OpcodeIndex = SOK_SPESpill;
+ } else if (PPC::SPE4RCRegClass.contains(Reg)) {
+ OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.contains(Reg)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.contains(Reg)) {
OpcodeIndex = SOK_Float8Spill;
} else if (PPC::F4RCRegClass.contains(Reg)) {
OpcodeIndex = SOK_Float4Spill;
+ } else if (PPC::SPERCRegClass.contains(Reg)) {
+ OpcodeIndex = SOK_SPESpill;
+ } else if (PPC::SPE4RCRegClass.contains(Reg)) {
+ OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.contains(Reg)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.contains(Reg)) {
"caller preserved registers can be LICM candidates"),
cl::init(true), cl::Hidden);
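+// Forward declared here so that requiresFrameIndexScavenging, which is
+// defined before this helper in the file, can use it.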
+static unsigned offsetMinAlignForOpcode(unsigned OpC);
+
PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
: PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
TM.isPPC64() ? 0 : 1,
return Reserved;
}
+bool PPCRegisterInfo::requiresFrameIndexScavenging(
+ const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const PPCInstrInfo *InstrInfo = Subtarget.getInstrInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+
+ // If the callee saved info is invalid we have to default to true for safety.
+ if (!MFI.isCalleeSavedInfoValid())
+ return true;
+
+ // If the frame is larger than what can be represented in the signed 16-bit
+ // immediate of a D-Form, we will require the use of X-Forms. If we need an
+ // X-Form then we need a register to store the address offset.
+ unsigned FrameSize = MFI.getStackSize();
+ // A signed 16-bit displacement means FrameSize must fit in 15 bits.
+ if (FrameSize & ~0x7FFF)
+ return true;
+
+ // The callee saved info is valid so it can be traversed.
+ // Checking for registers that need saving that do not have load or store
+ // forms where the address offset is an immediate.
+ for (unsigned i = 0; i < Info.size(); i++) {
+ int FrIdx = Info[i].getFrameIdx();
+ unsigned Reg = Info[i].getReg();
+
+ unsigned Opcode = InstrInfo->getStoreOpcodeForSpill(Reg);
+ if (!MFI.isFixedObjectIndex(FrIdx)) {
+ // This is not a fixed object. If it requires alignment then we may still
+ // need to use the X-Form.
+ if (offsetMinAlignForOpcode(Opcode) > 1)
+ return true;
+ }
+
+ // This is either:
+ // 1) A fixed frame index object which we know is aligned, so as long as we
+ // have a valid DForm/DSForm/DQForm (non X-Form) we don't need to consider
+ // the alignment here.
+ // 2) A non-fixed object, in which case we now know that the minimum required
+ // alignment is no more than 1 based on the previous check.
+ if (InstrInfo->isXFormMemOp(Opcode))
+ return true;
+ }
+ return false;
+}
+
bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
const MachineFunction &MF) const {
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
}
// If the offset must be a multiple of some value, return what that value is.
-static unsigned offsetMinAlign(const MachineInstr &MI) {
- unsigned OpC = MI.getOpcode();
-
+static unsigned offsetMinAlignForOpcode(unsigned OpC) {
switch (OpC) {
default:
return 1;
}
}
+// If the offset must be a multiple of some value, return what that value is.
+static unsigned offsetMinAlign(const MachineInstr &MI) {
+ unsigned OpC = MI.getOpcode();
+ return offsetMinAlignForOpcode(OpC);
+}
+
// Return the OffsetOperandNo given the FIOperandNum (and the instruction).
static unsigned getOffsetONFromFION(const MachineInstr &MI,
unsigned FIOperandNum) {
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
const PPCFrameLowering *TFI = getFrameLowering(MF);
- unsigned StackEst = TFI->determineFrameLayout(MF, false, true);
+ unsigned StackEst = TFI->determineFrameLayout(MF, true);
// If we likely don't need a stack frame, then we probably don't need a
// virtual base register either.
return true;
}
- bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
- return true;
- }
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
return true;
; CHECK-PWR8-LABEL: caller1:
; CHECK-PWR8: # %bb.0: # %entry
; CHECK-PWR8-NEXT: mflr r0
-; CHECK-PWR8-NEXT: std r0, 16(r1)
-; CHECK-PWR8-NEXT: stdu r1, -176(r1)
; CHECK-PWR8-NEXT: .cfi_def_cfa_offset 176
; CHECK-PWR8-NEXT: .cfi_offset lr, 16
; CHECK-PWR8-NEXT: .cfi_offset r14, -144
; CHECK-PWR8-NEXT: .cfi_offset r15, -136
-; CHECK-PWR8-NEXT: std r14, 32(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT: std r15, 40(r1) # 8-byte Folded Spill
+; CHECK-PWR8-NEXT: std r14, -144(r1) # 8-byte Folded Spill
+; CHECK-PWR8-NEXT: std r15, -136(r1) # 8-byte Folded Spill
+; CHECK-PWR8-NEXT: std r0, 16(r1)
+; CHECK-PWR8-NEXT: stdu r1, -176(r1)
; CHECK-PWR8-NEXT: #APP
; CHECK-PWR8-NEXT: add r3, r3, r4
; CHECK-PWR8-NEXT: #NO_APP
; CHECK-PWR8-NEXT: extsw r3, r3
; CHECK-PWR8-NEXT: bl callee
; CHECK-PWR8-NEXT: nop
-; CHECK-PWR8-NEXT: ld r15, 40(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT: ld r14, 32(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: addi r1, r1, 176
; CHECK-PWR8-NEXT: ld r0, 16(r1)
; CHECK-PWR8-NEXT: mtlr r0
+; CHECK-PWR8-NEXT: ld r15, -136(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT: ld r14, -144(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: blr
;
; CHECK-PWR9-LABEL: caller1:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: mflr r0
-; CHECK-PWR9-NEXT: std r0, 16(r1)
-; CHECK-PWR9-NEXT: stdu r1, -176(r1)
; CHECK-PWR9-NEXT: .cfi_def_cfa_offset 176
; CHECK-PWR9-NEXT: .cfi_offset lr, 16
; CHECK-PWR9-NEXT: .cfi_offset r14, -144
; CHECK-PWR9-NEXT: .cfi_offset r15, -136
-; CHECK-PWR9-NEXT: std r14, 32(r1) # 8-byte Folded Spill
-; CHECK-PWR9-NEXT: std r15, 40(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT: std r14, -144(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT: std r15, -136(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT: std r0, 16(r1)
+; CHECK-PWR9-NEXT: stdu r1, -176(r1)
; CHECK-PWR9-NEXT: #APP
; CHECK-PWR9-NEXT: add r3, r3, r4
; CHECK-PWR9-NEXT: #NO_APP
; CHECK-PWR9-NEXT: extsw r3, r3
; CHECK-PWR9-NEXT: bl callee
; CHECK-PWR9-NEXT: nop
-; CHECK-PWR9-NEXT: ld r15, 40(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT: ld r14, 32(r1) # 8-byte Folded Reload
; CHECK-PWR9-NEXT: addi r1, r1, 176
; CHECK-PWR9-NEXT: ld r0, 16(r1)
; CHECK-PWR9-NEXT: mtlr r0
+; CHECK-PWR9-NEXT: ld r15, -136(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT: ld r14, -144(r1) # 8-byte Folded Reload
; CHECK-PWR9-NEXT: blr
entry:
%0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r14},~{r15}"(i32 %a, i32 %b)
; CHECK-PWR8-LABEL: caller2:
; CHECK-PWR8: # %bb.0: # %entry
; CHECK-PWR8-NEXT: mflr r0
-; CHECK-PWR8-NEXT: std r0, 16(r1)
-; CHECK-PWR8-NEXT: stdu r1, -176(r1)
; CHECK-PWR8-NEXT: .cfi_def_cfa_offset 176
; CHECK-PWR8-NEXT: .cfi_offset lr, 16
; CHECK-PWR8-NEXT: .cfi_offset f14, -144
; CHECK-PWR8-NEXT: .cfi_offset f15, -136
-; CHECK-PWR8-NEXT: stfd f14, 32(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT: stfd f15, 40(r1) # 8-byte Folded Spill
+; CHECK-PWR8-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill
+; CHECK-PWR8-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill
+; CHECK-PWR8-NEXT: std r0, 16(r1)
+; CHECK-PWR8-NEXT: stdu r1, -176(r1)
; CHECK-PWR8-NEXT: #APP
; CHECK-PWR8-NEXT: add r3, r3, r4
; CHECK-PWR8-NEXT: #NO_APP
; CHECK-PWR8-NEXT: extsw r3, r3
; CHECK-PWR8-NEXT: bl callee
; CHECK-PWR8-NEXT: nop
-; CHECK-PWR8-NEXT: lfd f15, 40(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT: lfd f14, 32(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: addi r1, r1, 176
; CHECK-PWR8-NEXT: ld r0, 16(r1)
; CHECK-PWR8-NEXT: mtlr r0
+; CHECK-PWR8-NEXT: lfd f15, -136(r1) # 8-byte Folded Reload
+; CHECK-PWR8-NEXT: lfd f14, -144(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: blr
;
; CHECK-PWR9-LABEL: caller2:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: mflr r0
-; CHECK-PWR9-NEXT: std r0, 16(r1)
-; CHECK-PWR9-NEXT: stdu r1, -176(r1)
; CHECK-PWR9-NEXT: .cfi_def_cfa_offset 176
; CHECK-PWR9-NEXT: .cfi_offset lr, 16
; CHECK-PWR9-NEXT: .cfi_offset f14, -144
; CHECK-PWR9-NEXT: .cfi_offset f15, -136
-; CHECK-PWR9-NEXT: stfd f14, 32(r1) # 8-byte Folded Spill
-; CHECK-PWR9-NEXT: stfd f15, 40(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT: std r0, 16(r1)
+; CHECK-PWR9-NEXT: stdu r1, -176(r1)
; CHECK-PWR9-NEXT: #APP
; CHECK-PWR9-NEXT: add r3, r3, r4
; CHECK-PWR9-NEXT: #NO_APP
; CHECK-PWR9-NEXT: extsw r3, r3
; CHECK-PWR9-NEXT: bl callee
; CHECK-PWR9-NEXT: nop
-; CHECK-PWR9-NEXT: lfd f15, 40(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT: lfd f14, 32(r1) # 8-byte Folded Reload
; CHECK-PWR9-NEXT: addi r1, r1, 176
; CHECK-PWR9-NEXT: ld r0, 16(r1)
; CHECK-PWR9-NEXT: mtlr r0
+; CHECK-PWR9-NEXT: lfd f15, -136(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT: lfd f14, -144(r1) # 8-byte Folded Reload
; CHECK-PWR9-NEXT: blr
entry:
%0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{f14},~{f15}"(i32 %a, i32 %b)
define noalias i8* @_ZN2CC3funEv(%class.CC* %this) {
; CHECK-LABEL: _ZN2CC3funEv:
; CHECK: mflr 0
-; CHECK-NEXT: std 0, 16(1)
-; CHECK-NEXT: stdu 1, -48(1)
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: std 30, -16(1)
+; CHECK-NEXT: std 0, 16(1)
+; CHECK-NEXT: stdu 1, -48(1)
; CHECK-NEXT: ld 12, 0(3)
-; CHECK-NEXT: std 30, 32(1)
; CHECK-NEXT: mr 30, 3
; CHECK-NEXT: std 2, 24(1)
; CHECK-NEXT: mtctr 12
; CHECK-NEXT: mr 3, 30
; CHECK-NEXT: bl _ZN2CC3barEPi
; CHECK-NEXT: nop
-; CHECK: ld 30, 32(1)
-; CHECK-NEXT: li 3, 0
+; CHECK: li 3, 0
; CHECK-NEXT: addi 1, 1, 48
; CHECK-NEXT: ld 0, 16(1)
; CHECK-NEXT: mtlr 0
+; CHECK: ld 30, -16(1)
; CHECK-NEXT: blr
entry:
%foo = getelementptr inbounds %class.CC, %class.CC* %this, i64 0, i32 0, i32 0
; CHECK-LABEL: caller:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: std r0, 16(r1)
-; CHECK-NEXT: stdu r1, -192(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 192
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r14, -144
; CHECK-NEXT: .cfi_offset r29, -24
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: .cfi_offset r31, -8
+; CHECK-NEXT: std r14, -144(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r15, -136(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r16, -128(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r17, -120(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r18, -112(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r19, -104(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r20, -96(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r21, -88(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r22, -80(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r23, -72(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r24, -64(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r31, -8(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r0, 16(r1)
+; CHECK-NEXT: stdu r1, -192(r1)
; CHECK-NEXT: std r5, 32(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r3, 40(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r14, 48(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r15, 56(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r16, 64(r1) # 8-byte Folded Spill
; CHECK-NEXT: mr r0, r4
; CHECK-NEXT: ld r3, 40(r1) # 8-byte Folded Reload
-; CHECK-NEXT: std r17, 72(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r18, 80(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r19, 88(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r20, 96(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r21, 104(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r22, 112(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r23, 120(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r24, 128(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r25, 136(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r26, 144(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r27, 152(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r28, 160(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r29, 168(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, 176(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r31, 184(r1) # 8-byte Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: add r3, r3, r0
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: mr r5, r0
; CHECK-NEXT: bl callee
; CHECK-NEXT: nop
-; CHECK-NEXT: ld r31, 184(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, 176(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, 168(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r28, 160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r27, 152(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r26, 144(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r25, 136(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r24, 128(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r23, 120(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r22, 112(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r21, 104(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r20, 96(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r19, 88(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r18, 80(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r17, 72(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r16, 64(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r15, 56(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r14, 48(r1) # 8-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 192
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: ld r31, -8(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r24, -64(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r23, -72(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r22, -80(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r21, -88(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r20, -96(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r19, -104(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r18, -112(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r17, -120(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r16, -128(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r15, -136(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r14, -144(r1) # 8-byte Folded Reload
; CHECK-NEXT: blr
entry:
%0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13}"(i32 %a, i32 %b)
;
; Epilogue code.
; CHECK: mtlr {{[0-9]+}}
-; CHECK-NEXT: blr
+; CHECK: blr
;
; ENABLE: .[[ELSE_LABEL]]: # %if.else
; Shift second argument by one and store into returned register.
; Next BB
; CHECK: %for.exit
; CHECK: mtlr {{[0-9]+}}
-; CHECK-NEXT: blr
+; CHECK: blr
define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
entry:
br label %for.preheader
; Make sure we save the link register
; CHECK: mflr {{[0-9]+}}
;
-; DISABLE: cmplwi 0, 3, 0
-; DISABLE-NEXT: std
+; DISABLE: std
; DISABLE-NEXT: std
+; DISABLE: cmplwi 0, 3, 0
; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
;
; Loop preheader
; DISABLE: .[[EPILOG_BB]]: # %if.end
; Epilog code
; CHECK: mtlr {{[0-9]+}}
-; CHECK-NEXT: blr
+; CHECK: blr
;
; ENABLE: .[[ELSE_LABEL]]: # %if.else
; Shift second argument by one and store into returned register.
; Make sure we save the link register
; CHECK: mflr {{[0-9]+}}
;
-; DISABLE: cmplwi 0, 3, 0
-; DISABLE-NEXT: std
+; DISABLE: std
; DISABLE-NEXT: std
+; DISABLE: cmplwi 0, 3, 0
; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: bl somethingElse
;
; Epilogue code.
; CHECK: mtlr {{[0-9]+}}
-; CHECK-NEXT: blr
+; CHECK: blr
;
; ENABLE: .[[ELSE_LABEL]]: # %if.else
; Shift second argument by one and store into returned register.
entry:
; CHECK-LABEL: test_foo:
-; CHECK: stdu 1, {{-?[0-9]+}}(1)
+; CHECK-DAG: stdu 1, {{-?[0-9]+}}(1)
; CHECK-DAG: mr [[BACKUP_3:[0-9]+]], 3
; CHECK-DAG: mr [[BACKUP_4:[0-9]+]], 4
; CHECK-DAG: mr [[BACKUP_5:[0-9]+]], 5
; CHECK-DAG: mr [[BACKUP_8:[0-9]+]], 8
; CHECK-DAG: mr [[BACKUP_9:[0-9]+]], 9
; CHECK-DAG: mr [[BACKUP_10:[0-9]+]], 10
-; CHECK-DAG: std [[BACKUP_3]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_4]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_5]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_6]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_7]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_8]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_9]], {{[0-9]+}}(1)
-; CHECK-DAG: std [[BACKUP_10]], {{[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_3]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_4]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_5]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_6]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_7]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_8]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_9]], {{-?[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_10]], {{-?[0-9]+}}(1)
; CHECK: bl __tls_get_addr
; CHECK-DAG: stw 3, 0([[BACKUP_3]])
; CHECK-DAG: stw 3, 0([[BACKUP_4]])