bool UseEstimate,
unsigned *NewMaxCallFrameSize) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
// Get the number of bytes to allocate from the FrameInfo
unsigned FrameSize =
bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
!MFI.adjustsStack() && // No calls.
!MustSaveLR(MF, LR) && // No need to save LR.
+ !FI->mustSaveTOC() && // No need to save TOC.
!RegInfo->hasBasePointer(MF); // No special alignment.
// Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
// Check if the link register (LR) must be saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
+ bool MustSaveTOC = FI->mustSaveTOC();
const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
bool MustSaveCR = !MustSaveCRs.empty();
// Do we have a frame pointer and/or base pointer for this function?
unsigned BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
+ unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
unsigned ScratchReg = 0;
unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
// ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
HasSTUX = true;
}
+ // Save the TOC register after the stack pointer update if a prologue TOC
+ // save is required for the function.
+ if (MustSaveTOC) {
+ assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
+ BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
+ .addReg(TOCReg, getKillRegState(true))
+ .addImm(TOCSaveOffset)
+ .addReg(SPReg);
+ }
+
if (!HasRedZone) {
assert(!isPPC64 && "A red zone is always available on PPC64");
if (HasSTUX) {
if (PPC::CRBITRCRegClass.contains(Reg))
continue;
+ if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+ continue;
+
// For SVR4, don't emit a move for the CR spill slot if we haven't
// spilled CRs.
if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
unsigned MinFPR = PPC::F31;
unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool HasGPSaveArea = false;
bool HasG8SaveArea = false;
bool HasFPSaveArea = false;
bool HasVRSAVESaveArea = false;
bool HasVRSaveArea = false;
+ bool MustSaveTOC = FI->mustSaveTOC();
SmallVector<CalleeSavedInfo, 18> GPRegs;
SmallVector<CalleeSavedInfo, 18> G8Regs;
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
+ assert((!MustSaveTOC || (Reg != PPC::X2 && Reg != PPC::R2)) &&
+ "Not expecting to try to spill R2 in a function that must save TOC");
if (PPC::GPRCRegClass.contains(Reg) ||
PPC::SPE4RCRegClass.contains(Reg)) {
HasGPSaveArea = true;
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+ bool MustSaveTOC = FI->mustSaveTOC();
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
continue;
}
+ // The actual spill will happen in the prologue.
+ if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+ continue;
+
// Insert the spill to the stack frame.
if (IsCRField) {
PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+ bool MustSaveTOC = FI->mustSaveTOC();
bool CR2Spilled = false;
bool CR3Spilled = false;
bool CR4Spilled = false;
if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
continue;
+ if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+ continue;
+
if (Reg == PPC::CR2) {
CR2Spilled = true;
// The spill slot is associated only with CR2, which is the
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
STATISTIC(MultiTOCSaves,
"Number of functions with multiple TOC saves that must be kept");
+STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
private:
MachineDominatorTree *MDT;
+ MachinePostDominatorTree *MPDT;
+ MachineBlockFrequencyInfo *MBFI;
+ uint64_t EntryFreq;
// Initialize class variables.
void initialize(MachineFunction &MFParm);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>(); // Queried for post-dominance of the entry block.
+ AU.addRequired<MachineBlockFrequencyInfo>(); // Block frequencies are compared against the entry frequency.
AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachinePostDominatorTree>(); // Declared preserved, like the dominator tree.
+ AU.addPreserved<MachineBlockFrequencyInfo>(); // Declared preserved, like the dominator tree.
MachineFunctionPass::getAnalysisUsage(AU);
}
MF = &MFParm;
MRI = &MF->getRegInfo();
MDT = &getAnalysis<MachineDominatorTree>();
+ MPDT = &getAnalysis<MachinePostDominatorTree>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ EntryFreq = MBFI->getEntryFreq();
TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
LLVM_DEBUG(MF->dump());
void PPCMIPeephole::UpdateTOCSaves(
std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
+ assert(MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
+ "TOC-save removal only supported on ELFv2");
+ PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+
+ MachineBasicBlock *Entry = &MF->front();
+ uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();
+
+ // If the TOC save resides in a block that post-dominates Entry, or in a
+ // block that is hotter than Entry (keep in mind that early MachineLICM has
+ // already run, so the TOC save won't be hoisted), we can just do the save
+ // in the prologue.
+ if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
+ FI->setMustSaveTOC(true);
+
+ // If we are saving the TOC in the prologue, all the TOC saves can be removed
+ // from the code.
+ if (FI->mustSaveTOC()) {
+ for (auto &TOCSave : TOCSaves)
+ TOCSave.second = false;
+ // Add new instruction to map.
+ TOCSaves[MI] = false;
+ return;
+ }
+
bool Keep = true;
for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
MachineInstr *CurrInst = It->first;
// Eliminate all the TOC save instructions which are redundant.
Simplified |= eliminateRedundantTOCSaves(TOCSaves);
+ PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+ if (FI->mustSaveTOC())
+ NumTOCSavesInPrologue++;
+
// We try to eliminate redundant compare instruction.
Simplified |= eliminateRedundantCompare();
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
"PowerPC MI Peephole Optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
"PowerPC MI Peephole Optimization", false, false)
/// PEI.
bool MustSaveLR;
+ /// MustSaveTOC - Indicates that the TOC save needs to be performed in the
+ /// prologue of the function. This is typically the case when there are
+ /// indirect calls in the function and it is more profitable to save the
+ /// TOC pointer in the prologue than in the block(s) containing the call(s).
+ bool MustSaveTOC = false;
+
/// Do we have to disable shrink-wrapping? This has to be set if we emit any
/// instructions that clobber LR in the entry block because discovering this
/// in PEI is too late (happens after shrink-wrapping);
void setMustSaveLR(bool U) { MustSaveLR = U; }
bool mustSaveLR() const { return MustSaveLR; }
+ void setMustSaveTOC(bool U) { MustSaveTOC = U; }
+ bool mustSaveTOC() const { return MustSaveTOC; }
+
/// We certainly don't want to shrink wrap functions if we've emitted a
/// MovePCtoLR8 as that has to go into the entry, so the prologue definitely
/// has to go into the entry block.
; CHECK-NEXT: std 30, -16(1)
; CHECK-NEXT: std 0, 16(1)
; CHECK-NEXT: stdu 1, -48(1)
-; CHECK-NEXT: ld 12, 0(3)
-; CHECK-NEXT: mr 30, 3
; CHECK-NEXT: std 2, 24(1)
+; CHECK-NEXT: mr 30, 3
+; CHECK-NEXT: ld 12, 0(3)
; CHECK-NEXT: mtctr 12
; CHECK-NEXT: bctrl
; CHECK-NEXT: ld 2, 24(1)
define signext i32 @test3(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture %Func2) {
; CHECK-LABEL: test3:
; CHECK: std 2, 24(1)
-; CHECK: std 2, 24(1)
; CHECK-NOT: std 2, 24(1)
entry:
%tobool = icmp eq i32 %i, 0
define signext i32 @test5(i32 signext %i, i32 (i32)* nocapture %Func, i32 (i32)* nocapture readnone %Func2) {
entry:
; CHECK-LABEL: test5:
-; CHECK: std 2, 24(1)
; CHECK: std 2, 24(1)
%tobool = icmp eq i32 %i, 0
; CHECK-NEXT: cmpwi cr1, r4, 11
; CHECK-NEXT: mr r30, r3
; CHECK-NEXT: extsw r28, r4
+; CHECK-NEXT: std r2, 24(r1)
; CHECK-NEXT: cmpwi r29, 1
; CHECK-NEXT: cror 4*cr5+lt, lt, 4*cr1+lt
-; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_3
-; CHECK-NEXT: # %bb.1: # %for.body.us.preheader
-; CHECK-NEXT: std r2, 24(r1)
+; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_2
; CHECK-NEXT: .p2align 5
-; CHECK-NEXT: .LBB0_2: # %for.body.us
+; CHECK-NEXT: .LBB0_1: # %for.body.us
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: mtctr r30
; CHECK-NEXT: mr r3, r28
; CHECK-NEXT: ld 2, 24(r1)
; CHECK-NEXT: addi r29, r29, -1
; CHECK-NEXT: cmplwi r29, 0
-; CHECK-NEXT: bne cr0, .LBB0_2
-; CHECK-NEXT: .LBB0_3: # %for.cond.cleanup
+; CHECK-NEXT: bne cr0, .LBB0_1
+; CHECK-NEXT: .LBB0_2: # %for.cond.cleanup
; CHECK-NEXT: mtctr r30
; CHECK-NEXT: mr r3, r28
; CHECK-NEXT: mr r12, r30
-; CHECK-NEXT: std r2, 24(r1)
; CHECK-NEXT: bctrl
; CHECK-NEXT: ld 2, 24(r1)
; CHECK-NEXT: addi r1, r1, 64