                cl::desc("MachineLICM should hoist even cheap instructions"),
                cl::init(false), cl::Hidden);
+static cl::opt<bool>
+SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
+                       cl::desc("MachineLICM should sink instructions into "
+                                "loops to avoid register spills"),
+                       cl::init(false), cl::Hidden);
+
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
    void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
    void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
+    /// SinkIntoLoop - Sink instructions into loops if profitable. This
+    /// primarily tries to prevent register spills caused by register
+    /// pressure when there is little to no overhead in moving instructions
+    /// into loops.
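+    /// For example, an address computation left in the preheader only to be
+    /// copied inside the loop can be moved next to those copies, so its
+    /// result no longer has to stay live across the entire loop body.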
+    void SinkIntoLoop();
+
    /// getRegisterClassIDAndCost - For a given MI, register, and the operand
    /// index, return the ID and cost of its representative register class by
    /// reference.
      FirstInLoop = true;
      HoistOutOfLoop(N);
      CSEMap.clear();
+
+      if (SinkInstsToAvoidSpills)
+        SinkIntoLoop();
    }
  }
  }
}
+void MachineLICM::SinkIntoLoop() {
+  MachineBasicBlock *Preheader = getCurPreheader();
+  if (!Preheader)
+    return;
+
+  SmallVector<MachineInstr *, 8> Candidates;
+  for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
+       I != Preheader->instr_end(); ++I) {
+    // We need to ensure that we can safely move this instruction into the
+    // loop. As such, it must not have side-effects, such as a call would,
+    // and its value must not be used by a PHI inside the loop.
+    if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I))
+      Candidates.push_back(I);
+  }
+
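+  // Sink each candidate only if all of its uses are cheap COPYs that share a
+  // common dominating block; that block becomes the sink destination.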
+  for (MachineInstr *I : Candidates) {
+    const MachineOperand &MO = I->getOperand(0);
+    if (!MO.isDef() || !MO.isReg() || !MO.getReg())
+      continue;
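+    // Sinking is only safe when the register has a single definition, so that
+    // every use still sees the same value after the move.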
+    if (!MRI->hasOneDef(MO.getReg()))
+      continue;
+    bool CanSink = true;
+    MachineBasicBlock *B = nullptr;
+    for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
+      // FIXME: Come up with a proper cost model that estimates whether sinking
+      // the instruction (and thus possibly executing it on every loop
+      // iteration) is more expensive than keeping the value in a register.
+      // For now, assume that copies are cheap and thus almost always worth it.
+      if (!MI.isCopy()) {
+        CanSink = false;
+        break;
+      }
+      if (!B) {
+        B = MI.getParent();
+        continue;
+      }
+      B = DT->findNearestCommonDominator(B, MI.getParent());
+      if (!B) {
+        CanSink = false;
+        break;
+      }
+    }
+    if (!CanSink || !B || B == Preheader)
+      continue;
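+    // Move the instruction out of the preheader to the start of the
+    // dominating block, right after any PHI nodes.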
+    B->splice(B->getFirstNonPHI(), Preheader, I);
+  }
+}
+
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
  return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
}
--- /dev/null
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-linux -sink-insts-to-avoid-spills | FileCheck %s -check-prefix=SINK
+
+; Ensure that we sink copy-like instructions into loops to avoid register
+; spills.
+
+; CHECK: Spill
+; SINK-NOT: Spill
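+
+; The six field addresses computed in the entry block are live across the
+; loop. Without sinking they have to be kept in registers across every call
+; and end up spilled; with sinking, each address computation moves down next
+; to its single use inside the loop.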
+
+%struct.A = type { i32, i32, i32, i32, i32, i32 }
+
+define void @_Z1fPhP1A(i8* nocapture readonly %input, %struct.A* %a) {
+  %1 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
+  %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 1
+  %3 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 2
+  %4 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 3
+  %5 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 4
+  %6 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 5
+  br label %.backedge
+
+.backedge:
+  %.0 = phi i8* [ %input, %0 ], [ %7, %.backedge.backedge ]
+  %7 = getelementptr inbounds i8, i8* %.0, i64 1
+  %8 = load i8, i8* %7, align 1
+  switch i8 %8, label %.backedge.backedge [
+    i8 0, label %9
+    i8 10, label %10
+    i8 20, label %11
+    i8 30, label %12
+    i8 40, label %13
+    i8 50, label %14
+  ]
+
+; <label>:9
+  tail call void @_Z6assignPj(i32* %1)
+  br label %.backedge.backedge
+
+; <label>:10
+  tail call void @_Z6assignPj(i32* %2)
+  br label %.backedge.backedge
+
+.backedge.backedge:
+  br label %.backedge
+
+; <label>:11
+  tail call void @_Z6assignPj(i32* %3)
+  br label %.backedge.backedge
+
+; <label>:12
+  tail call void @_Z6assignPj(i32* %4)
+  br label %.backedge.backedge
+
+; <label>:13
+  tail call void @_Z6assignPj(i32* %5)
+  br label %.backedge.backedge
+
+; <label>:14
+  tail call void @_Z6assignPj(i32* %6)
+  br label %.backedge.backedge
+}
+
+declare void @_Z6assignPj(i32*)