class AliasSet;
class AliasSetTracker;
class BasicBlock;
+class BlockFrequencyInfo;
class DataLayout;
class Loop;
class LoopInfo;
/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
/// uses before definitions, allowing us to sink a loop body in one pass without
/// iteration. Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree,
-/// DataLayout, TargetLibraryInfo, Loop, AliasSet information for all
+/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
/// instructions of the loop and loop safety information as
/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
- TargetLibraryInfo *, TargetTransformInfo *, Loop *,
- AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
+ BlockFrequencyInfo *, TargetLibraryInfo *, TargetTransformInfo *,
+ Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
/// Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
/// first order w.r.t the DominatorTree. This allows us to visit definitions
/// before uses, allowing us to hoist a loop body in one pass without iteration.
-/// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, DataLayout,
+/// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, BlockFrequencyInfo,
/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the
/// loop and loop safety information as arguments. Diagnostics is emitted via \p
/// ORE. It returns changed status.
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
- TargetLibraryInfo *, Loop *, AliasSetTracker *,
- MemorySSAUpdater *, ICFLoopSafetyInfo *,
- SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
+ BlockFrequencyInfo *, TargetLibraryInfo *, Loop *, AliasSetTracker *,
+ MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
+ OptimizationRemarkEmitter *);
/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is infact dead.
"licm-control-flow-hoisting", cl::Hidden, cl::init(false),
cl::desc("Enable control flow (and PHI) hoisting in LICM"));
+static cl::opt<unsigned> HoistSinkColdnessThreshold(
+ "licm-coldness-threshold", cl::Hidden, cl::init(4),
+ cl::desc("Relative coldness Threshold of hoisting/sinking destination "
+ "block for LICM to be considered beneficial"));
+
static cl::opt<uint32_t> MaxNumUsesTraversed(
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
cl::desc("Max num uses visited for identifying load "
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
- const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
+ BlockFrequencyInfo *BFI, const Loop *CurLoop,
+ ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
+ OptimizationRemarkEmitter *ORE);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,
const Loop *CurLoop,
struct LoopInvariantCodeMotion {
using ASTrackerMapTy = DenseMap<Loop *, std::unique_ptr<AliasSetTracker>>;
bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
- TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- ScalarEvolution *SE, MemorySSA *MSSA,
+ BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI,
+ TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
OptimizationRemarkEmitter *ORE, bool DeleteAST);
ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
&getAnalysis<AAResultsWrapperPass>().getAAResults(),
&getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
&getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(),
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*L->getHeader()->getParent()),
/// loop preheaders be inserted into the CFG...
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
"cached at a higher level");
LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
- if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
+ auto BFI = FAM.getCachedResult<BlockFrequencyAnalysis>(*F);
+ if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, BFI, &AR.TLI, &AR.TTI, &AR.SE,
AR.MSSA, ORE, true))
return PreservedAnalyses::all();
///
bool LoopInvariantCodeMotion::runOnLoop(
Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
- TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
- MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
+ BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE,
+ bool DeleteAST) {
bool Changed = false;
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
/*IsSink=*/true};
if (L->hasDedicatedExits())
- Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
+ Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
Flags.IsSink = false;
if (Preheader)
- Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
+ Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
// Now that all loop invariants have been removed from the loop, promote any
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
- DominatorTree *DT, TargetLibraryInfo *TLI,
- TargetTransformInfo *TTI, Loop *CurLoop,
- AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
+ DominatorTree *DT, BlockFrequencyInfo *BFI,
+ TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ Loop *CurLoop, AliasSetTracker *CurAST,
+ MemorySSAUpdater *MSSAU,
ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE) {
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
ORE) &&
!I.mayHaveSideEffects()) {
- if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
+ if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
if (!FreeInLoop) {
++II;
eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
};
} // namespace
+// Hoisting/sinking instruction out of a loop isn't always beneficial. It's only
+// only worthwhile if the destination block is actually colder than current
+// block.
+static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock,
+ OptimizationRemarkEmitter *ORE,
+ BlockFrequencyInfo *BFI) {
+ // Check block frequency only when runtime profile is available.
+ // to avoid pathological cases. With static profile, lean towards
+ // hosting because it helps canonicalize the loop for vectorizer.
+ if (!DstBlock->getParent()->hasProfileData())
+ return true;
+
+ if (!HoistSinkColdnessThreshold || !BFI)
+ return true;
+
+ BasicBlock *SrcBlock = I.getParent();
+ if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold >
+ BFI->getBlockFreq(SrcBlock).getFrequency()) {
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "SinkHoistInst", &I)
+ << "failed to sink or hoist instruction because containing block "
+ "has lower frequency than destination block";
+ });
+ return false;
+ }
+
+ return true;
+}
+
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
- DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
+ DominatorTree *DT, BlockFrequencyInfo *BFI,
+ TargetLibraryInfo *TLI, Loop *CurLoop,
AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
// Try hoisting the instruction out to the preheader. We can only do
// this if all of the operands of the instruction are loop invariant and
- // if it is safe to hoist the instruction.
+ // if it is safe to hoist the instruction. We also check block frequency
+ // to make sure instruction only gets hoisted into colder blocks.
// TODO: It may be safe to hoist if we are hoisting to a conditional block
// and we have accurately duplicated the control flow from the loop header
// to that block.
if (CurLoop->hasLoopInvariantOperands(&I) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
ORE) &&
+ worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
isSafeToExecuteUnconditionally(
I, DT, CurLoop, SafetyInfo, ORE,
CurLoop->getLoopPreheader()->getTerminator())) {
/// position, and may either delete it or move it to outside of the loop.
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
- const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
+ BlockFrequencyInfo *BFI, const Loop *CurLoop,
+ ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
+ OptimizationRemarkEmitter *ORE) {
LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
// If this instruction is only used outside of the loop, then all users are
// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
// the instruction.
+ // First check if I is worth sinking for all uses. Sink only when it is worth
+ // across all uses.
SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
+ SmallVector<PHINode*, 8> ExitPNs;
for (auto *UI : Users) {
auto *User = cast<Instruction>(UI);
PHINode *PN = cast<PHINode>(User);
assert(ExitBlockSet.count(PN->getParent()) &&
"The LCSSA PHI is not in an exit block!");
+
+ if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) {
+ return Changed;
+ }
+
+ ExitPNs.push_back(PN);
+ }
+
+ for (auto *PN: ExitPNs) {
// The PHI must be trivially replaceable.
Instruction *New = sinkThroughTriviallyReplaceablePHI(
PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);