class AliasSet;
class AliasSetTracker;
class BasicBlock;
-class BlockFrequencyInfo;
class DataLayout;
class Loop;
class LoopInfo;
/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
/// uses before definitions, allowing us to sink a loop body in one pass without
/// iteration. Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree,
-/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
+/// TargetLibraryInfo, TargetTransformInfo, Loop, AliasSet information for all
/// instructions of the loop and loop safety information as
/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
- BlockFrequencyInfo *, TargetLibraryInfo *, TargetTransformInfo *,
- Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
+ TargetLibraryInfo *, TargetTransformInfo *, Loop *,
+ AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
/// Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
/// first order w.r.t the DominatorTree. This allows us to visit definitions
/// before uses, allowing us to hoist a loop body in one pass without iteration.
-/// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, BlockFrequencyInfo,
+/// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree,
/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the
/// loop and loop safety information as arguments. Diagnostics is emitted via \p
/// ORE. It returns changed status.
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
- BlockFrequencyInfo *, TargetLibraryInfo *, Loop *, AliasSetTracker *,
- MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
- OptimizationRemarkEmitter *);
+ TargetLibraryInfo *, Loop *, AliasSetTracker *,
+ MemorySSAUpdater *, ICFLoopSafetyInfo *,
+ SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is infact dead.
"licm-control-flow-hoisting", cl::Hidden, cl::init(false),
cl::desc("Enable control flow (and PHI) hoisting in LICM"));
-static cl::opt<unsigned> HoistSinkColdnessThreshold(
- "licm-coldness-threshold", cl::Hidden, cl::init(4),
- cl::desc("Relative coldness Threshold of hoisting/sinking destination "
- "block for LICM to be considered beneficial"));
-
static cl::opt<uint32_t> MaxNumUsesTraversed(
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
cl::desc("Max num uses visited for identifying load "
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
- BlockFrequencyInfo *BFI, const Loop *CurLoop,
- ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
- OptimizationRemarkEmitter *ORE);
+ const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,
const Loop *CurLoop,
struct LoopInvariantCodeMotion {
using ASTrackerMapTy = DenseMap<Loop *, std::unique_ptr<AliasSetTracker>>;
bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
- BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI,
- TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
+ TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ ScalarEvolution *SE, MemorySSA *MSSA,
OptimizationRemarkEmitter *ORE, bool DeleteAST);
ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
&getAnalysis<AAResultsWrapperPass>().getAAResults(),
&getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
&getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
- &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(),
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*L->getHeader()->getParent()),
/// loop preheaders be inserted into the CFG...
///
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
"cached at a higher level");
LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
- auto BFI = FAM.getCachedResult<BlockFrequencyAnalysis>(*F);
- if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, BFI, &AR.TLI, &AR.TTI, &AR.SE,
+ if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
AR.MSSA, ORE, true))
return PreservedAnalyses::all();
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
///
bool LoopInvariantCodeMotion::runOnLoop(
Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
- BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE,
- bool DeleteAST) {
+ TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
+ MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
bool Changed = false;
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
/*IsSink=*/true};
if (L->hasDedicatedExits())
- Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
+ Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
Flags.IsSink = false;
if (Preheader)
- Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
+ Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
// Now that all loop invariants have been removed from the loop, promote any
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
- DominatorTree *DT, BlockFrequencyInfo *BFI,
- TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- Loop *CurLoop, AliasSetTracker *CurAST,
- MemorySSAUpdater *MSSAU,
+ DominatorTree *DT, TargetLibraryInfo *TLI,
+ TargetTransformInfo *TTI, Loop *CurLoop,
+ AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE) {
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
ORE) &&
!I.mayHaveSideEffects()) {
- if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
+ if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
if (!FreeInLoop) {
++II;
eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
};
} // namespace
-// Hoisting/sinking instruction out of a loop isn't always beneficial. It's only
-// only worthwhile if the destination block is actually colder than current
-// block.
-static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock,
- OptimizationRemarkEmitter *ORE,
- BlockFrequencyInfo *BFI) {
- // Check block frequency only when runtime profile is available.
- // to avoid pathological cases. With static profile, lean towards
- // hosting because it helps canonicalize the loop for vectorizer.
- if (!DstBlock->getParent()->hasProfileData())
- return true;
-
- if (!HoistSinkColdnessThreshold || !BFI)
- return true;
-
- BasicBlock *SrcBlock = I.getParent();
- if (BFI->getBlockFreq(DstBlock).getFrequency() / HoistSinkColdnessThreshold >
- BFI->getBlockFreq(SrcBlock).getFrequency()) {
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "SinkHoistInst", &I)
- << "failed to sink or hoist instruction because containing block "
- "has lower frequency than destination block";
- });
- return false;
- }
-
- return true;
-}
-
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
- DominatorTree *DT, BlockFrequencyInfo *BFI,
- TargetLibraryInfo *TLI, Loop *CurLoop,
+ DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
// Try hoisting the instruction out to the preheader. We can only do
// this if all of the operands of the instruction are loop invariant and
- // if it is safe to hoist the instruction. We also check block frequency
- // to make sure instruction only gets hoisted into colder blocks.
+ // if it is safe to hoist the instruction.
// TODO: It may be safe to hoist if we are hoisting to a conditional block
// and we have accurately duplicated the control flow from the loop header
// to that block.
if (CurLoop->hasLoopInvariantOperands(&I) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
ORE) &&
- worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
isSafeToExecuteUnconditionally(
I, DT, CurLoop, SafetyInfo, ORE,
CurLoop->getLoopPreheader()->getTerminator())) {
/// position, and may either delete it or move it to outside of the loop.
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
- BlockFrequencyInfo *BFI, const Loop *CurLoop,
- ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
- OptimizationRemarkEmitter *ORE) {
+ const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
// If this instruction is only used outside of the loop, then all users are
// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
// the instruction.
- // First check if I is worth sinking for all uses. Sink only when it is worth
- // across all uses.
SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
- SmallVector<PHINode*, 8> ExitPNs;
for (auto *UI : Users) {
auto *User = cast<Instruction>(UI);
PHINode *PN = cast<PHINode>(User);
assert(ExitBlockSet.count(PN->getParent()) &&
"The LCSSA PHI is not in an exit block!");
-
- if (!worthSinkOrHoistInst(I, PN->getParent(), ORE, BFI)) {
- return Changed;
- }
-
- ExitPNs.push_back(PN);
- }
-
- for (auto *PN: ExitPNs) {
// The PHI must be trivially replaceable.
Instruction *New = sinkThroughTriviallyReplaceablePHI(
PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
// Note that proving a load safe to speculate requires proving
// sufficient alignment at the target location. Proving it guaranteed
// to execute does as well. Thus we can increase our guaranteed
- // alignment as well.
+ // alignment as well.
if (!DereferenceableInPH || (InstAlignment > Alignment))
if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
ORE, Preheader->getTerminator())) {