STATISTIC(NonAdjacent, "Loops are not adjacent");
STATISTIC(NonEmptyPreheader, "Loop has a non-empty preheader");
STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
+STATISTIC(NonIdenticalGuards, "Candidates have different guards");
+STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block");
+STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block");
enum FusionDependenceAnalysisChoice {
FUSION_DEPENDENCE_ANALYSIS_SCEV,
SmallVector<Instruction *, 16> MemWrites;
/// Are all of the members of this fusion candidate still valid
bool Valid;
+ /// Guard branch of the loop, if it exists
+ BranchInst *GuardBranch;
/// Dominator and PostDominator trees are needed for the
/// FusionCandidateCompare function, required by FusionCandidateSet to
const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE)
: Preheader(L->getLoopPreheader()), Header(L->getHeader()),
ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
- Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT),
- ORE(ORE) {
+ Latch(L->getLoopLatch()), L(L), Valid(true), GuardBranch(nullptr),
+ DT(DT), PDT(PDT), ORE(ORE) {
+
+ // TODO: This is temporary while we fuse both rotated and non-rotated
+ // loops. Once we switch to only fusing rotated loops, the initialization of
+ // GuardBranch can be moved into the initialization list above.
+ if (isRotated())
+ GuardBranch = L->getLoopGuardBranch();
// Walk over all blocks in the loop and check for conditions that may
// prevent fusion. For each block, walk over all instructions and collect
assert(Latch == L->getLoopLatch() && "Latch is out of sync");
}
+ /// Return the block through which control enters this fusion candidate.
+ ///
+ /// For a candidate with a guard branch this is the block containing that
+ /// branch; for a candidate without one it is the loop preheader.
+ BasicBlock *getEntryBlock() const {
+   return GuardBranch ? GuardBranch->getParent() : Preheader;
+ }
+
+ /// Return the successor of the guard branch that is not the preheader.
+ ///
+ /// If the first successor of the guard is the preheader, the second
+ /// successor is returned; otherwise the first successor is returned.
+ /// This method is only valid for guarded loops, and the guard is expected
+ /// to be a conditional branch.
+ BasicBlock *getNonLoopBlock() const {
+   assert(GuardBranch && "Only valid on guarded loops.");
+   assert(GuardBranch->isConditional() &&
+          "Expecting guard to be a conditional branch.");
+   const unsigned BypassIdx =
+       (GuardBranch->getSuccessor(0) == Preheader) ? 1 : 0;
+   return GuardBranch->getSuccessor(BypassIdx);
+ }
+
+ /// Return true if the latch of this candidate's loop is an exiting block
+ /// of that loop.
+ bool isRotated() const {
+   assert(L && "Expecting loop to be valid.");
+   assert(Latch && "Expecting latch to be valid.");
+   const bool LatchExits = L->isLoopExiting(Latch);
+   return LatchExits;
+ }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the guard branch and the key blocks of this candidate to dbgs().
LLVM_DUMP_METHOD void dump() const {
- dbgs() << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")
+ // A branch instruction is void-typed and therefore never carries a name,
+ // so print the instruction itself instead of its (always empty) name.
+ dbgs() << "\tGuardBranch: ";
+ if (GuardBranch)
+   dbgs() << *GuardBranch;
+ else
+   dbgs() << "nullptr";
+ dbgs() << "\n"
+ << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")
<< "\n"
<< "\tHeader: " << (Header ? Header->getName() : "nullptr") << "\n"
<< "\tExitingBB: "
<< (ExitingBlock ? ExitingBlock->getName() : "nullptr") << "\n"
<< "\tExitBB: " << (ExitBlock ? ExitBlock->getName() : "nullptr")
<< "\n"
- << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n";
+ << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n"
+ << "\tEntryBlock: "
+ << (getEntryBlock() ? getEntryBlock()->getName() : "nullptr")
+ << "\n";
}
#endif
const FusionCandidate &RHS) const {
const DominatorTree *DT = LHS.DT;
+ BasicBlock *LHSEntryBlock = LHS.getEntryBlock();
+ BasicBlock *RHSEntryBlock = RHS.getEntryBlock();
+
// Do not save PDT to local variable as it is only used in asserts and thus
// will trigger an unused variable warning if building without asserts.
assert(DT && LHS.PDT && "Expecting valid dominator tree");
// Do this compare first so if LHS == RHS, function returns false.
- if (DT->dominates(RHS.Preheader, LHS.Preheader)) {
+ if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) {
// RHS dominates LHS
// Verify LHS post-dominates RHS
- assert(LHS.PDT->dominates(LHS.Preheader, RHS.Preheader));
+ assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock));
return false;
}
- if (DT->dominates(LHS.Preheader, RHS.Preheader)) {
+ if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) {
// Verify RHS Postdominates LHS
- assert(LHS.PDT->dominates(RHS.Preheader, LHS.Preheader));
+ assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock));
return true;
}
const FusionCandidate &FC1) const {
assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
- if (DT.dominates(FC0.Preheader, FC1.Preheader))
- return PDT.dominates(FC1.Preheader, FC0.Preheader);
+ BasicBlock *FC0EntryBlock = FC0.getEntryBlock();
+ BasicBlock *FC1EntryBlock = FC1.getEntryBlock();
+
+ if (DT.dominates(FC0EntryBlock, FC1EntryBlock))
+ return PDT.dominates(FC1EntryBlock, FC0EntryBlock);
- if (DT.dominates(FC1.Preheader, FC0.Preheader))
- return PDT.dominates(FC0.Preheader, FC1.Preheader);
+ if (DT.dominates(FC1EntryBlock, FC0EntryBlock))
+ return PDT.dominates(FC0EntryBlock, FC1EntryBlock);
return false;
}
continue;
}
- // For now we skip fusing if the second candidate has any instructions
- // in the preheader. This is done because we currently do not have the
- // safety checks to determine if it is save to move the preheader of
- // the second candidate past the body of the first candidate. Once
- // these checks are added, this condition can be removed.
+ // Ensure that FC0 and FC1 have identical guards.
+ // If one (or both) are not guarded, this check is not necessary.
+ if (FC0->GuardBranch && FC1->GuardBranch &&
+ !haveIdenticalGuards(*FC0, *FC1)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
+ "guards. Not Fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ NonIdenticalGuards);
+ continue;
+ }
+
+ // The following three checks look for empty blocks in FC0 and FC1. If
+ // any of these blocks are non-empty, we do not fuse. This is done
+ // because we currently do not have the safety checks to determine if
+ // it is safe to move the blocks past other blocks in the loop. Once
+ // these checks are added, these conditions can be relaxed.
if (!isEmptyPreheader(*FC1)) {
LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
"preheader. Not fusing.\n");
continue;
}
+ if (FC0->GuardBranch && !isEmptyExitBlock(*FC0)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty exit "
+ "block. Not fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ NonEmptyExitBlock);
+ continue;
+ }
+
+ if (FC1->GuardBranch && !isEmptyGuardBlock(*FC1)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty guard "
+ "block. Not fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ NonEmptyGuardBlock);
+ continue;
+ }
+
+ // Check the dependencies across the loops and do not fuse if it would
+ // violate them.
if (!dependencesAllowFusion(*FC0, *FC1)) {
LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
LLVM_DEBUG(dbgs() << "Check if " << FC0 << " can be fused with " << FC1
<< "\n");
assert(FC0.L->getLoopDepth() == FC1.L->getLoopDepth());
- assert(DT.dominates(FC0.Preheader, FC1.Preheader));
+ assert(DT.dominates(FC0.getEntryBlock(), FC1.getEntryBlock()));
for (Instruction *WriteL0 : FC0.MemWrites) {
for (Instruction *WriteL1 : FC1.MemWrites)
return true;
}
- /// Determine if the exit block of \p FC0 is the preheader of \p FC1. In this
- /// case, there is no code in between the two fusion candidates, thus making
- /// them adjacent.
+ /// Check whether \p FC0 flows directly into \p FC1 in the CFG.
+ ///
+ /// When \p FC0 has a guard branch, the guard successor that is not the
+ /// preheader of \p FC0 must be the entry block of \p FC1. When \p FC0 has no
+ /// guard branch, the exit block of \p FC0 must be the entry block of \p FC1.
+ /// The entry block of \p FC1 is its guard block if it is guarded and its
+ /// preheader otherwise.
bool isAdjacent(const FusionCandidate &FC0,
const FusionCandidate &FC1) const {
- return FC0.ExitBlock == FC1.Preheader;
+ // Pick the block control reaches after (or around) the first candidate.
+ BasicBlock *FC0Successor =
+     FC0.GuardBranch ? FC0.getNonLoopBlock() : FC0.ExitBlock;
+ return FC0Successor == FC1.getEntryBlock();
+ }
+
+ /// Determine if two fusion candidates have identical guards
+ ///
+ /// This method will determine if two fusion candidates have the same guards.
+ /// The guards are considered the same if:
+ ///   1. The conditions of the two guard branches are equivalent: if both are
+ ///      instructions they must be identical (isIdenticalTo); otherwise they
+ ///      must be the very same value.
+ ///   2. The successors of the guard have the same flow into/around the loop.
+ /// If the conditions are equivalent, then the first successor of the guard
+ /// must go to the same place (either the preheader of the loop or the
+ /// NonLoopBlock). In other words, the first successor of both guards must
+ /// both go into the loop (i.e., the preheader) or go around the loop (i.e.,
+ /// the NonLoopBlock). The same must be true for the second successor.
+ ///
+ /// Both \p FC0 and \p FC1 must be guarded loops.
+ bool haveIdenticalGuards(const FusionCandidate &FC0,
+                          const FusionCandidate &FC1) const {
+   assert(FC0.GuardBranch && FC1.GuardBranch &&
+          "Expecting FC0 and FC1 to be guarded loops.");
+
+   Value *FC0Cond = FC0.GuardBranch->getCondition();
+   Value *FC1Cond = FC1.GuardBranch->getCondition();
+   auto *FC0CmpInst = dyn_cast<Instruction>(FC0Cond);
+   auto *FC1CmpInst = dyn_cast<Instruction>(FC1Cond);
+
+   if (FC0CmpInst && FC1CmpInst) {
+     if (!FC0CmpInst->isIdenticalTo(FC1CmpInst))
+       return false;
+   } else if (FC0Cond != FC1Cond) {
+     // At least one condition is not an instruction (e.g. an argument or a
+     // constant). Such conditions are only equivalent when they are the same
+     // value; do not silently treat unrelated conditions as identical.
+     return false;
+   }
+
+   // The guard conditions are equivalent.
+   // Now make sure the successor of the guards have the same flow into/around
+   // the loop
+   if (FC0.GuardBranch->getSuccessor(0) == FC0.Preheader)
+     return (FC1.GuardBranch->getSuccessor(0) == FC1.Preheader);
+   else
+     return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader);
+ }
+
+ /// Return true if the guard block of \p FC holds nothing but the guard
+ /// itself.
+ ///
+ /// The guard condition must be an instruction; if it is not, the block is
+ /// reported as non-empty. When the condition is computed inside the guard
+ /// block, the block must contain exactly two instructions (condition +
+ /// branch); when it is computed in another block, the guard block must
+ /// contain exactly one instruction.
+ /// Once we are able to handle intervening code, any code in the guard block
+ /// for FC1 will need to be treated as intervening code and checked whether
+ /// it can safely move around the loops.
+ bool isEmptyGuardBlock(const FusionCandidate &FC) const {
+   assert(FC.GuardBranch && "Expecting a fusion candidate with guard branch.");
+   auto *Cond = dyn_cast<Instruction>(FC.GuardBranch->getCondition());
+   if (!Cond)
+     return false;
+
+   auto *GuardBlock = FC.GuardBranch->getParent();
+   const bool CondInGuardBlock = Cond->getParent() == GuardBlock;
+   return GuardBlock->size() == (CondInGuardBlock ? 2u : 1u);
}
+ /// Return true if the preheader of \p FC contains exactly one instruction.
+ /// The preheader of \p FC must not be null.
bool isEmptyPreheader(const FusionCandidate &FC) const {
+ assert(FC.Preheader && "Expecting a valid preheader");
return FC.Preheader->size() == 1;
}
+ /// Return true if the exit block of \p FC contains exactly one instruction.
+ /// The exit block of \p FC must not be null.
+ bool isEmptyExitBlock(const FusionCandidate &FC) const {
+   const BasicBlock *Exit = FC.ExitBlock;
+   assert(Exit && "Expecting a valid exit block");
+   return Exit->size() == 1;
+ }
+
/// Fuse two fusion candidates, creating a new fused loop.
///
/// This method contains the mechanics of fusing two loops, represented by \p
LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump();
dbgs() << "Fusion Candidate 1: \n"; FC1.dump(););
+ // Fusing guarded loops is handled slightly differently than non-guarded
+ // loops and has been broken out into a separate method instead of trying to
+ // intersperse the logic within a single method.
+ if (FC0.GuardBranch)
+ return fuseGuardedLoops(FC0, FC1);
+
assert(FC1.Preheader == FC0.ExitBlock);
assert(FC1.Preheader->size() == 1 &&
FC1.Preheader->getSingleSuccessor() == FC1.Header);
SE.verify();
#endif
- FuseCounter++;
-
LLVM_DEBUG(dbgs() << "Fusion done:\n");
return FC0.L;
<< " and " << NV("Cand2", StringRef(FC1.Preheader->getName()))
<< ": " << Stat.getDesc());
}
+
+ /// Fuse two guarded fusion candidates, creating a new fused loop.
+ ///
+ /// Fusing guarded loops is handled much the same way as fusing non-guarded
+ /// loops. The rewiring of the CFG is slightly different though, because of
+ /// the presence of the guards around the loops and the exit blocks after the
+ /// loop body. As such, the new loop is rewired as follows:
+ /// 1. Keep the guard branch from FC0 and use the non-loop block target
+ /// from the FC1 guard branch.
+ /// 2. Remove the exit block from FC0 (this exit block should be empty
+ /// right now).
+ /// 3. Remove the guard branch for FC1
+ /// 4. Remove the preheader for FC1.
+ /// The exit block successor for the latch of FC0 is updated to be the header
+ /// of FC1 and the non-exit block successor of the latch of FC1 is updated to
+ /// be the header of FC0, thus creating the fused loop.
+ ///
+ /// Both \p FC0 and \p FC1 must be guarded, and the non-loop successor of the
+ /// \p FC0 guard must be the guard block of \p FC1.
+ /// \returns the loop of \p FC0, which receives the blocks and child loops of
+ /// the loop of \p FC1.
+ Loop *fuseGuardedLoops(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) {
+ assert(FC0.GuardBranch && FC1.GuardBranch && "Expecting guarded loops");
+
+ BasicBlock *FC0GuardBlock = FC0.GuardBranch->getParent();
+ BasicBlock *FC1GuardBlock = FC1.GuardBranch->getParent();
+ BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();
+ BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();
+
+ assert(FC0NonLoopBlock == FC1GuardBlock && "Loops are not adjacent");
+
+ SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
+
+ ////////////////////////////////////////////////////////////////////////////
+ // Update the Loop Guard
+ ////////////////////////////////////////////////////////////////////////////
+ // The guard for FC0 is updated to guard both FC0 and FC1. This is done by
+ // changing the NonLoopBlock for FC0 to the NonLoopBlock for FC1.
+ // Thus, one path from the guard goes to the preheader for FC0 (and thus
+ // executes the new fused loop) and the other path goes to the NonLoopBlock
+ // for FC1 (where FC1 guard would have gone if FC1 was not executed).
+ FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock);
+ FC0.ExitBlock->getTerminator()->replaceUsesOfWith(FC1GuardBlock,
+ FC1.Header);
+
+ // The guard of FC1 is not necessary anymore.
+ FC1.GuardBranch->eraseFromParent();
+ new UnreachableInst(FC1GuardBlock->getContext(), FC1GuardBlock);
+
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC1GuardBlock, FC1.Preheader));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC1GuardBlock, FC1NonLoopBlock));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC0GuardBlock, FC1GuardBlock));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock));
+
+ assert(pred_begin(FC1GuardBlock) == pred_end(FC1GuardBlock) &&
+ "Expecting guard block to have no predecessors");
+ assert(succ_begin(FC1GuardBlock) == succ_end(FC1GuardBlock) &&
+ "Expecting guard block to have no successors");
+
+ // Remember the phi nodes originally in the header of FC0 in order to rewire
+ // them later. However, this is only necessary if the new loop carried
+ // values might not dominate the exiting branch. While we do not generally
+ // test if this is the case but simply insert intermediate phi nodes, we
+ // need to make sure these intermediate phi nodes have different
+ // predecessors. To this end, we filter the special case where the exiting
+ // block is the latch block of the first loop. Nothing needs to be done
+ // anyway as all loop carried values dominate the latch and thereby also the
+ // exiting branch.
+ // KB: This is no longer necessary because FC0.ExitingBlock == FC0.Latch
+ // (because the loops are rotated). Thus, nothing will ever be added to
+ // OriginalFC0PHIs.
+ SmallVector<PHINode *, 8> OriginalFC0PHIs;
+ if (FC0.ExitingBlock != FC0.Latch)
+ for (PHINode &PHI : FC0.Header->phis())
+ OriginalFC0PHIs.push_back(&PHI);
+
+ assert(OriginalFC0PHIs.empty() && "Expecting OriginalFC0PHIs to be empty!");
+
+ // Replace incoming blocks for header PHIs first.
+ FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader);
+ FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch);
+
+ // The old exiting block of the first loop (FC0) has to jump to the header
+ // of the second as we need to execute the code in the second header block
+ // regardless of the trip count. That is, if the trip count is 0, so the
+ // back edge is never taken, we still have to execute both loop headers,
+ // especially (but not only!) if the second is a do-while style loop.
+ // However, doing so might invalidate the phi nodes of the first loop as
+ // the new values do only need to dominate their latch and not the exiting
+ // predicate. To remedy this potential problem we always introduce phi
+ // nodes in the header of the second loop later that select the loop carried
+ // value, if the second header was reached through an old latch of the
+ // first, or undef otherwise. This is sound as exiting the first implies the
+ // second will exit too, __without__ taking the back-edge (their
+ // trip-counts are equal after all).
+ FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock,
+ FC1.Header);
+
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
+
+ // Remove FC0 Exit Block
+ // The exit block for FC0 is no longer needed since control will flow
+ // directly to the header of FC1. Since it is an empty block, it can be
+ // removed at this point.
+ // TODO: In the future, we can handle non-empty exit blocks by merging any
+ // instructions from FC0 exit block into FC1 exit block prior to removing
+ // the block.
+ // NOTE(review): the assert below checks that the exit block has no
+ // predecessors left, although its message talks about the block being empty.
+ assert(pred_begin(FC0.ExitBlock) == pred_end(FC0.ExitBlock) &&
+ "Expecting exit block to be empty");
+ FC0.ExitBlock->getTerminator()->eraseFromParent();
+ new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
+
+ // Remove FC1 Preheader
+ // The pre-header of L1 is not necessary anymore.
+ assert(pred_begin(FC1.Preheader) == pred_end(FC1.Preheader));
+ FC1.Preheader->getTerminator()->eraseFromParent();
+ new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC1.Preheader, FC1.Header));
+
+ // Move the phi nodes that still have uses from the second loop's header to
+ // the first loop's header; phi nodes without uses are erased instead.
+ while (PHINode *PHI = dyn_cast<PHINode>(&FC1.Header->front())) {
+ if (SE.isSCEVable(PHI->getType()))
+ SE.forgetValue(PHI);
+ if (PHI->hasNUsesOrMore(1))
+ PHI->moveBefore(&*FC0.Header->getFirstInsertionPt());
+ else
+ PHI->eraseFromParent();
+ }
+
+ // Introduce new phi nodes in the second loop header to ensure
+ // exiting the first and jumping to the header of the second does not break
+ // the SSA property of the phis originally in the first loop. See also the
+ // comment above.
+ Instruction *L1HeaderIP = &FC1.Header->front();
+ for (PHINode *LCPHI : OriginalFC0PHIs) {
+ int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
+ assert(L1LatchBBIdx >= 0 &&
+ "Expected loop carried value to be rewired at this point!");
+
+ Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
+
+ PHINode *L1HeaderPHI = PHINode::Create(
+ LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP);
+ L1HeaderPHI->addIncoming(LCV, FC0.Latch);
+ L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()),
+ FC0.ExitingBlock);
+
+ LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
+ }
+
+ // Update the latches
+
+ // Replace latch terminator destinations.
+ FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
+ FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
+
+ // If FC0.Latch and FC0.ExitingBlock are the same then we have already
+ // performed the updates above.
+ if (FC0.Latch != FC0.ExitingBlock)
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Insert, FC0.Latch, FC1.Header));
+
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
+ FC0.Latch, FC0.Header));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert,
+ FC1.Latch, FC0.Header));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
+ FC1.Latch, FC1.Header));
+
+ // All done
+ // Apply the updates to the Dominator Tree and cleanup.
+
+ assert(succ_begin(FC1GuardBlock) == succ_end(FC1GuardBlock) &&
+ "FC1GuardBlock has successors!!");
+ assert(pred_begin(FC1GuardBlock) == pred_end(FC1GuardBlock) &&
+ "FC1GuardBlock has predecessors!!");
+
+ // Update DT/PDT
+ DTU.applyUpdates(TreeUpdates);
+
+ // NOTE(review): FC1GuardBlock was given an unreachable terminator above but
+ // is not passed to deleteBB here, unlike the FC1 preheader and the FC0 exit
+ // block -- confirm it is cleaned up elsewhere.
+ LI.removeBlock(FC1.Preheader);
+ DTU.deleteBB(FC1.Preheader);
+ DTU.deleteBB(FC0.ExitBlock);
+ DTU.flush();
+
+ // Is there a way to keep SE up-to-date so we don't need to forget the loops
+ // and rebuild the information in subsequent passes of fusion?
+ SE.forgetLoop(FC1.L);
+ SE.forgetLoop(FC0.L);
+
+ // Merge the loops.
+ SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
+ FC1.L->block_end());
+ for (BasicBlock *BB : Blocks) {
+ FC0.L->addBlockEntry(BB);
+ FC1.L->removeBlockFromLoop(BB);
+ if (LI.getLoopFor(BB) != FC1.L)
+ continue;
+ LI.changeLoopFor(BB, FC0.L);
+ }
+ while (!FC1.L->empty()) {
+ const auto &ChildLoopIt = FC1.L->begin();
+ Loop *ChildLoop = *ChildLoopIt;
+ FC1.L->removeChildLoop(ChildLoopIt);
+ FC0.L->addChildLoop(ChildLoop);
+ }
+
+ // Delete the now empty loop L1.
+ LI.erase(FC1.L);
+
+#ifndef NDEBUG
+ assert(!verifyFunction(*FC0.Header->getParent(), &errs()));
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
+ assert(PDT.verify());
+ LI.verify(DT);
+ SE.verify();
+#endif
+
+ LLVM_DEBUG(dbgs() << "Fusion done:\n");
+
+ return FC0.L;
+ }
};
struct LoopFuseLegacy : public FunctionPass {