const TargetTransformInfo *TTI = nullptr;
const TargetLibraryInfo *TLInfo;
const LoopInfo *LI;
+ DominatorTree *DT;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
void getAnalysisUsage(AnalysisUsage &AU) const override {
// FIXME: When we can selectively preserve passes, preserve the domtree.
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
OptSize = F.optForSize();
if (ProfileGuidedSectionPrefix) {
return true;
SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
+ SmallVector<PHINode *, 16> PNs;
+
+ for (auto DestBBI = DestBB->begin();
+ auto *DestPN = dyn_cast<PHINode>(&*DestBBI); ++DestBBI)
+ PNs.push_back(DestPN);
// Find all other incoming blocks from which incoming values of all PHIs in
// DestBB are the same as the ones from BB.
if (DestBBPred == BB)
continue;
- bool HasAllSameValue = true;
- BasicBlock::const_iterator DestBBI = DestBB->begin();
- while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) {
- if (DestPN->getIncomingValueForBlock(BB) !=
- DestPN->getIncomingValueForBlock(DestBBPred)) {
- HasAllSameValue = false;
- break;
- }
- }
- if (HasAllSameValue)
+ if (llvm::all_of(PNs, [&](PHINode *PN) {
+ return (PN->getIncomingValueForBlock(BB) ==
+ PN->getIncomingValueForBlock(DestBBPred));
+ }))
SameIncomingValueBBs.insert(DestBBPred);
}
if (SameIncomingValueBBs.count(Pred))
return true;
+ // Check to see if none of the phis have constant incoming values for BB and
+ // Pred dominates DestBB, in such case extra COPYs are likely not added, so
+ // there is no reason to skip merging.
+ if (DT->dominates(Pred, DestBB) && llvm::none_of(PNs, [BB](PHINode *PN) {
+ return isa<Constant>(PN->getIncomingValueForBlock(BB));
+ }))
+ return true;
+
if (!BFI) {
Function &F = *BB->getParent();
LoopInfo LI{DominatorTree(F)};
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
+ MergeBasicBlockIntoOnlyPred(DestBB, DT);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
+ SmallVector<DominatorTree::UpdateType, 3> Updates;
+ for (auto *PredBB : predecessors(BB)) {
+ if (PredBB == BB)
+ continue;
+ DominatorTree::UpdateType UT = {DominatorTree::Delete, PredBB, BB};
+ if (!is_contained(Updates, UT)) {
+ Updates.push_back(UT);
+ if (PredBB != DestBB)
+ Updates.push_back({DominatorTree::Insert, PredBB, DestBB});
+ }
+ }
BB->replaceAllUsesWith(DestBB);
+ DT->applyUpdates(Updates);
+ BB->getTerminator()->eraseFromParent();
+ DT->deleteEdge(BB, DestBB);
BB->eraseFromParent();
++NumBlocksElim;
!1 = !{!"branch_weights", i32 1 , i32 5, i32 1,i32 1, i32 1}
!2 = !{!"branch_weights", i32 1 , i32 4, i32 1,i32 1, i32 1}
+; while.cond does not dominate return, expect to skip merging empty block
+; return.loopexit into return.
+@b = external global i32, align 4
+@a = external global i32*, align 8
+
+define void @f_switch4(i32 %i) local_unnamed_addr personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @f_switch4
+entry:
+ %0 = load i32, i32* @b, align 4
+ %cond = icmp eq i32 %0, 6
+ br i1 %cond, label %return, label %if.end
+
+if.end: ; preds = %entry
+ %add = add i32 %i, 2
+ %1 = load i32*, i32** @a, align 8
+ %magicptr = ptrtoint i32* %1 to i32
+ br label %while.cond
+
+; CHECK-LABEL: while.cond:
+; CHECK: i32 0, label %return.loopexit
+; CHECK: i32 47, label %return.loopexit
+while.cond: ; preds = %while.cond.backedge, %if.end
+ switch i32 %magicptr, label %while.cond.if.end10_crit_edge [
+ i32 0, label %return.loopexit
+ i32 47, label %return.loopexit
+ ]
+
+while.cond.if.end10_crit_edge: ; preds = %while.cond
+ br label %while.cond.backedge
+
+while.cond.backedge: ; preds = %while.cond.if.end10_crit_edge, %if.then9
+ br label %while.cond
+
+return.loopexit: ; preds = %while.cond
+ br label %return
+
+; CHECK_LABEL: return:
+; CHECK: %{{.*}} = phi i32 [ 0, %entry ], [ %add, %return.loopexit ]
+return: ; preds = %return.loopexit, %entry
+ %retval.4 = phi i32 [ 0, %entry ], [ %add, %return.loopexit ]
+ ret void
+}
+declare i32 @__gxx_personality_v0(...)
+
+; Expect to merge empty block while.cond2.loopexit into while.cond2
+define i32 @f_switch5(i32 %i) local_unnamed_addr {
+; CHECK-LABEL: @f_switch5
+entry:
+ %0 = load i32, i32* @b, align 4
+ %cond = icmp eq i32 %0, 6
+ br i1 %cond, label %while.cond.preheader, label %sw.epilog
+
+while.cond.preheader: ; preds = %entry
+ %1 = load i32*, i32** @a, align 8
+ %magicptr = ptrtoint i32* %1 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %1, i64 1
+ br label %while.cond
+
+; CHECK-LABEL: while.cond:
+; CHECK: i64 32, label %while.cond2
+; CHECK: i64 0, label %while.cond2
+while.cond: ; preds = %land.rhs, %while.cond.preheader
+ switch i64 %magicptr, label %land.rhs [
+ i64 32, label %while.cond2.loopexit
+ i64 0, label %while.cond2.loopexit
+ ]
+
+land.rhs: ; preds = %while.cond
+ %2 = load i32, i32* %arrayidx, align 4
+ %tobool1 = icmp eq i32 %2, 0
+ br i1 %tobool1, label %while.cond2thread-pre-split.loopexit, label %while.cond
+
+while.cond2thread-pre-split.loopexit: ; preds = %land.rhs
+ br label %while.cond2thread-pre-split
+
+while.cond2thread-pre-split: ; preds = %while.body4, %while.cond2thread-pre-split.loopexit
+ %.pr = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %while.cond2thread-pre-split.loopexit ]
+ br label %while.cond2
+
+while.cond2.loopexit: ; preds = %while.cond, %while.cond
+ br label %while.cond2
+
+; CHECK-LABEL: while.cond2:
+; CHECK: %{{.*}} = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %land.rhs ], [ %1, %while.cond ], [ %1, %while.cond ]
+while.cond2: ; preds = %while.cond2.loopexit, %while.cond2thread-pre-split
+ %3 = phi i32* [ %.pr, %while.cond2thread-pre-split ], [ %1, %while.cond2.loopexit ]
+ %tobool3 = icmp eq i32* %3, null
+ br i1 %tobool3, label %sw.epilog, label %while.body4
+
+while.body4: ; preds = %while.cond2
+ tail call void bitcast (void (...)* @fn2 to void ()*)()
+ %.pr.pre = load i32*, i32** @a, align 8
+ br label %while.cond2thread-pre-split
+
+sw.epilog: ; preds = %while.cond2, %entry
+ ret i32 undef
+}
+
; This test that BFI/BPI is created without any assertion in isMergingEmptyBlockProfitable()
; in the case where empty blocks are removed before creating BFI/BPI.
-@b = common global i32 0, align 4
-@a = common global i32* null, align 8
define i32 @should_not_assert(i32 %i) local_unnamed_addr {
entry:
%0 = load i32, i32* @b, align 4