From: Balaram Makam Date: Thu, 26 Oct 2017 22:34:01 +0000 (+0000) Subject: [CGP] Merge empty case blocks if no extra moves are added. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9a0f2ee60ef2dd9aa66309afc03b400fa623827f;p=llvm [CGP] Merge empty case blocks if no extra moves are added. Summary: Currently we skip merging when extra moves may be added in the header of switch instead of the case block, if the case block is used as an incoming block of a PHI. If all the incoming values of the PHIs are non-constants and the destination block is dominated by the switch block then extra moves are likely not added by ISel, so there is no need to skip merging in this case. Reviewers: efriedma, junbuml, davidxl, hfinkel, qcolombet Reviewed By: efriedma Subscribers: dberlin, kuhar, mcrosier, llvm-commits Differential Revision: https://reviews.llvm.org/D37343 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316711 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 1e5f15397bb..9f9bde40164 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -212,6 +212,7 @@ class TypePromotionTransaction; const TargetTransformInfo *TTI = nullptr; const TargetLibraryInfo *TLInfo; const LoopInfo *LI; + DominatorTree *DT; std::unique_ptr BFI; std::unique_ptr BPI; @@ -262,6 +263,7 @@ class TypePromotionTransaction; void getAnalysisUsage(AnalysisUsage &AU) const override { // FIXME: When we can selectively preserve passes, preserve the domtree. + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -343,6 +345,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLInfo = &getAnalysis().getTLI(); TTI = &getAnalysis().getTTI(F); LI = &getAnalysis().getLoopInfo(); + DT = &getAnalysis().getDomTree(); + OptSize = F.optForSize(); if (ProfileGuidedSectionPrefix) { @@ -755,6 +759,11 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, return true; SmallPtrSet SameIncomingValueBBs; + SmallVector PNs; + + for (auto DestBBI = DestBB->begin(); + auto *DestPN = dyn_cast(&*DestBBI); ++DestBBI) + PNs.push_back(DestPN); // Find all other incoming blocks from which incoming values of all PHIs in // DestBB are the same as the ones from BB. @@ -764,16 +773,10 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, if (DestBBPred == BB) continue; - bool HasAllSameValue = true; - BasicBlock::const_iterator DestBBI = DestBB->begin(); - while (const PHINode *DestPN = dyn_cast(DestBBI++)) { - if (DestPN->getIncomingValueForBlock(BB) != - DestPN->getIncomingValueForBlock(DestBBPred)) { - HasAllSameValue = false; - break; - } - } - if (HasAllSameValue) + if (llvm::all_of(PNs, [&](PHINode *PN) { + return (PN->getIncomingValueForBlock(BB) == + PN->getIncomingValueForBlock(DestBBPred)); + })) SameIncomingValueBBs.insert(DestBBPred); } @@ -783,6 +786,14 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, if (SameIncomingValueBBs.count(Pred)) return true; + // Check to see if none of the phis have constant incoming values for BB and + // Pred dominates DestBB, in such case extra COPYs are likely not added, so + // there is no reason to skip merging. + if (DT->dominates(Pred, DestBB) && llvm::none_of(PNs, [BB](PHINode *PN) { + return isa(PN->getIncomingValueForBlock(BB)); + })) + return true; + if (!BFI) { Function &F = *BB->getParent(); LoopInfo LI{DominatorTree(F)}; @@ -885,7 +896,7 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(DestBB, nullptr); + MergeBasicBlockIntoOnlyPred(DestBB, DT); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); @@ -926,7 +937,21 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. + SmallVector Updates; + for (auto *PredBB : predecessors(BB)) { + if (PredBB == BB) + continue; + DominatorTree::UpdateType UT = {DominatorTree::Delete, PredBB, BB}; + if (!is_contained(Updates, UT)) { + Updates.push_back(UT); + if (PredBB != DestBB) + Updates.push_back({DominatorTree::Insert, PredBB, DestBB}); + } + } BB->replaceAllUsesWith(DestBB); + DT->applyUpdates(Updates); + BB->getTerminator()->eraseFromParent(); + DT->deleteEdge(BB, DestBB); BB->eraseFromParent(); ++NumBlocksElim; diff --git a/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll b/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll index 194c86b8c04..b062e5fe734 100644 --- a/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll +++ b/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll @@ -143,11 +143,107 @@ declare void @calldefault(...) local_unnamed_addr !1 = !{!"branch_weights", i32 1 , i32 5, i32 1,i32 1, i32 1} !2 = !{!"branch_weights", i32 1 , i32 4, i32 1,i32 1, i32 1} +; while.cond does not dominate return, expect to skip merging empty block +; return.loopexit into return. +@b = external global i32, align 4 +@a = external global i32*, align 8 + +define void @f_switch4(i32 %i) local_unnamed_addr personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @f_switch4 +entry: + %0 = load i32, i32* @b, align 4 + %cond = icmp eq i32 %0, 6 + br i1 %cond, label %return, label %if.end + +if.end: ; preds = %entry + %add = add i32 %i, 2 + %1 = load i32*, i32** @a, align 8 + %magicptr = ptrtoint i32* %1 to i32 + br label %while.cond + +; CHECK-LABEL: while.cond: +; CHECK: i32 0, label %return.loopexit +; CHECK: i32 47, label %return.loopexit +while.cond: ; preds = %while.cond.backedge, %if.end + switch i32 %magicptr, label %while.cond.if.end10_crit_edge [ + i32 0, label %return.loopexit + i32 47, label %return.loopexit + ] + +while.cond.if.end10_crit_edge: ; preds = %while.cond + br label %while.cond.backedge + +while.cond.backedge: ; preds = %while.cond.if.end10_crit_edge, %if.then9 + br label %while.cond + +return.loopexit: ; preds = %while.cond + br label %return + +; CHECK_LABEL: return: +; CHECK: %{{.*}} = phi i32 [ 0, %entry ], [ %add, %return.loopexit ] +return: ; preds = %return.loopexit, %entry + %retval.4 = phi i32 [ 0, %entry ], [ %add, %return.loopexit ] + ret void +} +declare i32 @__gxx_personality_v0(...) + +; Expect to merge empty block while.cond2.loopexit into while.cond2 +define i32 @f_switch5(i32 %i) local_unnamed_addr { +; CHECK-LABEL: @f_switch5 +entry: + %0 = load i32, i32* @b, align 4 + %cond = icmp eq i32 %0, 6 + br i1 %cond, label %while.cond.preheader, label %sw.epilog + +while.cond.preheader: ; preds = %entry + %1 = load i32*, i32** @a, align 8 + %magicptr = ptrtoint i32* %1 to i64 + %arrayidx = getelementptr inbounds i32, i32* %1, i64 1 + br label %while.cond + +; CHECK-LABEL: while.cond: +; CHECK: i64 32, label %while.cond2 +; CHECK: i64 0, label %while.cond2 +while.cond: ; preds = %land.rhs, %while.cond.preheader + switch i64 %magicptr, label %land.rhs [ + i64 32, label %while.cond2.loopexit + i64 0, label %while.cond2.loopexit + ] + +land.rhs: ; preds = %while.cond + %2 = load i32, i32* %arrayidx, align 4 + %tobool1 = icmp eq i32 %2, 0 + br i1 %tobool1, label %while.cond2thread-pre-split.loopexit, label %while.cond + +while.cond2thread-pre-split.loopexit: ; preds = %land.rhs + br label %while.cond2thread-pre-split + +while.cond2thread-pre-split: ; preds = %while.body4, %while.cond2thread-pre-split.loopexit + %.pr = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %while.cond2thread-pre-split.loopexit ] + br label %while.cond2 + +while.cond2.loopexit: ; preds = %while.cond, %while.cond + br label %while.cond2 + +; CHECK-LABEL: while.cond2: +; CHECK: %{{.*}} = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %land.rhs ], [ %1, %while.cond ], [ %1, %while.cond ] +while.cond2: ; preds = %while.cond2.loopexit, %while.cond2thread-pre-split + %3 = phi i32* [ %.pr, %while.cond2thread-pre-split ], [ %1, %while.cond2.loopexit ] + %tobool3 = icmp eq i32* %3, null + br i1 %tobool3, label %sw.epilog, label %while.body4 + +while.body4: ; preds = %while.cond2 + tail call void bitcast (void (...)* @fn2 to void ()*)() + %.pr.pre = load i32*, i32** @a, align 8 + br label %while.cond2thread-pre-split + +sw.epilog: ; preds = %while.cond2, %entry + ret i32 undef +} + ; This test that BFI/BPI is created without any assertion in isMergingEmptyBlockProfitable() ; in the case where empty blocks are removed before creating BFI/BPI. -@b = common global i32 0, align 4 -@a = common global i32* null, align 8 define i32 @should_not_assert(i32 %i) local_unnamed_addr { entry: %0 = load i32, i32* @b, align 4