From: Xin Tong Date: Sun, 23 Apr 2017 20:56:29 +0000 (+0000) Subject: [JumpThread] We want to fold (not thread) when all predecessor go to single BB's... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fcc344502c5385ea4a73aa4bbcb5415443bf3468;p=llvm [JumpThread] We want to fold (not thread) when all predecessor go to single BB's successor. Summary: In case all predecessors go to a single successor of the current BB, we want to fold (not thread). I failed to update the phi nodes properly in the last patch https://reviews.llvm.org/rL300657. Phi node values are per predecessor in LLVM. Reviewers: sanjoy Reviewed By: sanjoy Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D32400 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@301139 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 08eb95a1a3d..a0da81605a8 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -1289,6 +1289,36 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, if (PredToDestList.empty()) return false; + // If all the predecessors go to a single known successor, we want to fold, + // not thread. By doing so, we do not need to duplicate the current block and + // also miss potential opportunities in case we dont/cant duplicate. + if (OnlyDest && OnlyDest != MultipleDestSentinel) { + if (PredToDestList.size() == + (size_t)std::distance(pred_begin(BB), pred_end(BB))) { + bool SeenFirstBranchToOnlyDest = false; + for (BasicBlock *SuccBB : successors(BB)) { + if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) + SeenFirstBranchToOnlyDest = true; // Don't modify the first branch. + else + SuccBB->removePredecessor(BB, true); // This is unreachable successor. + } + + // Finally update the terminator. 
+ TerminatorInst *Term = BB->getTerminator(); + BranchInst::Create(OnlyDest, Term); + Term->eraseFromParent(); + + // If the condition is now dead due to the removal of the old terminator, + // erase it. + auto *CondInst = dyn_cast<Instruction>(Cond); + if (CondInst && CondInst->use_empty()) + CondInst->eraseFromParent(); + // FIXME: in case this instruction is defined in the current BB and it + // resolves to a single value from all predecessors, we can do RAUW. + return true; + } + } + // Determine which is the most common successor. If we have many inputs and // this block is a switch, we want to start by threading the batch that goes // to the most popular destination first. If we only know about one diff --git a/test/Transforms/JumpThreading/fold-not-thread.ll b/test/Transforms/JumpThreading/fold-not-thread.ll new file mode 100644 index 00000000000..75deca62f75 --- /dev/null +++ b/test/Transforms/JumpThreading/fold-not-thread.ll @@ -0,0 +1,135 @@ +; RUN: opt -jump-threading -S -verify < %s | FileCheck %s + +declare i32 @f1() +declare i32 @f2() +declare void @f3() +declare void @f4(i32) + + +; Make sure we update the phi node properly. +; +; CHECK-LABEL: define void @test_br_folding_not_threading_update_phi( +; CHECK: br label %L1 +; Make sure we update the phi node properly here, i.e. 
we only have 2 predecessors, entry and L0 +; CHECK: %res.0 = phi i32 [ 0, %L0 ], [ 1, %entry ] +define void @test_br_folding_not_threading_update_phi(i32 %val) nounwind { +entry: + %cmp = icmp eq i32 %val, 32 + br i1 %cmp, label %L0, label %L1 +L0: + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + switch i32 %val, label %L2 [ + i32 0, label %L1 + i32 32, label %L1 + ] + +L1: + %res.0 = phi i32 [ 0, %L0 ], [ 0, %L0 ], [1, %entry] + call void @f4(i32 %res.0) + ret void +L2: + call void @f3() + ret void +} + +; Make sure we can fold this branch ... We will not be able to thread it as +; L0 is too big to duplicate. L2 is the unreachable block here. +; +; CHECK-LABEL: @test_br_folding_not_threading( +; CHECK: L1: +; CHECK: call i32 @f2() +; CHECK: call void @f3() +; CHECK-NEXT: ret void +; CHECK-NOT: br +; CHECK: L3: +define void @test_br_folding_not_threading(i1 %cond) nounwind { +entry: + br i1 %cond, label %L0, label %L3 +L0: + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + br i1 %cond, label %L1, label %L2 + +L1: + call void @f3() + ret void +L2: + call void @f3() + ret void +L3: + call void @f3() + ret void +} + + +; Make sure we can fold this branch ... We will not be able to thread it as +; L0 is too big to duplicate. L2 is the unreachable block here. +; With more than 1 predecessors. 
+; +; CHECK-LABEL: @test_br_folding_not_threading_multiple_preds( +; CHECK: L1: +; CHECK: call i32 @f2() +; CHECK: call void @f3() +; CHECK-NEXT: ret void +; CHECK-NOT: br +; CHECK: L3: +define void @test_br_folding_not_threading_multiple_preds(i1 %condx, i1 %cond) nounwind { +entry: + br i1 %condx, label %X0, label %X1 + +X0: + br i1 %cond, label %L0, label %L3 + +X1: + br i1 %cond, label %L0, label %L3 + +L0: + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + call i32 @f2() + br i1 %cond, label %L1, label %L2 + +L1: + call void @f3() + ret void +L2: + call void @f3() + ret void +L3: + call void @f3() + ret void +} +