// Can't thread an unconditional jump, but if the block is "almost
// empty", we can replace uses of it with uses of the successor and make
// this dead.
- // We should not eliminate the loop header either, because eliminating
- // a loop header might later prevent LoopSimplify from transforming nested
- // loops into simplified form.
+ // We should not eliminate the loop header or latch either, because
+ // eliminating a loop header or latch might later prevent LoopSimplify
+ // from transforming nested loops into simplified form. We will rely on
+ // later passes in backend to clean up empty blocks.
if (BI && BI->isUnconditional() &&
BB != &BB->getParent()->getEntryBlock() &&
// If the terminator is the only non-phi instruction, try to nuke it.
- BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB)) {
+ BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB) &&
+ !LoopHeaders.count(BI->getSuccessor(0))) {
// FIXME: It is always conservatively correct to drop the info
// for a block even if it doesn't get erased. This isn't totally
// awesome, but it allows us to use AssertingVH to prevent nasty
bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
IRBuilder<> &Builder) {
BasicBlock *BB = BI->getParent();
+ BasicBlock *Succ = BI->getSuccessor(0);
if (SinkCommon && SinkThenElseCodeToEnd(BI))
return true;
// If the Terminator is the only non-phi instruction, simplify the block.
- // if LoopHeader is provided, check if the block is a loop header
- // (This is for early invocations before loop simplify and vectorization
- // to keep canonical loop forms for nested loops.
- // These blocks can be eliminated when the pass is invoked later
- // in the back-end.)
+ // if LoopHeader is provided, check if the block or its successor is a loop
+ // header (This is for early invocations before loop simplify and
+ // vectorization to keep canonical loop forms for nested loops. These blocks
+ // can be eliminated when the pass is invoked later in the back-end.)
+ bool NeedCanonicalLoop =
+ !LateSimplifyCFG &&
+ (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
- (!LoopHeaders || !LoopHeaders->count(BB)) &&
- TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
// If the only instruction in the block is a seteq/setne comparison
do.body.i:
; CHECK-LABEL: do.body.i:
-; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3
-; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32*
-; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032
+; CHECK: %uglygep1 = getelementptr i8, i8* %uglygep, i64 %3
+; CHECK-NEXT: %4 = bitcast i8* %uglygep1 to i32*
+; CHECK-NOT: %uglygep1 = getelementptr i8, i8* %uglygep, i64 1032
%0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ]
--- /dev/null
+; RUN: opt < %s -jump-threading -S | FileCheck %s
+
+; Skip simplifying unconditional branches from empty blocks in simplifyCFG,
+; when it can destroy canonical loop structure.
+
+; void foo();
+; bool test(int a, int b, int *c) {
+; bool changed = false;
+; for (unsigned int i = 2; i--;) {
+; int r = a | b;
+; if ( r != c[i]) {
+; c[i] = r;
+; foo();
+; changed = true;
+; }
+; }
+; return changed;
+; }
+
+; CHECK-LABEL: @test(
+; CHECK: for.cond:
+; CHECK-NEXT: %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ]
+; CHECK: for.body:
+; CHECK: br i1 %cmp, label %if.end, label %if.then
+; CHECK-NOT: br i1 %cmp, label %for.cond, label %if.then
+; CHECK: if.then:
+; CHECK: br label %if.end
+; CHECK-NOT: br label %for.cond
+; CHECK: if.end:
+; CHECK br label %for.cond
+define i1 @test(i32 %a, i32 %b, i32* %c) {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %if.end, %entry
+ %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ]
+ %changed.0.off0 = phi i1 [ false, %entry ], [ %changed.1.off0, %if.end ]
+ %dec = add nsw i32 %i.0, -1
+ %tobool = icmp eq i32 %i.0, 0
+ br i1 %tobool, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.cond
+ %changed.0.off0.lcssa = phi i1 [ %changed.0.off0, %for.cond ]
+ ret i1 %changed.0.off0.lcssa
+
+for.body: ; preds = %for.cond
+ %or = or i32 %a, %b
+ %idxprom = sext i32 %dec to i64
+ %arrayidx = getelementptr inbounds i32, i32* %c, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %cmp = icmp eq i32 %or, %0
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %for.body
+ store i32 %or, i32* %arrayidx, align 4
+ call void @foo()
+ br label %if.end
+
+if.end: ; preds = %for.body, %if.then
+ %changed.1.off0 = phi i1 [ true, %if.then ], [ %changed.0.off0, %for.body ]
+ br label %for.cond
+}
+
+declare void @foo()
; Verify the new backedge:
; CHECK: check_2.thread:
; CHECK-NEXT: call void @bar()
-; CHECK-NEXT: br label %check_1
+; CHECK-NEXT: br label %check_3.thread
check_2:
%cond2 = icmp eq i32 %v, 2
; Verify the new backedge:
; CHECK: eq_2:
; CHECK-NEXT: call void @bar()
-; CHECK-NEXT: br label %check_1
+; CHECK-NEXT: br label %check_3.thread
check_3:
%condE = icmp eq i32 %v, 3
; CHECK: %[[INC2:.*]] = getelementptr inbounds i32, i32* %p, i64 2
; CHECK: store i32 2, i32* %[[INC2]], align 4
; CHECK: %[[CMP3:.*]] = icmp eq i32 %k, 3
-; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP:.*]]
+; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP_PH:.*]]
+; CHECK: [[LOOP_PH]]:
+; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK: %[[IV:.*]] = phi i32 [ {{.*}}, %[[LOOP]] ], [ 3, %[[NEXT2]] ]
+; CHECK: %[[IV:.*]] = phi i32 [ 3, %[[LOOP_PH]] ], [ {{.*}}, %[[LOOP]] ]
define void @basic(i32* %p, i32 %k) #0 {
entry:
; CHECK: %[[INC2:.*]] = getelementptr inbounds i32, i32* %p, i64 2
; CHECK: store i32 2, i32* %[[INC2]], align 4
; CHECK: %[[CMP3:.*]] = icmp eq i32 %k, 3
-; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP:.*]]
+; CHECK: br i1 %[[CMP3]], label %for.end, label %[[LOOP_PH:.*]]
+; CHECK: [[LOOP_PH]]:
+; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK: %[[IV:.*]] = phi i32 [ %[[IV:.*]], %[[LOOP]] ], [ 3, %[[NEXT2]] ]
+; CHECK: %[[IV:.*]] = phi i32 [ 3, %[[LOOP_PH]] ], [ %[[IV:.*]], %[[LOOP]] ]
; CHECK: %ret = phi i32 [ 0, %entry ], [ 1, %[[NEXT0]] ], [ 2, %[[NEXT1]] ], [ 3, %[[NEXT2]] ], [ %[[IV]], %[[LOOP]] ]
; CHECK: ret i32 %ret
define i32 @output(i32* %p, i32 %k) #0 {
%cmp1 = icmp eq i32 %a, 12345
br i1 %cmp1, label %if.then, label %if.else, !prof !0
; CHECK: %cmp1 = icmp eq i32 %a, 12345
-; CHECK-NEXT: br i1 %cmp1, label %for.body.us, label %for.body, !prof !0
+; CHECK-NEXT: br i1 %cmp1, label %for.body.preheader.split.us, label %for.body.preheader.split, !prof !0
if.then: ; preds = %for.body
; CHECK: for.body.us:
; CHECK: add nsw i32 %{{.*}}, 123
br label %for.body
;CHECK: entry:
;CHECK-NEXT: %cmp1 = icmp eq i32 1, 2
-;CHECK-NEXT: br i1 %cmp1, label %for.body, label %for.cond.cleanup.split, !prof !1
+;CHECK-NEXT: br i1 %cmp1, label %entry.split, label %for.cond.cleanup.split, !prof !1
;CHECK: for.body:
for.body: ; preds = %for.inc, %entry
%inc.i = phi i32 [ 0, %entry ], [ %inc, %if.then ]
; Loop unswitching shouldn't trivially unswitch the true case of condition %a
; in the code here because it leads to an infinite loop. While this doesn't
; contain any instructions with side effects, it's still a kind of side effect.
-; It can trivially unswitch on the false cas of condition %a though.
+; It can trivially unswitch on the false case of condition %a though.
; STATS: 2 loop-unswitch - Number of branches unswitched
; STATS: 2 loop-unswitch - Number of unswitches that are trivial
; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split
; CHECK: entry.split:
-; CHECK-NEXT: br i1 %b, label %for.body, label %abort1.split
+; CHECK-NEXT: br i1 %b, label %entry.split.split, label %abort1.split
; CHECK: for.body:
; CHECK-NEXT: br label %for.body
-; RUN: opt < %s -O3 -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s
+; RUN: opt < %s -O3 -latesimplifycfg -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s
; This test checks auto-vectorization with FP induction variable.
; The FP operation is not "fast" and requires "fast-math" function attribute.
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL1 %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL2 %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -dce -instcombine -S | FileCheck --check-prefix VEC1_INTERL2 %s
-; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -instcombine -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -instcombine -latesimplifycfg -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s
@fp_inc = common global float 0.000000e+00, align 4
; Speculation depth must be limited to avoid a zero-cost instruction cycle.
; CHECK-LABEL: @PR26308(
-; CHECK: while.body:
-; CHECK-NEXT: br label %while.body
+; CHECK: cleanup4:
+; CHECK-NEXT: br label %cleanup4
define i32 @PR26308(i1 %B, i64 %load) {
entry:
-; RUN: opt -simplifycfg -S < %s | FileCheck %s
+; RUN: opt -latesimplifycfg -S < %s | FileCheck %s
; It's not worthwhile to if-convert one of the phi nodes and leave
; the other behind, because that still requires a branch. If
--- /dev/null
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; Skip simplifying unconditional branches from empty blocks in simplifyCFG,
+; when it can destroy canonical loop structure.
+
+; void foo();
+; bool test(int a, int b, int *c) {
+; bool changed = false;
+; for (unsigned int i = 2; i--;) {
+; int r = a | b;
+; if ( r != c[i]) {
+; c[i] = r;
+; foo();
+; changed = true;
+; }
+; }
+; return changed;
+; }
+
+; CHECK-LABEL: @test(
+; CHECK: for.cond:
+; CHECK-NEXT: %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ]
+; CHECK: for.body:
+; CHECK: br i1 %cmp, label %if.end, label %if.then
+; CHECK-NOT: br i1 %cmp, label %for.cond, label %if.then
+; CHECK: if.then:
+; CHECK: br label %if.end
+; CHECK-NOT: br label %for.cond
+; CHECK: if.end:
+; CHECK br label %for.cond
+define i1 @test(i32 %a, i32 %b, i32* %c) {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %if.end, %entry
+ %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ]
+ %changed.0.off0 = phi i1 [ false, %entry ], [ %changed.1.off0, %if.end ]
+ %dec = add nsw i32 %i.0, -1
+ %tobool = icmp eq i32 %i.0, 0
+ br i1 %tobool, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.cond
+ %changed.0.off0.lcssa = phi i1 [ %changed.0.off0, %for.cond ]
+ ret i1 %changed.0.off0.lcssa
+
+for.body: ; preds = %for.cond
+ %or = or i32 %a, %b
+ %idxprom = sext i32 %dec to i64
+ %arrayidx = getelementptr inbounds i32, i32* %c, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %cmp = icmp eq i32 %or, %0
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %for.body
+ store i32 %or, i32* %arrayidx, align 4
+ call void @foo()
+ br label %if.end
+
+if.end: ; preds = %for.body, %if.then
+ %changed.1.off0 = phi i1 [ true, %if.then ], [ %changed.0.off0, %for.body ]
+ br label %for.cond
+}
+
+declare void @foo()
-; RUN: opt -simplifycfg -S < %s | FileCheck %s
+; RUN: opt -latesimplifycfg -S < %s | FileCheck %s
define void @test1(i32 %n) #0 {
entry: