void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
private:
- void splitInnerLoopLatch(Instruction *);
void splitInnerLoopHeader();
bool adjustLoopLinks();
void adjustLoopPreheaders();
if (InnerLoop->getSubLoops().empty()) {
BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
- LLVM_DEBUG(dbgs() << "Calling Split Inner Loop\n");
+ LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
if (!InductionPHI) {
LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
if (&InductionPHI->getParent()->front() != InductionPHI)
InductionPHI->moveBefore(&InductionPHI->getParent()->front());
- // Split at the place were the induction variable is
- // incremented/decremented.
- // TODO: This splitting logic may not work always. Fix this.
- splitInnerLoopLatch(InnerIndexVar);
- LLVM_DEBUG(dbgs() << "splitInnerLoopLatch done\n");
+ // Create a new latch block for the inner loop. We split at the
+ // current latch's terminator and then move the condition and all
+ // operands that are neither loop-invariant nor the induction PHI into the
+ // new latch block.
+ BasicBlock *NewLatch =
+ SplitBlock(InnerLoop->getLoopLatch(),
+ InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
+
+ SmallSetVector<Instruction *, 4> WorkList;
+ unsigned i = 0;
+ auto MoveInstructions = [&i, &WorkList, this, InductionPHI, NewLatch]() {
+ for (; i < WorkList.size(); i++) {
+ // Clone the instruction and insert the clone at the top of the new latch
+ // (before its first non-PHI). Redirect the uses that must see the clone.
+ Instruction *NewI = WorkList[i]->clone();
+ NewI->insertBefore(NewLatch->getFirstNonPHI());
+ assert(!NewI->mayHaveSideEffects() &&
+ "Moving instructions with side-effects may change behavior of "
+ "the loop nest!");
+ for (auto UI = WorkList[i]->use_begin(), UE = WorkList[i]->use_end();
+ UI != UE;) {
+ Use &U = *UI++; // Advance first: U.set() below unlinks U from this use list.
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ if (!InnerLoop->contains(UserI->getParent()) ||
+ UserI->getParent() == NewLatch || UserI == InductionPHI)
+ U.set(NewI);
+ }
+ // Queue the operands of the moved instruction so they are moved as well,
+ // skipping non-instructions, operands that LoopInfo does not map to the inner loop, and the induction PHI.
+ for (Value *Op : WorkList[i]->operands()) {
+ Instruction *OpI = dyn_cast<Instruction>(Op);
+ if (!OpI ||
+ this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
+ OpI == InductionPHI)
+ continue;
+ WorkList.insert(OpI);
+ }
+ }
+ };
+
+ // FIXME: Should we interchange when we have a constant condition?
+ Instruction *CondI = dyn_cast<Instruction>(
+ cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
+ ->getCondition());
+ if (CondI)
+ WorkList.insert(CondI);
+ MoveInstructions();
+ WorkList.insert(cast<Instruction>(InnerIndexVar));
+ MoveInstructions();
// Splits the inner loops phi nodes out into a separate basic block.
BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
return true;
}
-void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
- SplitBlock(InnerLoop->getLoopLatch(), Inc, DT, LI);
-}
-
/// \brief Move all instructions except the terminator from FromBB right before
/// InsertBefore
static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
; CHECK: for1.header.preheader:
; CHECK-NEXT: br label [[FOR1_HEADER:%.*]]
; CHECK: for1.header:
-; CHECK-NEXT: [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR1_INC10:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
+; CHECK-NEXT: [[J23:%.*]] = phi i64 [ [[J_NEXT24:%.*]], [[FOR1_INC10:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
; CHECK-NEXT: br label [[FOR2_SPLIT1:%.*]]
; CHECK: for2.preheader:
; CHECK-NEXT: br label [[FOR2:%.*]]
; CHECK: for2:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR2_SPLIT:%.*]] ], [ 0, [[FOR2_PREHEADER]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[TMP0:%.*]], [[FOR2_SPLIT:%.*]] ], [ 0, [[FOR2_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]]
; CHECK: for2.split1:
-; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV23]]
+; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[J]], i64 [[J23]]
; CHECK-NEXT: [[LV:%.*]] = load i64, i64* [[ARRAYIDX5]]
; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[LV]], [[K:%.*]]
; CHECK-NEXT: store i64 [[ADD]], i64* [[ARRAYIDX5]]
+; CHECK-NEXT: [[J_NEXT:%.*]] = add nuw nsw i64 [[J]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[J]], 99
; CHECK-NEXT: br label [[FOR1_INC10]]
; CHECK: for2.split:
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV]], 99
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END12:%.*]], label [[FOR2]]
+; CHECK-NEXT: [[TMP0]] = add nuw nsw i64 [[J]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[J]], 99
+; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END12:%.*]], label [[FOR2]]
; CHECK: for1.inc10:
-; CHECK-NEXT: [[INDVARS_IV_NEXT24]] = add nuw nsw i64 [[INDVARS_IV23]], 1
-; CHECK-NEXT: [[EXITCOND26:%.*]] = icmp eq i64 [[INDVARS_IV23]], 99
+; CHECK-NEXT: [[J_NEXT24]] = add nuw nsw i64 [[J23]], 1
+; CHECK-NEXT: [[EXITCOND26:%.*]] = icmp eq i64 [[J23]], 99
; CHECK-NEXT: br i1 [[EXITCOND26]], label [[FOR2_SPLIT]], label [[FOR1_HEADER]]
; CHECK: for.end12:
; CHECK-NEXT: ret void
; CHECK: for1.header.preheader:
; CHECK-NEXT: br label [[FOR1_HEADER:%.*]]
; CHECK: for1.header:
-; CHECK-NEXT: [[INDVARS_IV19:%.*]] = phi i64 [ [[INDVARS_IV_NEXT20:%.*]], [[FOR1_INC10:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
+; CHECK-NEXT: [[J19:%.*]] = phi i64 [ [[J_NEXT20:%.*]], [[FOR1_INC10:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
; CHECK-NEXT: br label [[FOR3_SPLIT1:%.*]]
; CHECK: for3.preheader:
; CHECK-NEXT: br label [[FOR3:%.*]]
; CHECK: for3:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR3_SPLIT:%.*]] ], [ 100, [[FOR3_PREHEADER]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[TMP1:%.*]], [[FOR3_SPLIT:%.*]] ], [ 100, [[FOR3_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]]
; CHECK: for3.split1:
-; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV19]]
+; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[J]], i64 [[J19]]
; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX5]]
; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP0]], [[K:%.*]]
; CHECK-NEXT: store i64 [[ADD]], i64* [[ARRAYIDX5]]
+; CHECK-NEXT: [[J_NEXT:%.*]] = add nsw i64 [[J]], -1
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[J]], 0
; CHECK-NEXT: br label [[FOR1_INC10]]
; CHECK: for3.split:
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
-; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0
-; CHECK-NEXT: br i1 [[CMP2]], label [[FOR3]], label [[FOR_END11:%.*]]
+; CHECK-NEXT: [[TMP1]] = add nsw i64 [[J]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i64 [[J]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[FOR3]], label [[FOR_END11:%.*]]
; CHECK: for1.inc10:
-; CHECK-NEXT: [[INDVARS_IV_NEXT20]] = add nuw nsw i64 [[INDVARS_IV19]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT20]], 100
+; CHECK-NEXT: [[J_NEXT20]] = add nuw nsw i64 [[J19]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[J_NEXT20]], 100
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR3_SPLIT]], label [[FOR1_HEADER]]
; CHECK: for.end11:
; CHECK-NEXT: ret void
;; FIXME: DA misses this case after D35430
define void @interchange_10() {
+; CHECK-LABEL: @interchange_10(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR1_HEADER:%.*]]
+; CHECK: for1.header:
+; CHECK-NEXT: [[J23:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[J_NEXT24:%.*]], [[FOR1_INC10:%.*]] ]
+; CHECK-NEXT: [[J_NEXT24]] = add nuw nsw i64 [[J23]], 1
+; CHECK-NEXT: br label [[FOR2:%.*]]
+; CHECK: for2:
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR2]] ], [ 1, [[FOR1_HEADER]] ]
+; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 1
+; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[J]], i64 [[J23]]
+; CHECK-NEXT: store i64 [[J]], i64* [[ARRAYIDX5]]
+; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[J]], i64 [[J_NEXT24]]
+; CHECK-NEXT: store i64 [[J23]], i64* [[ARRAYIDX10]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[J]], 99
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR1_INC10]], label [[FOR2]]
+; CHECK: for1.inc10:
+; CHECK-NEXT: [[EXITCOND26:%.*]] = icmp eq i64 [[J23]], 98
+; CHECK-NEXT: br i1 [[EXITCOND26]], label [[FOR_END12:%.*]], label [[FOR1_HEADER]]
+; CHECK: for.end12:
+; CHECK-NEXT: ret void
+;
entry:
br label %for1.header
define void @test_lcssa_indvars1() {
; CHECK-LABEL: @test_lcssa_indvars1()
+; CHECK-LABEL: inner.body:
+; CHECK-NEXT: %iv.inner = phi i64 [ %[[IVNEXT:[0-9]+]], %inner.body.split ], [ 5, %inner.body.preheader ]
+
; CHECK-LABEL: inner.body.split:
; CHECK-NEXT: %0 = phi i64 [ %iv.outer.next, %outer.latch ]
-; CHECK-NEXT: %iv.inner.next = add nsw i64 %iv.inner, -1
+; CHECK-NEXT: %[[IVNEXT]] = add nsw i64 %iv.inner, -1
+; CHECK-NEXT: %[[COND:[0-9]+]] = icmp eq i64 %iv.inner, 0
+; CHECK-NEXT: br i1 %[[COND]], label %exit, label %inner.body
; CHECK-LABEL: exit:
; CHECK-NEXT: %v4.lcssa = phi i64 [ %0, %inner.body.split ]
-; CHECK-NEXT: %v8.lcssa.lcssa = phi i64 [ %iv.inner.next, %inner.body.split ]
+; CHECK-NEXT: %v8.lcssa.lcssa = phi i64 [ %[[IVNEXT]], %inner.body.split ]
; CHECK-NEXT: store i64 %v8.lcssa.lcssa, i64* @b, align 4
; CHECK-NEXT: store i64 %v4.lcssa, i64* @a, align 4
define void @test_lcssa_indvars2() {
; CHECK-LABEL: @test_lcssa_indvars2()
+; CHECK-LABEL: inner.body:
+; CHECK-NEXT: %iv.inner = phi i64 [ %[[IVNEXT:[0-9]+]], %inner.body.split ], [ 5, %inner.body.preheader ]
+
; CHECK-LABEL: inner.body.split:
; CHECK-NEXT: %0 = phi i64 [ %iv.outer, %outer.latch ]
-; CHECK-NEXT: %iv.inner.next = add nsw i64 %iv.inner, -1
+; CHECK-NEXT: %[[IVNEXT]] = add nsw i64 %iv.inner, -1
+; CHECK-NEXT: %[[COND:[0-9]+]] = icmp eq i64 %[[IVNEXT]], 0
+; CHECK-NEXT: br i1 %[[COND]], label %exit, label %inner.body
; CHECK-LABEL: exit:
; CHECK-NEXT: %v4.lcssa = phi i64 [ %0, %inner.body.split ]
define void @test_lcssa_indvars3() {
; CHECK-LABEL: @test_lcssa_indvars3()
+; CHECK-LABEL: inner.body:
+; CHECK-NEXT: %iv.inner = phi i64 [ %[[IVNEXT:[0-9]+]], %inner.body.split ], [ 5, %inner.body.preheader ]
+
; CHECK-LABEL: inner.body.split:
; CHECK-NEXT: %0 = phi i64 [ %iv.outer.next, %outer.latch ]
-; CHECK-NEXT: %iv.inner.next = add nsw i64 %iv.inner, -1
+; CHECK-NEXT: %[[IVNEXT]] = add nsw i64 %iv.inner, -1
+; CHECK-NEXT: %[[COND:[0-9]+]] = icmp eq i64 %iv.inner, 0
+; CHECK-NEXT: br i1 %[[COND]], label %exit, label %inner.body
; CHECK-LABEL: exit:
; CHECK-NEXT: %v4.lcssa = phi i64 [ %0, %inner.body.split ]
-; CHECK-NEXT: %v8.lcssa.lcssa = phi i64 [ %iv.inner.next, %inner.body.split ]
-; CHECK-NEXT: %v8.lcssa.lcssa.2 = phi i64 [ %iv.inner.next, %inner.body.split ]
+; CHECK-NEXT: %v8.lcssa.lcssa = phi i64 [ %[[IVNEXT]], %inner.body.split ]
+; CHECK-NEXT: %v8.lcssa.lcssa.2 = phi i64 [ %[[IVNEXT]], %inner.body.split ]
; CHECK-NEXT: %r1 = add i64 %v8.lcssa.lcssa, %v8.lcssa.lcssa.2
; CHECK-NEXT: store i64 %r1, i64* @b, align 4
; CHECK-NEXT: store i64 %v4.lcssa, i64* @a, align 4
; CHECK-LABEL: inner.ph:
; CHECK-NEXT: br label %inner.body
; CHECK-LABEL: inner.body:
-; CHECK-NEXT: %tmp31 = phi i32 [ 0, %inner.ph ], [ %tmp6, %inner.body.split ]
+; CHECK-NEXT: %tmp31 = phi i32 [ 0, %inner.ph ], [ %[[IVNEXT:[0-9]]], %inner.body.split ]
; CHECK-NEXT: br label %outer.ph
+; CHECK-LABEL: inner.body.split:
+; CHECK-NEXT: %[[IVNEXT]] = add nsw i32 %tmp31, 1
+; CHECK-NEXT: br i1 false, label %inner.body, label %exit
+
bb:
br label %outer.ph
; CHECK: for3.preheader:
; CHECK-NEXT: br label [[FOR3:%.*]]
; CHECK: for3:
-; CHECK-NEXT: [[K:%.*]] = phi i32 [ [[INC:%.*]], [[FOR3_SPLIT:%.*]] ], [ 1, [[FOR3_PREHEADER]] ]
+; CHECK-NEXT: [[K:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR3_SPLIT:%.*]] ], [ 1, [[FOR3_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]]
; CHECK: for3.split1:
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[K]], [[MUL]]
; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2
; CHECK-NEXT: [[ADD15:%.*]] = add nsw i16 [[TMP0]], 1
; CHECK-NEXT: store i16 [[ADD15]], i16* [[ARRAYIDX]]
+; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 90
; CHECK-NEXT: br label [[FOR2_INC16]]
; CHECK: for3.split:
-; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 90
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR3]]
+; CHECK-NEXT: [[TMP1]] = add nuw nsw i32 [[K]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 90
+; CHECK-NEXT: br i1 [[TMP2]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR3]]
; CHECK: for2.inc16:
; CHECK-NEXT: [[INC17]] = add nuw nsw i32 [[J]], 1
; CHECK-NEXT: [[EXITCOND47:%.*]] = icmp eq i32 [[INC17]], 90
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -loop-interchange -verify-loop-lcssa -verify-dom-info -S %s | FileCheck %s
+
+@b = external dso_local global [5 x i32], align 16
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY2_PREHEADER:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INC41:%.*]] = phi i32 [ [[INC4:%.*]], [[FOR_INC3:%.*]] ], [ undef, [[FOR_BODY_PREHEADER:%.*]] ]
+; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[INC41]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT: br label [[FOR_BODY2_SPLIT:%.*]]
+; CHECK: for.body2.preheader:
+; CHECK-NEXT: br label [[FOR_BODY2:%.*]]
+; CHECK: for.body2:
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_INC_SPLIT:%.*]] ], [ 1, [[FOR_BODY2_PREHEADER]] ]
+; CHECK-NEXT: br label [[FOR_BODY_PREHEADER]]
+; CHECK: for.body2.split:
+; CHECK-NEXT: br label [[FOR_INC:%.*]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: store i32 undef, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[LSR_IV]], 4
+; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = add nuw nsw i32 [[LSR_IV]], 1
+; CHECK-NEXT: br label [[FOR_COND1_FOR_END_CRIT_EDGE:%.*]]
+; CHECK: for.inc.split:
+; CHECK-NEXT: [[TMP1]] = add nuw nsw i32 [[LSR_IV]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[LSR_IV]], 4
+; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY2]], label [[FOR_COND_FOR_END5_CRIT_EDGE:%.*]]
+; CHECK: for.cond1.for.end_crit_edge:
+; CHECK-NEXT: br label [[FOR_INC3]]
+; CHECK: for.inc3:
+; CHECK-NEXT: [[INC4]] = add nsw i32 [[INC41]], 1
+; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_INC_SPLIT]]
+; CHECK: for.cond.for.end5_crit_edge:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc3, %entry
+ %inc41 = phi i32 [ %inc4, %for.inc3 ], [ undef, %entry ]
+ br label %for.body2
+
+for.body2: ; preds = %for.inc, %for.body
+ %lsr.iv = phi i32 [ %lsr.iv.next, %for.inc ], [ 1, %for.body ]
+ br label %for.inc
+
+for.inc: ; preds = %for.body2
+ %idxprom = sext i32 %inc41 to i64
+ %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ store i32 undef, i32* %arrayidx, align 4
+ %cmp = icmp slt i32 %lsr.iv, 4
+ %lsr.iv.next = add nuw nsw i32 %lsr.iv, 1
+ br i1 %cmp, label %for.body2, label %for.cond1.for.end_crit_edge
+
+for.cond1.for.end_crit_edge: ; preds = %for.inc
+ br label %for.inc3
+
+for.inc3: ; preds = %for.cond1.for.end_crit_edge
+ %inc4 = add nsw i32 %inc41, 1
+ br i1 undef, label %for.body, label %for.cond.for.end5_crit_edge
+
+for.cond.for.end5_crit_edge: ; preds = %for.inc3
+ ret void
+}
+
+define void @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY2_PREHEADER:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INC41:%.*]] = phi i32 [ [[INC4:%.*]], [[FOR_INC3:%.*]] ], [ undef, [[FOR_BODY_PREHEADER:%.*]] ]
+; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[INC41]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT: br label [[FOR_BODY2_SPLIT:%.*]]
+; CHECK: for.body2.preheader:
+; CHECK-NEXT: br label [[FOR_BODY2:%.*]]
+; CHECK: for.body2:
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_INC_SPLIT:%.*]] ], [ 1, [[FOR_BODY2_PREHEADER]] ]
+; CHECK-NEXT: br label [[FOR_BODY_PREHEADER]]
+; CHECK: for.body2.split:
+; CHECK-NEXT: br label [[FOR_INC:%.*]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[LSR_IV]], 4
+; CHECK-NEXT: [[CMP_ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: store i32 [[CMP_ZEXT]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = add nuw nsw i32 [[LSR_IV]], 1
+; CHECK-NEXT: br label [[FOR_COND1_FOR_END_CRIT_EDGE:%.*]]
+; CHECK: for.inc.split:
+; CHECK-NEXT: [[TMP1]] = add nuw nsw i32 [[LSR_IV]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[LSR_IV]], 4
+; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY2]], label [[FOR_COND_FOR_END5_CRIT_EDGE:%.*]]
+; CHECK: for.cond1.for.end_crit_edge:
+; CHECK-NEXT: br label [[FOR_INC3]]
+; CHECK: for.inc3:
+; CHECK-NEXT: [[INC4]] = add nsw i32 [[INC41]], 1
+; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_INC_SPLIT]]
+; CHECK: for.cond.for.end5_crit_edge:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc3, %entry
+ %inc41 = phi i32 [ %inc4, %for.inc3 ], [ undef, %entry ]
+ br label %for.body2
+
+for.body2: ; preds = %for.inc, %for.body
+ %lsr.iv = phi i32 [ %lsr.iv.next, %for.inc ], [ 1, %for.body ]
+ br label %for.inc
+
+for.inc: ; preds = %for.body2
+ %idxprom = sext i32 %inc41 to i64
+ %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %cmp = icmp slt i32 %lsr.iv, 4
+ %cmp.zext = zext i1 %cmp to i32
+ store i32 %cmp.zext, i32* %arrayidx, align 4
+ %lsr.iv.next = add nuw nsw i32 %lsr.iv, 1
+ br i1 %cmp, label %for.body2, label %for.cond1.for.end_crit_edge
+
+for.cond1.for.end_crit_edge: ; preds = %for.inc
+ br label %for.inc3
+
+for.inc3: ; preds = %for.cond1.for.end_crit_edge
+ %inc4 = add nsw i32 %inc41, 1
+ br i1 undef, label %for.body, label %for.cond.for.end5_crit_edge
+
+for.cond.for.end5_crit_edge: ; preds = %for.inc3
+ ret void
+}
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* [[ARR:%.*]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV23]]
; CHECK-NEXT: [[LV:%.*]] = load i64, i64* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[SUM_INC]] = add i64 [[SUM_INNER]], [[LV]]
+; CHECK-NEXT: [[IV_ORIGINAL:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXIT1_ORIGINAL:%.*]] = icmp eq i64 [[IV_ORIGINAL]], 100
; CHECK-NEXT: br label [[FOR1_INC]]
; CHECK: for2.split:
; CHECK-NEXT: [[SUM_INC_LCSSA]] = phi i64 [ [[SUM_INC]], %for1.inc ]