// TODO: Should these be here or in LoopUnroll?
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+STATISTIC(NumUnrolledWithHeader, "Number of loops unrolled without a "
+ "conditional latch (completely or otherwise)");
static cl::opt<bool>
UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
return LoopUnrollResult::Unmodified;
}
- // The current loop unroll pass can only unroll loops with a single latch
+ // The current loop unroll pass can unroll loops with a single latch or header
// that's a conditional branch exiting the loop.
// FIXME: The implementation can be extended to work with more complicated
// cases, e.g. loops with multiple latches.
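+ // As an illustrative sketch (not tied to a specific test), the two supported
+ // shapes are a loop exiting through a conditional latch:
+ //   latch:  br i1 %c, label %header, label %exit
+ // and a loop exiting through a conditional header with an unconditional
+ // latch:
+ //   header: br i1 %c, label %exit, label %body
+ //   latch:  br label %header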
BasicBlock *Header = L->getHeader();
+ BranchInst *HeaderBI = dyn_cast<BranchInst>(Header->getTerminator());
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
- if (!BI || BI->isUnconditional()) {
- // The loop-rotate pass can be helpful to avoid this in many cases.
+ // FIXME: Support loops without a conditional latch that have multiple
+ // exiting blocks.
+ if (!BI ||
+ (BI->isUnconditional() && (!HeaderBI || HeaderBI->isUnconditional() ||
+ L->getExitingBlock() != Header))) {
+ LLVM_DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional "
+ "branch in the latch or header.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+
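+ // Check that one latch successor is the header (the back edge) and the
+ // other successor leaves the loop, in either order.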
+ auto CheckLatchSuccessors = [&](unsigned S1, unsigned S2) {
+ return BI->isConditional() && BI->getSuccessor(S1) == Header &&
+ !L->contains(BI->getSuccessor(S2));
+ };
+
+ // If we have a conditional latch, it must exit the loop.
+ if (BI && BI->isConditional() && !CheckLatchSuccessors(0, 1) &&
+ !CheckLatchSuccessors(1, 0)) {
LLVM_DEBUG(
- dbgs()
- << " Can't unroll; loop not terminated by a conditional branch.\n");
+ dbgs() << "Can't unroll; a conditional latch must exit the loop.\n");
return LoopUnrollResult::Unmodified;
}
- auto CheckSuccessors = [&](unsigned S1, unsigned S2) {
- return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2));
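+ // Check that one header successor stays inside the loop while the other
+ // exits it, in either order.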
+ auto CheckHeaderSuccessors = [&](unsigned S1, unsigned S2) {
+ return HeaderBI && HeaderBI->isConditional() &&
+ L->contains(HeaderBI->getSuccessor(S1)) &&
+ !L->contains(HeaderBI->getSuccessor(S2));
};
- if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
- LLVM_DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
- " exiting the loop can be unrolled\n");
+ // If we do not have a conditional latch, the header must exit the loop.
+ if (BI && !BI->isConditional() && HeaderBI && HeaderBI->isConditional() &&
+ !CheckHeaderSuccessors(0, 1) && !CheckHeaderSuccessors(1, 0)) {
+ LLVM_DEBUG(
+ dbgs() << "Can't unroll; a conditional header must exit the loop.\n");
return LoopUnrollResult::Unmodified;
}
SE->forgetTopmostLoop(L);
}
- bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
- BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
+ bool ContinueOnTrue;
+ bool LatchIsExiting = BI->isConditional();
+ BasicBlock *LoopExit = nullptr;
+ if (LatchIsExiting) {
+ ContinueOnTrue = L->contains(BI->getSuccessor(0));
+ LoopExit = BI->getSuccessor(ContinueOnTrue);
+ } else {
+ NumUnrolledWithHeader++;
+ ContinueOnTrue = L->contains(HeaderBI->getSuccessor(0));
+ LoopExit = HeaderBI->getSuccessor(ContinueOnTrue);
+ }
// For the first iteration of the loop, we should use the precloned values for
// PHI nodes. Insert associations now.
OrigPHINode.push_back(cast<PHINode>(I));
}
- std::vector<BasicBlock*> Headers;
- std::vector<BasicBlock*> Latches;
+ std::vector<BasicBlock *> Headers;
+ std::vector<BasicBlock *> HeaderSucc;
+ std::vector<BasicBlock *> Latches;
Headers.push_back(Header);
Latches.push_back(LatchBlock);
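+ // If the latch is not the exiting block, the header is; record the in-loop
+ // successor of the original header. The corresponding successors of the
+ // cloned headers are collected below while cloning and later receive the
+ // headers' in-loop branches.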
+ if (!LatchIsExiting) {
+ auto *Term = cast<BranchInst>(Header->getTerminator());
+ if (Term->isUnconditional() || L->contains(Term->getSuccessor(0))) {
+ assert(L->contains(Term->getSuccessor(0)));
+ HeaderSucc.push_back(Term->getSuccessor(0));
+ } else {
+ assert(L->contains(Term->getSuccessor(1)));
+ HeaderSucc.push_back(Term->getSuccessor(1));
+ }
+ }
+
// The current on-the-fly SSA update requires blocks to be processed in
// reverse postorder so that LastValueMap contains the correct value at each
// exit.
if (*BB == LatchBlock)
Latches.push_back(New);
+ // Keep track of the successor of the new header in the current iteration.
+ for (auto *Pred : predecessors(*BB))
+ if (Pred == Header) {
+ HeaderSucc.push_back(New);
+ break;
+ }
+
NewBlocks.push_back(New);
UnrolledLoopBlocks.push_back(New);
}
}
- // Now that all the basic blocks for the unrolled iterations are in place,
- // set up the branches to connect them.
- for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
- // The original branch was replicated in each unrolled iteration.
- BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
-
- // The branch destination.
- unsigned j = (i + 1) % e;
- BasicBlock *Dest = Headers[j];
- bool NeedConditional = true;
-
- if (RuntimeTripCount && j != 0) {
- NeedConditional = false;
- }
-
- // For a complete unroll, make the last iteration end with a branch
- // to the exit block.
- if (CompletelyUnroll) {
- if (j == 0)
- Dest = LoopExit;
- // If using trip count upper bound to completely unroll, we need to keep
- // the conditional branch except the last one because the loop may exit
- // after any iteration.
- assert(NeedConditional &&
- "NeedCondition cannot be modified by both complete "
- "unrolling and runtime unrolling");
- NeedConditional =
- (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
- } else if (j != BreakoutTrip &&
- (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
- // If we know the trip count or a multiple of it, we can safely use an
- // unconditional branch for some iterations.
- NeedConditional = false;
- }
-
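+ // Connect Src to Dest: either retarget Src's conditional branch so the
+ // loop continues at Dest, or drop the phi operands that become dead and
+ // replace the terminator with an unconditional branch to Dest.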
+ auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest,
+ ArrayRef<BasicBlock *> NextBlocks,
+ BasicBlock *CurrentHeader,
+ bool NeedConditional) {
+ auto *Term = cast<BranchInst>(Src->getTerminator());
if (NeedConditional) {
// Update the conditional branch's successor for the following
// iteration.
Term->setSuccessor(!ContinueOnTrue, Dest);
} else {
// Remove phi operands at this loop exit
if (Dest != LoopExit) {
- BasicBlock *BB = Latches[i];
- for (BasicBlock *Succ: successors(BB)) {
- if (Succ == Headers[i])
+ BasicBlock *BB = Src;
+ for (BasicBlock *Succ : successors(BB)) {
+ if (Succ == CurrentHeader)
continue;
for (PHINode &Phi : Succ->phis())
Phi.removeIncomingValue(BB, false);
}
}
// Replace the conditional branch with an unconditional one.
BranchInst::Create(Dest, Term);
Term->eraseFromParent();
}
+ };
+
+ // Now that all the basic blocks for the unrolled iterations are in place,
+ // set up the branches to connect them.
+ if (LatchIsExiting) {
+ // Set up latches to branch to the new header in the unrolled iterations or
+ // the loop exit for the last latch in a fully unrolled loop.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = Headers[j];
+ bool NeedConditional = true;
+
+ if (RuntimeTripCount && j != 0) {
+ NeedConditional = false;
+ }
+
+ // For a complete unroll, make the last iteration end with a branch
+ // to the exit block.
+ if (CompletelyUnroll) {
+ if (j == 0)
+ Dest = LoopExit;
+ // If using trip count upper bound to completely unroll, we need to keep
+ // the conditional branch except the last one because the loop may exit
+ // after any iteration.
+ assert(NeedConditional &&
+ "NeedCondition cannot be modified by both complete "
+ "unrolling and runtime unrolling");
+ NeedConditional =
+ (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
+ } else if (j != BreakoutTrip &&
+ (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ NeedConditional = false;
+ }
+
+ setDest(Latches[i], Dest, Headers, Headers[i], NeedConditional);
+ }
+ } else {
+ // Set up headers to branch to their new successors in the unrolled
+ // iterations.
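+ // Each header either keeps its conditional branch (exit vs. in-loop
+ // successor) or, when the trip count guarantees the exit is not taken in
+ // that iteration, gets an unconditional branch to its in-loop successor.
+ // The unconditional latches are rewired below to branch to the next
+ // header, and the last latch becomes unreachable when fully unrolling.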
+ for (unsigned i = 0, e = Headers.size(); i != e; ++i) {
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = HeaderSucc[i];
+ bool NeedConditional = true;
+
+ if (RuntimeTripCount && j != 0)
+ NeedConditional = false;
+
+ if (CompletelyUnroll)
+ // We cannot drop the conditional branch for the last iteration, as we
+ // may still have to execute the loop body depending on the condition.
+ NeedConditional = j == 0 || ULO.PreserveCondBr;
+ else if (j != BreakoutTrip &&
+ (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0))
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ NeedConditional = false;
+
+ setDest(Headers[i], Dest, Headers, Headers[i], NeedConditional);
+ }
+
+ // Set up latches to branch to the new header in the unrolled iterations or
+ // the loop exit for the last latch in a fully unrolled loop.
+
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ // The original branch was replicated in each unrolled iteration.
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = Headers[j];
+
+ // When completely unrolling, the last latch becomes unreachable.
+ if (CompletelyUnroll && j == 0)
+ new UnreachableInst(Term->getContext(), Term);
+ else
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest, Term);
+
+ Term->eraseFromParent();
+ }
}
// Update dominators of blocks we might reach through exits.
ChildrenToUpdate.push_back(ChildBB);
}
BasicBlock *NewIDom;
- if (BB == LatchBlock) {
+ BasicBlock *&TermBlock = LatchIsExiting ? LatchBlock : Header;
+ auto &TermBlocks = LatchIsExiting ? Latches : Headers;
+ if (BB == TermBlock) {
// The latch is special because we emit unconditional branches in
// some cases where the original loop contained a conditional branch.
// Since the latch is always at the bottom of the loop, if the latch
// dominated an exit before unrolling, the new dominator of that exit
// must also be a latch. Specifically, the dominator is the first
// latch which ends in a conditional branch, or the last latch if
// there is no such latch.
- NewIDom = Latches.back();
- for (BasicBlock *IterLatch : Latches) {
- Instruction *Term = IterLatch->getTerminator();
+ // For loops exiting from the header, we only support loops with a
+ // single exiting block, so the same reasoning applies to the headers.
+ NewIDom = TermBlocks.back();
+ for (BasicBlock *Iter : TermBlocks) {
+ Instruction *Term = Iter->getTerminator();
if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
- NewIDom = IterLatch;
+ NewIDom = Iter;
break;
}
}
}
assert(!DT || !UnrollVerifyDomtree ||
- DT->verify(DominatorTree::VerificationLevel::Fast));
+ DT->verify(DominatorTree::VerificationLevel::Fast));
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
// Merge adjacent basic blocks, if possible.
for (BasicBlock *Latch : Latches) {
- BranchInst *Term = cast<BranchInst>(Latch->getTerminator());
- if (Term->isUnconditional()) {
+ BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
+ assert((Term ||
+ (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) &&
+ "Need a branch as terminator, except when fully unrolling with "
+ "unconditional latch");
+ if (Term && Term->isUnconditional()) {
BasicBlock *Dest = Term->getSuccessor(0);
BasicBlock *Fold = Dest->getUniquePredecessor();
if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-count=2 -verify-scev-maps -S | FileCheck %s
; Check SCEV expansion uses existing value when unrolling an inner loop with runtime trip count in a loop nest.
+; The outer loop gets unrolled twice, so we see 2 selects in the outer loop blocks.
; CHECK-LABEL: @foo(
+; CHECK-LABEL: for.body.loopexit:
+; CHECK: select
+; CHECK-LABEL: for.body:
; CHECK: select
; CHECK-NOT: select
; CHECK: ret
%xfL.addr.033 = phi i32 [ %xfL, %entry ], [ %add, %for.body5 ]
%add = add nsw i32 %xfL.addr.033, %scaleL
%shr = ashr i32 %add, 16
- %cmp.i = icmp slt i32 0, %shr
+ %cmp.i = icmp slt i32 10, %shr
%.sroa.speculated = select i1 %cmp.i, i32 0, i32 %shr
%cmp425 = icmp slt i32 0, %.sroa.speculated
br i1 %cmp425, label %for.body5.preheader, label %for.end
--- /dev/null
+; RUN: opt -loop-unroll -unroll-allow-partial -S %s -verify-loop-info -verify-dom-info -verify-loop-lcssa | FileCheck %s
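+; Partially unroll a loop that exits from its conditional header and has an
+; unconditional latch; with the known trip multiple, only one unrolled
+; iteration keeps its exit check.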
+
+@table = internal unnamed_addr global [344 x i32] zeroinitializer, align 16
+
+define i32 @test_partial_unroll_with_breakout_at_iter0() {
+; CHECK-LABEL: define i32 @test_partial_unroll_with_breakout_at_iter0() {
+; CHECK-LABEL: entry:
+; CHECK-NEXT: br label %for.header
+
+; CHECK-LABEL: for.header: ; preds = %for.latch.3, %entry
+; CHECK-NEXT: %red = phi i32 [ 0, %entry ], [ %red.next.3, %for.latch.3 ]
+; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next.3, %for.latch.3 ]
+; CHECK-NEXT: %red.next = add nuw nsw i32 10, %red
+; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 2
+; CHECK-NEXT: %ptr = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next
+; CHECK-NEXT: store i32 %red.next, i32* %ptr, align 4
+; CHECK-NEXT: br label %for.latch
+
+; CHECK-LABEL: for.latch: ; preds = %for.header
+; CHECK-NEXT: %red.next.1 = add nuw nsw i32 10, %red.next
+; CHECK-NEXT: %iv.next.1 = add nuw nsw i64 %iv.next, 2
+; CHECK-NEXT: %ptr.1 = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next.1
+; CHECK-NEXT: store i32 %red.next.1, i32* %ptr.1, align 4
+; CHECK-NEXT: br label %for.latch.1
+
+; CHECK-LABEL: exit: ; preds = %for.latch.2
+; CHECK-NEXT: ret i32 0
+
+; CHECK-LABEL: for.latch.1: ; preds = %for.latch
+; CHECK-NEXT: %red.next.2 = add nuw nsw i32 10, %red.next.1
+; CHECK-NEXT: %iv.next.2 = add nuw nsw i64 %iv.next.1, 2
+; CHECK-NEXT: %ptr.2 = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next.2
+; CHECK-NEXT: store i32 %red.next.2, i32* %ptr.2, align 4
+; CHECK-NEXT: br label %for.latch.2
+
+; CHECK-LABEL: for.latch.2: ; preds = %for.latch.1
+; CHECK-NEXT: %red.next.3 = add nuw nsw i32 10, %red.next.2
+; CHECK-NEXT: %iv.next.3 = add nuw nsw i64 %iv.next.2, 2
+; CHECK-NEXT: %ptr.3 = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next.3
+; CHECK-NEXT: store i32 %red.next.3, i32* %ptr.3, align 4
+; CHECK-NEXT: %exitcond.1.i.3 = icmp eq i64 %iv.next.3, 344
+; CHECK-NEXT: br i1 %exitcond.1.i.3, label %exit, label %for.latch.3
+
+; CHECK-LABEL: for.latch.3: ; preds = %for.latch.2
+; CHECK-NEXT: br label %for.header
+;
+entry:
+ br label %for.header
+
+for.header: ; preds = %for.latch, %entry
+ %red = phi i32 [ 0, %entry ], [ %red.next, %for.latch ]
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.latch ]
+ %red.next = add i32 10, %red
+ %iv.next = add nuw nsw i64 %iv, 2
+ %ptr = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next
+ store i32 %red.next, i32* %ptr, align 4
+ %exitcond.1.i = icmp eq i64 %iv.next, 344
+ br i1 %exitcond.1.i, label %exit, label %for.latch
+
+for.latch: ; preds = %for.header
+ br label %for.header
+
+exit:
+ ret i32 0
+}
br label %header.inner
header.inner: ; preds = %latch.inner, %header.outer
+ %tmp5 = load i64, i64* %q1, align 8
+ %tmp6 = icmp eq double* %p, %arg
+ br i1 undef, label %exiting.inner, label %latch.outer
+
+exiting.inner: ; preds = %header.inner
br i1 undef, label %latch.inner, label %latch.outer
latch.inner: ; preds = %header.inner
- %tmp5 = load i64, i64* %q1, align 8
store i64 %tmp5, i64* %q2, align 8
- %tmp6 = icmp eq double* %p, %arg
br label %header.inner
latch.outer: ; preds = %header.inner
; Check that the loop backedge is removed from the middle loop 1699,
; but not the inner loop 1676.
; CHECK: while.body1694:
-; CHECK: br label %while.cond1676
+; CHECK: unreachable
; CHECK: while.end1699:
; CHECK: br label %sw.default1711
define void @removeSubloopBlocks() nounwind {
--- /dev/null
+; RUN: opt -loop-unroll -S %s -verify-loop-info -verify-dom-info -verify-loop-lcssa | FileCheck %s
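+; Tests for unrolling loops whose single exiting block is the header and
+; whose latch ends in an unconditional branch.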
+
+%struct.spam = type { double, double, double, double, double, double, double }
+
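+; Fully unroll a loop that exits from the header; the unconditional latch of
+; the last unrolled iteration becomes unreachable.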
+define void @test2(i32* %arg) {
+; CHECK-LABEL: void @test2
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %for.header
+
+; CHECK-LABEL: for.header: ; preds = %entry
+; CHECK-NEXT: store i32 0, i32* %arg, align 4
+; CHECK-NEXT: br label %for.latch
+
+; CHECK-LABEL: for.latch: ; preds = %for.header
+; CHECK-NEXT: %ptr.1 = getelementptr inbounds i32, i32* %arg, i64 1
+; CHECK-NEXT: store i32 0, i32* %ptr.1, align 4
+; CHECK-NEXT: br label %for.latch.1
+
+; CHECK-LABEL: if.end.loopexit: ; preds = %for.latch.2
+; CHECK-NEXT: ret void
+
+; CHECK-LABEL: for.latch.1: ; preds = %for.latch
+; CHECK-NEXT: %ptr.2 = getelementptr inbounds i32, i32* %arg, i64 2
+; CHECK-NEXT: store i32 0, i32* %ptr.2, align 4
+; CHECK-NEXT: br label %for.latch.2
+
+; CHECK-LABEL: for.latch.2: ; preds = %for.latch.1
+; CHECK-NEXT: %ptr.3 = getelementptr inbounds i32, i32* %arg, i64 3
+; CHECK-NEXT: store i32 0, i32* %ptr.3, align 4
+; CHECK-NEXT: br i1 true, label %if.end.loopexit, label %for.latch.3
+
+; CHECK-LABEL: for.latch.3: ; preds = %for.latch.2
+; CHECK-NEXT: unreachable
+
+entry:
+ br label %for.header
+
+for.header: ; preds = %for.latch, %entry
+ %indvars.iv800 = phi i64 [ 0, %entry ], [ %indvars.iv.next801, %for.latch ]
+ %ptr = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv800
+ store i32 0, i32* %ptr, align 4
+ %indvars.iv.next801 = add nuw nsw i64 %indvars.iv800, 1
+ %exitcond802 = icmp eq i64 %indvars.iv.next801, 4
+ br i1 %exitcond802, label %if.end.loopexit, label %for.latch
+
+for.latch: ; preds = %for.header
+ br label %for.header
+
+if.end.loopexit: ; preds = %for.header
+ ret void
+}
+
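+; Same as @test2, but the value computed in the header is used by an LCSSA
+; phi in the exit block.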
+define double @test_with_lcssa(double %arg1, double* %arg2) {
+; CHECK-LABEL: define double @test_with_lcssa(
+; CHECK-LABEL: entry:
+; CHECK-NEXT: br label %loop.header
+
+; CHECK-LABEL: loop.header: ; preds = %entry
+; CHECK-NEXT: %res = fsub double %arg1, 3.000000e+00
+; CHECK-NEXT: br label %loop.latch
+
+; CHECK-LABEL: loop.latch: ; preds = %loop.header
+; CHECK-NEXT: %ptr = getelementptr inbounds double, double* %arg2, i64 1
+; CHECK-NEXT: %lv = load double, double* %ptr, align 8
+; CHECK-NEXT: %res.1 = fsub double %lv, %res
+; CHECK-NEXT: br i1 true, label %loop.exit, label %loop.latch.1
+
+; CHECK-LABEL: loop.exit: ; preds = %loop.latch
+; CHECK-NEXT: %res.lcssa = phi double [ %res.1, %loop.latch ]
+; CHECK-NEXT: ret double %res.lcssa
+
+; CHECK-LABEL: loop.latch.1: ; preds = %loop.latch
+; CHECK-NEXT: %ptr.1 = getelementptr inbounds double, double* %arg2, i64 2
+; CHECK-NEXT: unreachable
+
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %entry, %loop.latch
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %d1 = phi double [ %arg1, %entry ], [ %lv, %loop.latch ]
+ %d2 = phi double [ 3.0, %entry ], [ %res, %loop.latch ]
+ %res = fsub double %d1, %d2
+ %iv.next = add nuw nsw i64 %iv, 1
+ %cond = icmp eq i64 %iv.next, 2
+ br i1 %cond, label %loop.exit, label %loop.latch
+
+loop.latch: ; preds = %loop.header
+ %ptr = getelementptr inbounds double, double* %arg2, i64 %iv.next
+ %lv = load double, double* %ptr, align 8
+ br label %loop.header
+
+loop.exit: ; preds = %loop.header
+ %res.lcssa = phi double [ %res, %loop.header ]
+ ret double %res.lcssa
+}
+
+; We unroll the outer loop and need to preserve LI for the inner loop.
+define void @test_with_nested_loop(i32* %arg) {
+; CHECK-LABEL: void @test_with_nested_loop
+; CHECK-LABEL: entry:
+; CHECK-NEXT: br label %outer.header
+
+; CHECK-DAG: outer.header: ; preds = %entry
+; CHECK-NEXT: br label %inner.body.preheader
+
+; CHECK-DAG: inner.body.preheader: ; preds = %outer.header
+; CHECK-NEXT: br label %inner.body
+
+; CHECK-LABEL: inner.body: ; preds = %inner.body.preheader, %inner.body
+; CHECK-NEXT: %j.iv = phi i64 [ %j.iv.next, %inner.body ], [ 0, %inner.body.preheader ]
+; CHECK-NEXT: %ptr = getelementptr inbounds i32, i32* %arg, i64 %j.iv
+; CHECK-NEXT: store i32 0, i32* %ptr, align 4
+; CHECK-NEXT: %j.iv.next = add nuw nsw i64 %j.iv, 1
+; CHECK-NEXT: %inner.cond = icmp eq i64 %j.iv.next, 40000
+; CHECK-NEXT: br i1 %inner.cond, label %outer.latch, label %inner.body
+
+; CHECK-LABEL: outer.latch: ; preds = %inner.body
+; CHECK-NEXT: br label %inner.body.preheader.1
+
+; CHECK-LABEL: exit: ; preds = %outer.latch.1
+; CHECK-NEXT: ret void
+
+; CHECK-LABEL: inner.body.preheader.1: ; preds = %outer.latch
+; CHECK-NEXT: br label %inner.body.1
+
+; CHECK-LABEL: inner.body.1: ; preds = %inner.body.1, %inner.body.preheader.1
+; CHECK-NEXT: %j.iv.1 = phi i64 [ %j.iv.next.1, %inner.body.1 ], [ 0, %inner.body.preheader.1 ]
+; CHECK-NEXT: %idx.1 = add i64 1, %j.iv.1
+; CHECK-NEXT: %ptr.1 = getelementptr inbounds i32, i32* %arg, i64 %idx.1
+; CHECK-NEXT: store i32 0, i32* %ptr.1, align 4
+; CHECK-NEXT: %j.iv.next.1 = add nuw nsw i64 %j.iv.1, 1
+; CHECK-NEXT: %inner.cond.1 = icmp eq i64 %j.iv.next.1, 40000
+; CHECK-NEXT: br i1 %inner.cond.1, label %outer.latch.1, label %inner.body.1
+
+; CHECK-LABEL: outer.latch.1: ; preds = %inner.body.1
+; CHECK-NEXT: br i1 true, label %exit, label %inner.body.preheader.2
+
+; CHECK-LABEL: inner.body.preheader.2: ; preds = %outer.latch.1
+; CHECK-NEXT: br label %inner.body.2
+
+; CHECK-LABEL: inner.body.2: ; preds = %inner.body.2, %inner.body.preheader.2
+; CHECK-NEXT: %j.iv.2 = phi i64 [ %j.iv.next.2, %inner.body.2 ], [ 0, %inner.body.preheader.2 ]
+; CHECK-NEXT: %idx.2 = add i64 2, %j.iv.2
+; CHECK-NEXT: %ptr.2 = getelementptr inbounds i32, i32* %arg, i64 %idx.2
+; CHECK-NEXT: store i32 0, i32* %ptr.2, align 4
+; CHECK-NEXT: %j.iv.next.2 = add nuw nsw i64 %j.iv.2, 1
+; CHECK-NEXT: %inner.cond.2 = icmp eq i64 %j.iv.next.2, 40000
+; CHECK-NEXT: br i1 %inner.cond.2, label %outer.latch.2, label %inner.body.2
+
+; CHECK-LABEL: outer.latch.2: ; preds = %inner.body.2
+; CHECK-NEXT: unreachable
+;
+entry:
+ br label %outer.header
+
+outer.header: ; preds = %outer.latch, %entry
+ %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ]
+ %outer.iv.next = add nuw nsw i64 %outer.iv, 1
+ %outer.cond = icmp eq i64 %outer.iv, 2
+ br i1 %outer.cond, label %exit, label %inner.body
+
+inner.body:
+ %j.iv = phi i64 [ 0, %outer.header ], [ %j.iv.next, %inner.body ]
+ %idx = add i64 %outer.iv, %j.iv
+ %ptr = getelementptr inbounds i32, i32* %arg, i64 %idx
+ store i32 0, i32* %ptr, align 4
+ %j.iv.next = add nuw nsw i64 %j.iv, 1
+ %inner.cond = icmp eq i64 %j.iv.next, 40000
+ br i1 %inner.cond, label %outer.latch, label %inner.body
+
+outer.latch: ; preds = %inner.body
+ br label %outer.header
+
+exit: ; preds = %outer.header
+ ret void
+}
+
+; We unroll the inner loop and need to preserve LI for the outer loop.
+define void @test_with_nested_loop_unroll_inner(i32* %arg) {
+; CHECK-LABEL: define void @test_with_nested_loop_unroll_inner(
+; CHECK-LABEL: entry:
+; CHECK-NEXT: br label %outer.header
+
+; CHECK-LABEL: outer.header: ; preds = %inner.body, %entry
+; CHECK-NEXT: %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.body ]
+; CHECK-NEXT: %outer.iv.next = add nuw nsw i64 %outer.iv, 1
+; CHECK-NEXT: %outer.cond = icmp eq i64 %outer.iv, 40000
+; CHECK-NEXT: br i1 %outer.cond, label %exit, label %inner.body.preheader
+
+; CHECK-LABEL: inner.body.preheader: ; preds = %outer.header
+; CHECK-NEXT: br label %inner.body
+
+; CHECK-LABEL: inner.body: ; preds = %inner.body.preheader
+; CHECK-NEXT: %ptr = getelementptr inbounds i32, i32* %arg, i64 %outer.iv
+; CHECK-NEXT: store i32 0, i32* %ptr, align 4
+; CHECK-NEXT: %idx.1 = add i64 %outer.iv, 1
+; CHECK-NEXT: %ptr.1 = getelementptr inbounds i32, i32* %arg, i64 %idx.1
+; CHECK-NEXT: store i32 0, i32* %ptr.1, align 4
+; CHECK-NEXT: br label %outer.header
+
+; CHECK-LABEL: exit: ; preds = %outer.header
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %outer.header
+
+outer.header: ; preds = %outer.latch, %entry
+ %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ]
+ %outer.iv.next = add nuw nsw i64 %outer.iv, 1
+ %outer.cond = icmp eq i64 %outer.iv, 40000
+ br i1 %outer.cond, label %exit, label %inner.body
+
+inner.body:
+ %j.iv = phi i64 [ 0, %outer.header ], [ %j.iv.next, %inner.body ]
+ %idx = add i64 %outer.iv, %j.iv
+ %ptr = getelementptr inbounds i32, i32* %arg, i64 %idx
+ store i32 0, i32* %ptr, align 4
+ %j.iv.next = add nuw nsw i64 %j.iv, 1
+ %inner.cond = icmp eq i64 %j.iv.next, 2
+ br i1 %inner.cond, label %outer.latch, label %inner.body
+
+outer.latch: ; preds = %inner.body
+ br label %outer.header
+
+exit: ; preds = %outer.header
+ ret void
+}
+
+; Check that we do not crash for headers with non-branch instructions, e.g.
+; switch. We do not unroll in those cases.
+define void @test_switchinst_in_header() {
+; CHECK-LABEL: define void @test_switchinst_in_header() {
+; CHECK-LABEL: entry:
+; CHECK-NEXT: br label %while.header
+
+; CHECK-LABEL: while.header: ; preds = %while.latch, %entry
+; CHECK-NEXT: switch i32 undef, label %exit [
+; CHECK-NEXT: i32 11, label %while.body1
+; CHECK-NEXT: i32 5, label %while.body2
+; CHECK-NEXT: ]
+
+; CHECK-LABEL: while.body1: ; preds = %while.header
+; CHECK-NEXT: unreachable
+
+; CHECK-LABEL: while.body2: ; preds = %while.header
+; CHECK-NEXT: br label %while.latch
+
+; CHECK-LABEL: while.latch: ; preds = %while.body2
+; CHECK-NEXT: br label %while.header
+
+; CHECK-LABEL: exit: ; preds = %while.header
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %while.header
+
+while.header: ; preds = %while.latch, %entry
+ switch i32 undef, label %exit [
+ i32 11, label %while.body1
+ i32 5, label %while.body2
+ ]
+
+while.body1: ; preds = %while.header
+ unreachable
+
+while.body2: ; preds = %while.header
+ br label %while.latch
+
+while.latch: ; preds = %while.body2
+ br label %while.header
+
+exit: ; preds = %while.header
+ ret void
+}