"unroll-force-peel-count", cl::init(0), cl::Hidden,
cl::desc("Force a peel count regardless of profiling information."));
+static cl::opt<bool> UnrollPeelMultiDeoptExit(
+ "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden,
+ cl::desc("Allow peeling of loops with multiple deopt exits."));
+
// Designates that a Phi is estimated to become invariant after an "infinite"
// number of loop iterations (i.e. only may become an invariant if the loop is
// fully unrolled).
if (!L->isLoopSimplifyForm())
return false;
+ if (UnrollPeelMultiDeoptExit) {
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueNonLatchExitBlocks(Exits);
+
+ if (!Exits.empty()) {
+ // Latch's terminator is a conditional branch, Latch is exiting and
+ // all non Latch exits ends up with deoptimize.
+ const BasicBlock *Latch = L->getLoopLatch();
+ const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator());
+ return T && T->isConditional() && L->isLoopExiting(Latch) &&
+ all_of(Exits, [](const BasicBlock *BB) {
+ return BB->getTerminatingDeoptimizeCall();
+ });
+ }
+ }
+
// Only peel loops that contain a single exit
if (!L->getExitingBlock() || !L->getUniqueExitBlock())
return false;
--- /dev/null
+; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime -unroll-peel-multi-deopt-exit 2>&1 | FileCheck %s
+; RUN: opt < %s -S -debug-only=loop-unroll -unroll-peel-multi-deopt-exit -passes='require<profile-summary>,function(require<opt-remark-emit>,unroll)' 2>&1 | FileCheck %s
+
+; Make sure we use the profile information correctly to peel-off 3 iterations
+; from the loop, and update the branch weights for the peeled loop properly.
+
+; CHECK: Loop Unroll: F[basic]
+; CHECK: PEELING loop %for.body with iteration count 3!
+
+; CHECK-LABEL: @basic
+; CHECK: br i1 %{{.*}}, label %[[NEXT0:.*]], label %for.cond.for.end_crit_edge, !prof !16
+; CHECK: [[NEXT0]]:
+; CHECK: br i1 %{{.*}}, label %[[NEXT1:.*]], label %for.cond.for.end_crit_edge, !prof !17
+; CHECK: [[NEXT1]]:
+; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !18
+; CHECK: [[NEXT2]]:
+; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !19
+
+define i32 @basic(i32* %p, i32 %k, i1 %c) #0 !prof !15 {
+entry:
+ %cmp3 = icmp slt i32 0, %k
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %continue ]
+ %p.addr.04 = phi i32* [ %p, %for.body.lr.ph ], [ %incdec.ptr, %continue ]
+ %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1
+ store i32 %i.05, i32* %p.addr.04, align 4
+ %inc = add nsw i32 %i.05, 1
+ %cmp = icmp slt i32 %inc, %k
+ br i1 %c, label %continue, label %side_exit, !prof !17
+
+continue:
+ br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !prof !16
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ %res = phi i32 [ 0, %entry ], [ %inc, %for.cond.for.end_crit_edge ]
+ ret i32 %res
+
+side_exit:
+ %rval = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %inc) ]
+ ret i32 %rval
+}
+
+declare i32 @llvm.experimental.deoptimize.i32(...)
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind optsize }
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10}
+!5 = !{!"MaxCount", i64 3}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 3}
+!8 = !{!"NumCounts", i64 2}
+!9 = !{!"NumFunctions", i64 2}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 3, i32 2}
+!13 = !{i32 999000, i64 1, i32 10}
+!14 = !{i32 999999, i64 1, i32 10}
+!15 = !{!"function_entry_count", i64 1}
+!16 = !{!"branch_weights", i32 3001, i32 1001}
+!17 = !{!"branch_weights", i32 1, i32 0}
+
+;CHECK: !16 = !{!"branch_weights", i32 900, i32 101}
+;CHECK: !17 = !{!"branch_weights", i32 540, i32 360}
+;CHECK: !18 = !{!"branch_weights", i32 162, i32 378}
+;CHECK: !19 = !{!"branch_weights", i32 1399, i32 162}
+