!isSafeToExpand(ExactBTC, *SE))
return Changed;
- auto Filter = [&](BasicBlock *ExitingBB) {
+ auto BadExit = [&](BasicBlock *ExitingBB) {
// If our exiting block exits multiple loops, we can only rewrite the
// innermost one. Otherwise, we're changing how many times the innermost
// loop runs before it exits.
return false;
};
- auto Erased = std::remove_if(ExitingBlocks.begin(), ExitingBlocks.end(),
- Filter);
- ExitingBlocks.erase(Erased, ExitingBlocks.end());
+
+ // If we have any exits which can't be predicated themselves, then we can't
+ // predicate any exit which isn't guaranteed to execute before it. Consider
+ // two exits (a) and (b) which would both exit on the same iteration. If we
+ // can predicate (b), but not (a), and (a) precedes (b) along some path, then
+ // we could convert a loop from exiting through (a) to one exiting through
+ // (b). Note that this problem exists only for exits with the same exit
+ // count, and we could be more aggressive when exit counts are known unequal.
+ llvm::sort(ExitingBlocks,
+ [&](BasicBlock *A, BasicBlock *B) {
+ // std::sort sorts in ascending order, so we want the inverse of
+ // the normal dominance relation, plus a tie breaker for blocks
+ // unordered by dominance.
+ if (DT->properlyDominates(A, B)) return true;
+ if (DT->properlyDominates(B, A)) return false;
+ return A->getName() < B->getName();
+ });
+ // Check to see if our exit blocks are a total order (i.e. a linear chain of
+ // exits before the backedge). If they aren't, reasoning about reachability
+ // is complicated and we choose not to for now.
+ for (unsigned i = 1; i < ExitingBlocks.size(); i++)
+ if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
+ return Changed;
+
+ // Given our sorted total order, we know that exit[j] must be evaluated
+ // after all exit[i] such that j > i.
+ for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
+ if (BadExit(ExitingBlocks[i])) {
+ ExitingBlocks.resize(i);
+ break;
+ }
if (ExitingBlocks.empty())
return Changed;
// We rely on not being able to reach an exiting block on a later iteration
- // than it's statically compute exit count. The implementaton of
+ // than its statically computed exit count. The implementation of
// getExitCount currently has this invariant, but assert it here so that
// breakage is obvious if this ever changes..
assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
define i32 @neg_dominating_exit(i32* %array, i32 %length, i32 %n) {
; CHECK-LABEL: @neg_dominating_exit(
; CHECK-NEXT: loop.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1
-; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1
-; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]]
-; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED2:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED2]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]]
+; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
; CHECK: deopt:
; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC]], [[LOOP]] ]
; CHECK-NEXT: call void @prevent_merging()
; CHECK-NEXT: ret i32 [[RESULT]]
; CHECK: guarded:
-; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED2]], label [[DEOPT2:%.*]], !prof !0
+; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH]]
+; CHECK-NEXT: br i1 [[WITHIN_BOUNDS2]], label [[GUARDED2]], label [[DEOPT2:%.*]], !prof !0
; CHECK: deopt2:
; CHECK-NEXT: call void @prevent_merging()
; CHECK-NEXT: ret i32 -1
; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4
; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1
-; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N:%.*]]
; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[RESULT2:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED2]] ]