static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
cl::Hidden, cl::init(true));
+static cl::opt<bool> AllowNarrowLatchCondition(
+ "irce-allow-narrow-latch", cl::Hidden, cl::init(false),
+ cl::desc("If set to true, IRCE may eliminate wide range checks in loops "
+ "with narrow latch condition."));
+
static const char *ClonedLoopTag = "irce.loop.clone";
#define DEBUG_TYPE "irce"
return Result;
}
+/// If the type of \p S matches with \p Ty, return \p S. Otherwise, return
+/// signed or unsigned extension of \p S to type \p Ty.
+static const SCEV *NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE,
+ bool Signed) {
+ return Signed ? SE.getNoopOrSignExtend(S, Ty) : SE.getNoopOrZeroExtend(S, Ty);
+}
+
Optional<LoopConstrainer::SubRanges>
LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
- if (Range.getType() != Ty)
+ auto *RTy = cast<IntegerType>(Range.getType());
+
+ // We only support wide range checks and narrow latches.
+ if (!AllowNarrowLatchCondition && RTy != Ty)
+ return None;
+ if (RTy->getBitWidth() < Ty->getBitWidth())
return None;
LoopConstrainer::SubRanges Result;
// I think we can be more aggressive here and make this nuw / nsw if the
// addition that feeds into the icmp for the latch's terminating branch is nuw
// / nsw. In any case, a wrapping 2's complement addition is safe.
- const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart);
- const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt);
+ const SCEV *Start = NoopOrExtend(SE.getSCEV(MainLoopStructure.IndVarStart),
+ RTy, SE, IsSignedPredicate);
+ const SCEV *End = NoopOrExtend(SE.getSCEV(MainLoopStructure.LoopExitAt), RTy,
+ SE, IsSignedPredicate);
bool Increasing = MainLoopStructure.IndVarIncreasing;
const SCEV *Smallest = nullptr, *Greatest = nullptr, *GreatestSeen = nullptr;
- const SCEV *One = SE.getOne(Ty);
+ const SCEV *One = SE.getOne(RTy);
if (Increasing) {
Smallest = Start;
Greatest = End;
bool IsSignedPredicate = LS.IsSignedPredicate;
IRBuilder<> B(PreheaderJump);
+ auto *RangeTy = Range.getBegin()->getType();
+ auto NoopOrExt = [&](Value *V) {
+ if (V->getType() == RangeTy)
+ return V;
+ return IsSignedPredicate ? B.CreateSExt(V, RangeTy, "wide." + V->getName())
+ : B.CreateZExt(V, RangeTy, "wide." + V->getName());
+ };
// EnterLoopCond - is it okay to start executing this `LS'?
Value *EnterLoopCond = nullptr;
Increasing
? (IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT)
: (IsSignedPredicate ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
- Value *IndVarStart = LS.IndVarStart;
- Value *IndVarBase = LS.IndVarBase;
- Value *LoopExitAt = LS.LoopExitAt;
+ Value *IndVarStart = NoopOrExt(LS.IndVarStart);
EnterLoopCond = B.CreateICmp(Pred, IndVarStart, ExitSubloopAt);
B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
B.SetInsertPoint(LS.LatchBr);
+ Value *IndVarBase = NoopOrExt(LS.IndVarBase);
Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, IndVarBase, ExitSubloopAt);
Value *CondForBranch = LS.LatchBrExitIdx == 1
// IterationsLeft - are there any more iterations left, given the original
// upper bound on the induction variable? If not, we branch to the "real"
// exit.
+ Value *LoopExitAt = NoopOrExt(LS.LoopExitAt);
Value *IterationsLeft = B.CreateICmp(Pred, IndVarBase, LoopExitAt);
B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
SubRanges SR = MaybeSR.getValue();
bool Increasing = MainLoopStructure.IndVarIncreasing;
IntegerType *IVTy =
- cast<IntegerType>(MainLoopStructure.IndVarBase->getType());
+ cast<IntegerType>(Range.getBegin()->getType());
SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce");
Instruction *InsertPt = OriginalPreheader->getTerminator();
InductiveRangeCheck::computeSafeIterationSpace(
ScalarEvolution &SE, const SCEVAddRecExpr *IndVar,
bool IsLatchSigned) const {
+ // We can deal when types of latch check and range checks don't match in case
+ // if latch check is more narrow.
+ auto *IVType = cast<IntegerType>(IndVar->getType());
+ auto *RCType = cast<IntegerType>(getBegin()->getType());
+ if (IVType->getBitWidth() > RCType->getBitWidth())
+ return None;
// IndVar is of the form "A + B * I" (where "I" is the canonical induction
// variable, that may or may not exist as a real llvm::Value in the loop) and
// this inductive range check is a range check on the "C + D * I" ("C" is
if (!IndVar->isAffine())
return None;
- const SCEV *A = IndVar->getStart();
- const SCEVConstant *B = dyn_cast<SCEVConstant>(IndVar->getStepRecurrence(SE));
+ const SCEV *A = NoopOrExtend(IndVar->getStart(), RCType, SE, IsLatchSigned);
+ const SCEVConstant *B = dyn_cast<SCEVConstant>(
+ NoopOrExtend(IndVar->getStepRecurrence(SE), RCType, SE, IsLatchSigned));
if (!B)
return None;
assert(!B->isZero() && "Recurrence with zero step?");
return None;
assert(!D->getValue()->isZero() && "Recurrence with zero step?");
- unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth();
+ unsigned BitWidth = RCType->getBitWidth();
const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
// Subtract Y from X so that it does not go through border of the IV
--- /dev/null
+; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -irce-allow-narrow-latch=true -S < %s 2>&1 | FileCheck %s
+; RUN: opt -verify-loop-info -irce-print-changed-loops -passes='require<branch-prob>,loop(irce)' -irce-allow-narrow-latch=true -S < %s 2>&1 | FileCheck %s
+
+; Check that we can remove trivially non-failing range check.
+define i32 @test_increasing_slt_slt_wide_simple_no_postloop() {
+
+; CHECK-LABEL: @test_increasing_slt_slt_wide_simple_no_postloop(
+; CHECK-NOT: preloop
+; CHECK-NOT: postloop
+; CHECK: loop:
+; CHECK: br i1 true, label %backedge, label %check_failed
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+ %rc = icmp slt i64 %iv, 100
+ br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+ %iv.next = add i64 %iv, 1
+ %narrow.iv = trunc i64 %iv.next to i32
+ %latch.cond = icmp slt i32 %narrow.iv, 100
+ br i1 %latch.cond, label %loop, label %exit
+
+exit:
+ ret i32 %narrow.iv
+
+check_failed:
+ ret i32 -1
+}
+
+; This range check fails on the last iteration, so it needs a postloop.
+define i32 @test_increasing_slt_slt_wide_simple_postloop() {
+
+; CHECK-LABEL: @test_increasing_slt_slt_wide_simple_postloop(
+; CHECK-NOT: preloop
+; CHECK: loop:
+; CHECK: br i1 true, label %backedge, label %check_failed
+; CHECK: backedge
+; CHECK: [[COND:%[^ ]+]] = icmp slt i64 %wide.narrow.iv, 99
+; CHECK: br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK: postloop
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+ %rc = icmp slt i64 %iv, 99
+ br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+ %iv.next = add i64 %iv, 1
+ %narrow.iv = trunc i64 %iv.next to i32
+ %latch.cond = icmp slt i32 %narrow.iv, 100
+ br i1 %latch.cond, label %loop, label %exit
+
+exit:
+ ret i32 %narrow.iv
+
+check_failed:
+ ret i32 -1
+}
+
+; General case. If both %N and %M are non-negative, we do not need a preloop.
+define i32 @test_increasing_slt_slt_wide_non-negative(i32* %n_ptr, i64* %m_ptr) {
+
+; CHECK-LABEL: @test_increasing_slt_slt_wide_non-negative(
+; CHECK-NOT: preloop
+; CHECK: loop:
+; CHECK: br i1 true, label %backedge, label %check_failed
+; CHECK: backedge
+; CHECK: [[COND:%[^ ]+]] = icmp slt i64 %wide.narrow.iv, %exit.mainloop.at
+; CHECK: br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK: postloop
+
+entry:
+ %N = load i32, i32* %n_ptr, !range !2
+ %M = load i64, i64* %m_ptr, !range !1
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+ %rc = icmp slt i64 %iv, %M
+ br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+ %iv.next = add i64 %iv, 1
+ %narrow.iv = trunc i64 %iv.next to i32
+ %latch.cond = icmp slt i32 %narrow.iv, %N
+ br i1 %latch.cond, label %loop, label %exit
+
+exit:
+ ret i32 %narrow.iv
+
+check_failed:
+ ret i32 -1
+}
+
+; General case. Even though %M may be negative, we do not need a preloop because
+; we make a non-negativity runtime check against M and do not go to main loop if
+; M was negative.
+define i32 @test_increasing_slt_slt_wide_general(i32* %n_ptr, i64* %m_ptr) {
+
+; CHECK-LABEL: @test_increasing_slt_slt_wide_general(
+; CHECK-NOT: preloop
+; CHECK: loop:
+; CHECK: br i1 true, label %backedge, label %check_failed
+; CHECK: backedge
+; CHECK: [[COND:%[^ ]+]] = icmp slt i64
+; CHECK: br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK: postloop
+
+entry:
+ %N = load i32, i32* %n_ptr, !range !2
+ %M = load i64, i64* %m_ptr
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+ %rc = icmp slt i64 %iv, %M
+ br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+ %iv.next = add i64 %iv, 1
+ %narrow.iv = trunc i64 %iv.next to i32
+ %latch.cond = icmp slt i32 %narrow.iv, %N
+ br i1 %latch.cond, label %loop, label %exit
+
+exit:
+ ret i32 %narrow.iv
+
+check_failed:
+ ret i32 -1
+}
+
+; General case with preloop.
+define i32 @test_increasing_slt_slt_wide_general_preloop(i32* %n_ptr, i64* %m_ptr) {
+
+; CHECK-LABEL: @test_increasing_slt_slt_wide_general_preloop(
+; CHECK: loop:
+; CHECK: br i1 true, label %backedge, label %check_failed
+; CHECK: backedge
+; CHECK: [[COND:%[^ ]+]] = icmp slt i64
+; CHECK: br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK: preloop
+; CHECK: postloop
+
+entry:
+ %N = load i32, i32* %n_ptr, !range !2
+ %M = load i64, i64* %m_ptr
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+ %rc = icmp slt i64 %iv, %M
+ br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+ %iv.next = add i64 %iv, 1
+ %narrow.iv = trunc i64 %iv to i32
+ %latch.cond = icmp slt i32 %narrow.iv, %N
+ br i1 %latch.cond, label %loop, label %exit
+
+exit:
+ ret i32 %narrow.iv
+
+check_failed:
+ ret i32 -1
+}
+
+; Same as above, multiple checks.
+define i32 @test_increasing_slt_slt_wide_multiple_checks(i32* %n_ptr, i64* %m1_ptr, i64* %m2_ptr, i64* %m3_ptr, i64* %m4_ptr) {
+; CHECK-LABEL: @test_increasing_slt_slt_wide_multiple_checks(
+; CHECK-NOT: preloop
+; CHECK: loop:
+; CHECK: %c1 = and i1 true, true
+; CHECK: %c2 = and i1 %c1, true
+; CHECK: %rc = and i1 %c2, true
+; CHECK: br i1 %rc, label %backedge, label %check_failed.loopexit
+; CHECK: backedge
+; CHECK: [[COND:%[^ ]+]] = icmp slt i64
+; CHECK: br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK: postloop
+
+entry:
+ %N = load i32, i32* %n_ptr, !range !2
+ %M1 = load i64, i64* %m1_ptr
+ %M2 = load i64, i64* %m2_ptr
+ %M3 = load i64, i64* %m3_ptr
+ %M4 = load i64, i64* %m4_ptr
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+ %rc1 = icmp slt i64 %iv, %M1
+ %rc2 = icmp slt i64 %iv, %M2
+ %rc3 = icmp slt i64 %iv, %M3
+ %rc4 = icmp slt i64 %iv, %M4
+ %c1 = and i1 %rc1, %rc2
+ %c2 = and i1 %c1, %rc3
+ %rc = and i1 %c2, %rc4
+ br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+ %iv.next = add i64 %iv, 1
+ %narrow.iv = trunc i64 %iv.next to i32
+ %latch.cond = icmp slt i32 %narrow.iv, %N
+ br i1 %latch.cond, label %loop, label %exit
+
+exit:
+ ret i32 %narrow.iv
+
+check_failed:
+ ret i32 -1
+}
+
+; Wide IV against narrow range check. We don't currently support it.
+define i32 @test_increasing_slt_slt_wide_simple_negtest_narrow_rc() {
+
+; CHECK-LABEL: @test_increasing_slt_slt_wide_simple_negtest_narrow_rc(
+; CHECK-NOT: i1 true
+; CHECK-NOT: main
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+ %narrow.iv = trunc i64 %iv to i32
+ %rc = icmp slt i32 %narrow.iv, 101
+ br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+ %iv.next = add i64 %iv, 1
+ %latch.cond = icmp slt i64 %iv, 100
+ br i1 %latch.cond, label %loop, label %exit
+
+exit:
+ ret i32 %narrow.iv
+
+check_failed:
+ ret i32 -1
+}
+
+; Check that we can remove trivially non-failing range check.
+define i32 @test_increasing_ult_ult_wide_simple_no_postloop() {
+
+; CHECK-LABEL: @test_increasing_ult_ult_wide_simple_no_postloop(
+; CHECK-NOT: preloop
+; CHECK-NOT: postloop
+; CHECK: loop:
+; CHECK: br i1 true, label %backedge, label %check_failed
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+ %rc = icmp ult i64 %iv, 100
+ br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+ %iv.next = add i64 %iv, 1
+ %narrow.iv = trunc i64 %iv.next to i32
+ %latch.cond = icmp ult i32 %narrow.iv, 100
+ br i1 %latch.cond, label %loop, label %exit
+
+exit:
+ ret i32 %narrow.iv
+
+check_failed:
+ ret i32 -1
+}
+
+; This range check fails on the last iteration, so it needs a postloop.
+define i32 @test_increasing_ult_ult_wide_simple_postloop() {
+
+; CHECK-LABEL: @test_increasing_ult_ult_wide_simple_postloop(
+; CHECK-NOT: preloop
+; CHECK: loop:
+; CHECK: br i1 true, label %backedge, label %check_failed
+; CHECK: backedge
+; CHECK: [[COND:%[^ ]+]] = icmp ult i64 %wide.narrow.iv, 99
+; CHECK: br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK: postloop
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+ %rc = icmp ult i64 %iv, 99
+ br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+ %iv.next = add i64 %iv, 1
+ %narrow.iv = trunc i64 %iv.next to i32
+ %latch.cond = icmp ult i32 %narrow.iv, 100
+ br i1 %latch.cond, label %loop, label %exit
+
+exit:
+ ret i32 %narrow.iv
+
+check_failed:
+ ret i32 -1
+}
+
+; General case. If both %N and %M are non-negative, we do not need a preloop.
+define i32 @test_increasing_ult_ult_wide_non-negative(i32* %n_ptr, i64* %m_ptr) {
+
+; CHECK-LABEL: @test_increasing_ult_ult_wide_non-negative(
+; CHECK-NOT: preloop
+; CHECK: loop:
+; CHECK: br i1 true, label %backedge, label %check_failed
+; CHECK: backedge
+; CHECK: [[COND:%[^ ]+]] = icmp ult i64 %wide.narrow.iv, %exit.mainloop.at
+; CHECK: br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK: postloop
+
+entry:
+ %N = load i32, i32* %n_ptr, !range !2
+ %M = load i64, i64* %m_ptr, !range !1
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+ %rc = icmp ult i64 %iv, %M
+ br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+ %iv.next = add i64 %iv, 1
+ %narrow.iv = trunc i64 %iv.next to i32
+ %latch.cond = icmp ult i32 %narrow.iv, %N
+ br i1 %latch.cond, label %loop, label %exit
+
+exit:
+ ret i32 %narrow.iv
+
+check_failed:
+ ret i32 -1
+}
+
+; General case. Even though %M may be negative, we do not need a preloop because
+; we make a non-negativity runtime check against M and do not go to main loop if
+; M was negative.
+define i32 @test_increasing_ult_ult_wide_general(i32* %n_ptr, i64* %m_ptr) {
+
+; CHECK-LABEL: @test_increasing_ult_ult_wide_general(
+; CHECK-NOT: preloop
+; CHECK: loop:
+; CHECK: br i1 true, label %backedge, label %check_failed
+; CHECK: backedge
+; CHECK: [[COND:%[^ ]+]] = icmp ult i64
+; CHECK: br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK: postloop
+
+entry:
+ %N = load i32, i32* %n_ptr, !range !2
+ %M = load i64, i64* %m_ptr
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+ %rc = icmp ult i64 %iv, %M
+ br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+ %iv.next = add i64 %iv, 1
+ %narrow.iv = trunc i64 %iv.next to i32
+ %latch.cond = icmp ult i32 %narrow.iv, %N
+ br i1 %latch.cond, label %loop, label %exit
+
+exit:
+ ret i32 %narrow.iv
+
+check_failed:
+ ret i32 -1
+}
+
+; Same as above, multiple checks.
+define i32 @test_increasing_ult_ult_wide_multiple_checks(i32* %n_ptr, i64* %m1_ptr, i64* %m2_ptr, i64* %m3_ptr, i64* %m4_ptr) {
+; CHECK-LABEL: @test_increasing_ult_ult_wide_multiple_checks(
+; CHECK-NOT: preloop
+; CHECK: loop:
+; CHECK: %c1 = and i1 true, true
+; CHECK: %c2 = and i1 %c1, true
+; CHECK: %rc = and i1 %c2, true
+; CHECK: br i1 %rc, label %backedge, label %check_failed.loopexit
+; CHECK: backedge
+; CHECK: [[COND:%[^ ]+]] = icmp ult i64
+; CHECK: br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK: postloop
+
+entry:
+ %N = load i32, i32* %n_ptr, !range !2
+ %M1 = load i64, i64* %m1_ptr
+ %M2 = load i64, i64* %m2_ptr
+ %M3 = load i64, i64* %m3_ptr
+ %M4 = load i64, i64* %m4_ptr
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+ %rc1 = icmp ult i64 %iv, %M1
+ %rc2 = icmp ult i64 %iv, %M2
+ %rc3 = icmp ult i64 %iv, %M3
+ %rc4 = icmp ult i64 %iv, %M4
+ %c1 = and i1 %rc1, %rc2
+ %c2 = and i1 %c1, %rc3
+ %rc = and i1 %c2, %rc4
+ br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+ %iv.next = add i64 %iv, 1
+ %narrow.iv = trunc i64 %iv.next to i32
+ %latch.cond = icmp ult i32 %narrow.iv, %N
+ br i1 %latch.cond, label %loop, label %exit
+
+exit:
+ ret i32 %narrow.iv
+
+check_failed:
+ ret i32 -1
+}
+
+; Wide IV against narrow range check. We don't currently support it.
+define i32 @test_increasing_ult_ult_wide_simple_negtest_narrow_rc() {
+
+; CHECK-LABEL: @test_increasing_ult_ult_wide_simple_negtest_narrow_rc(
+; CHECK-NOT: i1 true
+; CHECK-NOT: main
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+ %narrow.iv = trunc i64 %iv to i32
+ %rc = icmp ult i32 %narrow.iv, 101
+ br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+ %iv.next = add i64 %iv, 1
+ %latch.cond = icmp ult i64 %iv, 100
+ br i1 %latch.cond, label %loop, label %exit
+
+exit:
+ ret i32 %narrow.iv
+
+check_failed:
+ ret i32 -1
+}
+
+!0 = !{i32 0, i32 2147483647}
+!1 = !{i64 0, i64 9223372036854775807}
+!2 = !{i32 1, i32 2147483647}