return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);
}
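+// Return true if the only side-effecting instructions in the loop are simple
+// (non-atomic, non-volatile) stores. The result is cached per loop in
+// LoopHasNoSideEffects.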
+bool ScalarEvolution::loopHasNoSideEffects(const Loop *L) {
+ auto Itr = LoopHasNoSideEffects.find(L);
+ if (Itr == LoopHasNoSideEffects.end()) {
+ auto NoSideEffectsInBB = [&](BasicBlock *BB) {
+ return all_of(*BB, [](Instruction &I) {
+ // Non-atomic, non-volatile stores are ok.
+ if (auto *SI = dyn_cast<StoreInst>(&I))
+ return SI->isSimple();
+
+ return !I.mayHaveSideEffects();
+ });
+ };
+
+ auto InsertPair = LoopHasNoSideEffects.insert(
+ {L, all_of(L->getBlocks(), NoSideEffectsInBB)});
+ assert(InsertPair.second && "We just checked!");
+ Itr = InsertPair.first;
+ }
+
+ return Itr->second;
+}
+
bool ScalarEvolution::loopHasNoAbnormalExits(const Loop *L) {
auto Itr = LoopHasNoAbnormalExits.find(L);
if (Itr == LoopHasNoAbnormalExits.end()) {
forgetLoop(I);
LoopHasNoAbnormalExits.erase(L);
+ LoopHasNoSideEffects.erase(L);
}
void ScalarEvolution::forgetValue(Value *V) {
bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
bool IsSigned, bool NoWrap) {
+ assert(isKnownPositive(Stride) && "Positive stride expected!");
+
if (NoWrap) return false;
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
return getCouldNotCompute();
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
- if (!IV && AllowPredicates)
+ bool PredicatedIV = false;
+
+ if (!IV && AllowPredicates) {
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
+ PredicatedIV = true;
+ }
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
const SCEV *Stride = IV->getStepRecurrence(*this);
- // Avoid negative or zero stride values
- if (!isKnownPositive(Stride))
- return getCouldNotCompute();
+ bool PositiveStride = isKnownPositive(Stride);
- // Avoid proven overflow cases: this will ensure that the backedge taken count
- // will not generate any unsigned overflow. Relaxed no-overflow conditions
- // exploit NoWrapFlags, allowing to optimize in presence of undefined
- // behaviors like the case of C language.
- if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+ // Avoid negative or zero stride values.
+ if (!PositiveStride) {
+ // We can compute the correct backedge taken count for loops with unknown
+ // strides if we can prove that the loop is not an infinite loop with side
+ // effects. Here's the loop structure we are trying to handle -
+ //
+ // i = start
+ // do {
+ // A[i] = i;
+ // i += s;
+ // } while (i < end);
+ //
+ // The backedge taken count for such loops is evaluated as -
+ // (max(end, start + stride) - start - 1) /u stride
+ //
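+ // For example, with start = 0, end = 10 and stride = 3 the body runs for
+ // i = 0, 3, 6, 9 and the backedge is taken
+ // (max(10, 0 + 3) - 0 - 1) /u 3 = 3 times. The max() term covers the
+ // single-trip case: with start = 0, end = 1 and stride = 5 it gives
+ // (max(1, 0 + 5) - 0 - 1) /u 5 = 0.
+ //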
+ // The additional preconditions that we need to check to prove the
+ // correctness of the above formula are as follows -
+ //
+ // a) IV is either nuw or nsw depending upon signedness (indicated by the
+ // NoWrap flag).
+ //   b) the loop has a single exit and no side effects.
+ //
+ // Precondition a) implies that if the stride is negative, this is a single
+ // trip loop. The backedge taken count formula reduces to zero in this case.
+ //
+ // Precondition b) implies that the unknown stride cannot be zero otherwise
+ // we have UB.
+ //
+ // The positive stride case is the same as isKnownPositive(Stride) returning
+ // true (original behavior of the function).
+ //
+ // We want to make sure that the stride is truly unknown as there are edge
+ // cases where ScalarEvolution propagates no wrap flags to the
+ // post-increment/decrement IV even though the increment/decrement operation
+ // itself is wrapping. The computed backedge taken count may be wrong in
+ // such cases. This is prevented by checking that the stride is not known to
+ // be either positive or non-positive. For example, no wrap flags are
+ // propagated to the post-increment IV of this loop with a trip count of 2 -
+ //
+ // unsigned char i;
+ // for(i=127; i<128; i+=129)
+ // A[i] = i;
+ //
+ if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||
+ !loopHasNoSideEffects(L))
+ return getCouldNotCompute();
+
+ } else if (!Stride->isOne() &&
+ doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+ // Avoid proven overflow cases: this will ensure that the backedge taken
+ // count will not generate any unsigned overflow. Relaxed no-overflow
+ // conditions exploit NoWrapFlags, allowing to optimize in presence of
+ // undefined behaviors like the case of C language.
return getCouldNotCompute();
ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
: getUnsignedRange(Start).getUnsignedMin();
- APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
- : getUnsignedRange(Stride).getUnsignedMin();
-
unsigned BitWidth = getTypeSizeInBits(LHS->getType());
- APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
- : APInt::getMaxValue(BitWidth) - (MinStride - 1);
+
+ APInt StrideForMaxBECount;
+
+ if (PositiveStride)
+ StrideForMaxBECount = IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+ else
+ // Using a stride of 1 is safe when computing max backedge taken count for
+ // a loop with unknown stride.
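+ // The preconditions checked above guarantee that the stride is non-zero,
+ // and a negative stride implies a single-trip loop, so dividing by 1 can
+ // only over-estimate the real backedge taken count.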
+ StrideForMaxBECount = APInt(BitWidth, 1, IsSigned);
+
+ APInt Limit =
+ IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1)
+ : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1);
// Although End can be a MAX expression we estimate MaxEnd considering only
// the case End = RHS. This is safe because in the other case (End - Start)
MaxBECount = BECount;
else
MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
- getConstant(MinStride), false);
+ getConstant(StrideForMaxBECount), false);
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
--- /dev/null
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+; ScalarEvolution should be able to compute the trip count of this loop by
+; proving that it is not an infinite loop with side effects.
+
+; CHECK: Determining loop execution counts for: @foo1
+; CHECK: backedge-taken count is ((-1 + %n) /u %s)
+
+; We should have a conservative estimate for the max backedge taken count for
+; loops with unknown stride.
+; CHECK: max backedge-taken count is -1
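+; (-1 is the all-ones value, i.e. UINT32_MAX, the most conservative bound
+; possible for an i32 trip count.)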
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+
+; Function Attrs: norecurse nounwind
+define void @foo1(i32* nocapture %A, i32 %n, i32 %s) #0 {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05
+ %0 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %add = add nsw i32 %i.05, %s
+ %cmp = icmp slt i32 %add, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+
+; Check that we can compute the trip count of a loop without an entry guard.
+; CHECK: Determining loop execution counts for: @foo2
+; CHECK: backedge-taken count is ((-1 + (%n smax %s)) /u %s)
+
+; We should have a conservative estimate for the max backedge taken count for
+; loops with unknown stride.
+; CHECK: max backedge-taken count is -1
+
+; Function Attrs: norecurse nounwind
+define void @foo2(i32* nocapture %A, i32 %n, i32 %s) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05
+ %0 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %add = add nsw i32 %i.05, %s
+ %cmp = icmp slt i32 %add, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+