Reapplying r278731 after fixing the problem that caused it to be reverted.

author David L Kreitzer <david.l.kreitzer@intel.com>

Fri, 16 Sep 2016 14:38:13 +0000 (14:38 +0000)

committer David L Kreitzer <david.l.kreitzer@intel.com>

Fri, 16 Sep 2016 14:38:13 +0000 (14:38 +0000)
author David L Kreitzer <david.l.kreitzer@intel.com>
Fri, 16 Sep 2016 14:38:13 +0000 (14:38 +0000)
committer David L Kreitzer <david.l.kreitzer@intel.com>
Fri, 16 Sep 2016 14:38:13 +0000 (14:38 +0000)
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h

index 6321165d7bdefc68ea123b9ad7dd5408183b4f6d..a97c6b74953a280674288eaac476e06d9e1c8863 100644 (file)
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -803,6 +803,13 @@ namespace llvm {
      /// Cache for \c loopHasNoAbnormalExits.
      DenseMap<const Loop *, bool> LoopHasNoAbnormalExits;
  
+    /// Cache for \c loopHasNoSideEffects.
+    DenseMap<const Loop *, bool> LoopHasNoSideEffects;
+
+    /// Returns true if \p L contains no instruction that can have side effects
+    /// (i.e. via throwing an exception, volatile or atomic access).
+    bool loopHasNoSideEffects(const Loop *L);
+
      /// Returns true if \p L contains no instruction that can abnormally exit
      /// the loop (i.e. via throwing an exception, by terminating the thread
      /// cleanly or by infinite looping in a called function).  Strictly
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp

index 3a922229f509785e4698d69134bfb18ebc5d445a..e97e66842906677ce1e1e4a4c8510cbc55d305da 100644 (file)
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -4953,6 +4953,28 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
    return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);
  }
  
+bool ScalarEvolution::loopHasNoSideEffects(const Loop *L) {
+  auto Itr = LoopHasNoSideEffects.find(L);
+  if (Itr == LoopHasNoSideEffects.end()) {
+    auto NoSideEffectsInBB = [&](BasicBlock *BB) {
+      return all_of(*BB, [](Instruction &I) {
+        // Non-atomic, non-volatile stores are ok.
+        if (auto *SI = dyn_cast<StoreInst>(&I))
+          return SI->isSimple();
+
+        return !I.mayHaveSideEffects();
+      });
+    };
+
+    auto InsertPair = LoopHasNoSideEffects.insert(
+        {L, all_of(L->getBlocks(), NoSideEffectsInBB)});
+    assert(InsertPair.second && "We just checked!");
+    Itr = InsertPair.first;
+  }
+
+  return Itr->second;
+}
+
  bool ScalarEvolution::loopHasNoAbnormalExits(const Loop *L) {
    auto Itr = LoopHasNoAbnormalExits.find(L);
    if (Itr == LoopHasNoAbnormalExits.end()) {
@@ -5540,6 +5562,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
      forgetLoop(I);
  
    LoopHasNoAbnormalExits.erase(L);
+  LoopHasNoSideEffects.erase(L);
  }
  
  void ScalarEvolution::forgetValue(Value *V) {
@@ -8614,6 +8637,8 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
  
  bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
                                           bool IsSigned, bool NoWrap) {
+  assert(isKnownPositive(Stride) && "Positive stride expected!");
+
    if (NoWrap) return false;
  
    unsigned BitWidth = getTypeSizeInBits(RHS->getType());
@@ -8682,11 +8707,15 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
      return getCouldNotCompute();
  
    const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
-  if (!IV && AllowPredicates)
+  bool PredicatedIV = false;
+
+  if (!IV && AllowPredicates) {
      // Try to make this an AddRec using runtime tests, in the first X
      // iterations of this loop, where X is the SCEV expression found by the
      // algorithm below.
      IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
+    PredicatedIV = true;
+  }
  
    // Avoid weird loops
    if (!IV || IV->getLoop() != L || !IV->isAffine())
@@ -8697,15 +8726,62 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
  
    const SCEV *Stride = IV->getStepRecurrence(*this);
  
-  // Avoid negative or zero stride values
-  if (!isKnownPositive(Stride))
-    return getCouldNotCompute();
+  bool PositiveStride = isKnownPositive(Stride);
  
-  // Avoid proven overflow cases: this will ensure that the backedge taken count
-  // will not generate any unsigned overflow. Relaxed no-overflow conditions
-  // exploit NoWrapFlags, allowing to optimize in presence of undefined
-  // behaviors like the case of C language.
-  if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+  // Avoid negative or zero stride values.
+  if (!PositiveStride) {
+    // We can compute the correct backedge taken count for loops with unknown
+    // strides if we can prove that the loop is not an infinite loop with side
+    // effects. Here's the loop structure we are trying to handle -
+    //
+    // i = start
+    // do {
+    //   A[i] = i;
+    //   i += s;
+    // } while (i < end);
+    //
+    // The backedge taken count for such loops is evaluated as -
+    // (max(end, start + stride) - start - 1) /u stride
+    //
+    // The additional preconditions that we need to check to prove correctness
+    // of the above formula is as follows -
+    //
+    // a) IV is either nuw or nsw depending upon signedness (indicated by the
+    //    NoWrap flag).
+    // b) loop is single exit with no side effects.
+    //
+    //
+    // Precondition a) implies that if the stride is negative, this is a single
+    // trip loop. The backedge taken count formula reduces to zero in this case.
+    //
+    // Precondition b) implies that the unknown stride cannot be zero otherwise
+    // we have UB.
+    //
+    // The positive stride case is the same as isKnownPositive(Stride) returning
+    // true (original behavior of the function).
+    //
+    // We want to make sure that the stride is truly unknown as there are edge
+    // cases where ScalarEvolution propagates no wrap flags to the
+    // post-increment/decrement IV even though the increment/decrement operation
+    // itself is wrapping. The computed backedge taken count may be wrong in
+    // such cases. This is prevented by checking that the stride is not known to
+    // be either positive or non-positive. For example, no wrap flags are
+    // propagated to the post-increment IV of this loop with a trip count of 2 -
+    //
+    // unsigned char i;
+    // for(i=127; i<128; i+=129)
+    //   A[i] = i;
+    //
+    if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||
+        !loopHasNoSideEffects(L))
+      return getCouldNotCompute();
+
+  } else if (!Stride->isOne() &&
+             doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+    // Avoid proven overflow cases: this will ensure that the backedge taken
+    // count will not generate any unsigned overflow. Relaxed no-overflow
+    // conditions exploit NoWrapFlags, allowing to optimize in presence of
+    // undefined behaviors like the case of C language.
      return getCouldNotCompute();
  
    ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
@@ -8720,12 +8796,21 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
    APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
                              : getUnsignedRange(Start).getUnsignedMin();
  
-  APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
-                             : getUnsignedRange(Stride).getUnsignedMin();
-
    unsigned BitWidth = getTypeSizeInBits(LHS->getType());
-  APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
-                         : APInt::getMaxValue(BitWidth) - (MinStride - 1);
+
+  APInt StrideForMaxBECount;
+
+  if (PositiveStride)
+    StrideForMaxBECount = IsSigned ? getSignedRange(Stride).getSignedMin()
+                                   : getUnsignedRange(Stride).getUnsignedMin();
+  else
+    // Using a stride of 1 is safe when computing max backedge taken count for
+    // a loop with unknown stride.
+    StrideForMaxBECount = APInt(BitWidth, 1, IsSigned);
+
+  APInt Limit =
+      IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1)
+               : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1);
  
    // Although End can be a MAX expression we estimate MaxEnd considering only
    // the case End = RHS. This is safe because in the other case (End - Start)
@@ -8739,7 +8824,7 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
      MaxBECount = BECount;
    else
      MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
-                                getConstant(MinStride), false);
+                                getConstant(StrideForMaxBECount), false);
  
    if (isa<SCEVCouldNotCompute>(MaxBECount))
      MaxBECount = BECount;
diff --git a/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll b/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll

new file mode 100644 (file)

index 0000000..60370d6
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
@@ -0,0 +1,62 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+; ScalarEvolution should be able to compute trip count of the loop by proving
+; that this is not an infinite loop with side effects.
+
+; CHECK: Determining loop execution counts for: @foo1
+; CHECK: backedge-taken count is ((-1 + %n) /u %s)
+
+; We should have a conservative estimate for the max backedge taken count for
+; loops with unknown stride.
+; CHECK: max backedge-taken count is -1
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+
+; Function Attrs: norecurse nounwind
+define void @foo1(i32* nocapture %A, i32 %n, i32 %s) #0 {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.05, %s
+  %cmp = icmp slt i32 %add, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+; Check that we are able to compute trip count of a loop without an entry guard.
+; CHECK: Determining loop execution counts for: @foo2
+; CHECK: backedge-taken count is ((-1 + (%n smax %s)) /u %s)
+
+; We should have a conservative estimate for the max backedge taken count for
+; loops with unknown stride.
+; CHECK: max backedge-taken count is -1
+
+; Function Attrs: norecurse nounwind
+define void @foo2(i32* nocapture %A, i32 %n, i32 %s) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.05, %s
+  %cmp = icmp slt i32 %add, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
author	David L Kreitzer <david.l.kreitzer@intel.com>
	Fri, 16 Sep 2016 14:38:13 +0000 (14:38 +0000)
committer	David L Kreitzer <david.l.kreitzer@intel.com>
	Fri, 16 Sep 2016 14:38:13 +0000 (14:38 +0000)
include/llvm/Analysis/ScalarEvolution.h		patch \| blob \| history
lib/Analysis/ScalarEvolution.cpp		patch \| blob \| history
test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll	[new file with mode: 0644]	patch \| blob