[Loop Peeling] Do not close further unroll/peel if profile based peeling was not...

author Serguei Katkov <serguei.katkov@azul.com>

Fri, 2 Aug 2019 04:29:23 +0000 (04:29 +0000)

committer Serguei Katkov <serguei.katkov@azul.com>

Fri, 2 Aug 2019 04:29:23 +0000 (04:29 +0000)
author Serguei Katkov <serguei.katkov@azul.com>
Fri, 2 Aug 2019 04:29:23 +0000 (04:29 +0000)
committer Serguei Katkov <serguei.katkov@azul.com>
Fri, 2 Aug 2019 04:29:23 +0000 (04:29 +0000)
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h

index 7574b811bc1c4f88803264715a202b231d642563..b501522723077926f91d0cff5d68cd853b74815d 100644 (file)
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -469,12 +469,17 @@ public:
      bool Force;
      /// Allow using trip count upper bound to unroll loops.
      bool UpperBound;
-    /// Allow peeling off loop iterations for loops with low dynamic tripcount.
+    /// Allow peeling off loop iterations.
      bool AllowPeeling;
      /// Allow unrolling of all the iterations of the runtime loop remainder.
      bool UnrollRemainder;
      /// Allow unroll and jam. Used to enable unroll and jam for the target.
      bool UnrollAndJam;
+    /// Allow peeling basing on profile. Uses to enable peeling off all
+    /// iterations basing on provided profile.
+    /// If the value is true the peeling cost model can decide to peel only
+    /// some iterations and in this case it will set this to false.
+    bool PeelProfiledIterations;
      /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
      /// value above is used during unroll and jam for the outer loop size.
      /// This value is used in the same manner to limit the size of the inner
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp

index 2fa7436213dd5bee4059fcb2fb10b10c9ccb48a9..081dc760c58f537c43d85e24381cbde802a6db80 100644 (file)
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -202,6 +202,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
    UP.UpperBound = false;
    UP.AllowPeeling = true;
    UP.UnrollAndJam = false;
+  UP.PeelProfiledIterations = true;
    UP.UnrollAndJamInnerLoopThreshold = 60;
  
    // Override with any target specific settings
@@ -1139,7 +1140,7 @@ static LoopUnrollResult tryToUnrollLoop(
    // If the loop was peeled, we already "used up" the profile information
    // we had, so we don't want to unroll or peel again.
    if (UnrollResult != LoopUnrollResult::FullyUnrolled &&
-      (IsCountSetExplicitly || UP.PeelCount))
+      (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount)))
      L->setLoopAlreadyUnrolled();
  
    return UnrollResult;
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp

index e78a5708340b782eeb8f3c85618f93d029ea1024..3fad421f58fa694c3908ae456c8154d6822c131b 100644 (file)
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -65,6 +65,8 @@ static cl::opt<bool> UnrollPeelMultiDeoptExit(
      "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden,
      cl::desc("Allow peeling of loops with multiple deopt exits."));
  
+static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
+
  // Designates that a Phi is estimated to become invariant after an "infinite"
  // number of loop iterations (i.e. only may become an invariant if the loop is
  // fully unrolled).
@@ -275,6 +277,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
      LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
                        << " iterations.\n");
      UP.PeelCount = UnrollForcePeelCount;
+    UP.PeelProfiledIterations = true;
      return;
    }
  
@@ -282,6 +285,13 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
    if (!UP.AllowPeeling)
      return;
  
+  unsigned AlreadyPeeled = 0;
+  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
+    AlreadyPeeled = *Peeled;
+  // Stop if we already peeled off the maximum number of iterations.
+  if (AlreadyPeeled >= UnrollPeelMaxCount)
+    return;
+
    // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
    // iterations of the loop. For this we compute the number for iterations after
    // which every Phi is guaranteed to become an invariant, and try to peel the
@@ -317,11 +327,14 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
        DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
        // Consider max peel count limitation.
        assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
-      LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
-                        << " iteration(s) to turn"
-                        << " some Phis into invariants.\n");
-      UP.PeelCount = DesiredPeelCount;
-      return;
+      if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
+        LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
+                          << " iteration(s) to turn"
+                          << " some Phis into invariants.\n");
+        UP.PeelCount = DesiredPeelCount;
+        UP.PeelProfiledIterations = false;
+        return;
+      }
      }
    }
  
@@ -330,6 +343,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
    if (TripCount)
      return;
  
+  // Do not apply profile base peeling if it is disabled.
+  if (!UP.PeelProfiledIterations)
+    return;
    // If we don't know the trip count, but have reason to believe the average
    // trip count is low, peeling should be beneficial, since we will usually
    // hit the peeled section.
@@ -344,7 +360,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
                        << "\n");
  
      if (*PeelCount) {
-      if ((*PeelCount <= UnrollPeelMaxCount) &&
+      if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) &&
            (LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
          LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount
                            << " iterations.\n");
@@ -352,6 +368,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
          return;
        }
        LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
+      LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n");
        LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
        LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1)
                          << "\n");
@@ -738,5 +755,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
  
    NumPeeled++;
  
+  // Update Metadata for count of peeled off iterations.
+  unsigned AlreadyPeeled = 0;
+  if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
+    AlreadyPeeled = *Peeled;
+  addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
+
    return true;
  }
diff --git a/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll b/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll

new file mode 100644 (file)

index 0000000..0c56229
--- /dev/null
+++ b/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -loop-unroll -loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+declare void @f1()
+declare void @f2()
+
+; Check that we can peel off iterations that make conditions true.
+; The second invocation of loop-unroll will do profile based peeling of
+; remained iterations.
+define void @test1(i32 %k) !prof !4 {
+; CHECK: Loop Unroll: F[test1] Loop %for.body
+; CHECK: PEELING loop %for.body with iteration count 2!
+; CHECK: PEELING loop %for.body with iteration count 4!
+; CHECK: llvm.loop.unroll.disable
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %cmp1 = icmp ult i32 %i.05, 2
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+  call void @f1()
+  br label %for.inc
+
+if.else:
+  call void @f2()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1, !prof !2
+
+for.end:
+  ret void
+}
+
+!1 = distinct !{!1}
+!2 = !{!"branch_weights", i32 6, i32 1}
+!4 = !{!"function_entry_count", i64 1}
diff --git a/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-2.ll b/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-2.ll

new file mode 100644 (file)

index 0000000..6462283
--- /dev/null
+++ b/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-2.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -loop-unroll -loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+declare void @f1()
+declare void @f2()
+
+; Check that we can peel off iterations that make conditions true.
+; The second invocation of loop-unroll will NOT do profile based peeling of
+; remained iterations because the total number of peeled iterations exceeds
+; threashold specified with -unroll-peel-max-count=7.
+define void @test2(i32 %k) !prof !4 {
+; CHECK: Loop Unroll: F[test2] Loop %for.body
+; CHECK: PEELING loop %for.body with iteration count 2!
+; CHECK-NOT: llvm.loop.unroll.disable
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %cmp1 = icmp ult i32 %i.05, 2
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+  call void @f1()
+  br label %for.inc
+
+if.else:
+  call void @f2()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1, !prof !3
+
+for.end:
+  ret void
+}
+
+!1 = distinct !{!1}
+!3 = !{!"branch_weights", i32 8, i32 1}
+!4 = !{!"function_entry_count", i64 1}
diff --git a/test/Transforms/LoopUnroll/peel-loop-conditions.ll b/test/Transforms/LoopUnroll/peel-loop-conditions.ll

index a9937d9db9979738ccf2bceeba6a2b755a05d85c..0217e9384f9bc8e4b12671af2ff8bc138f53eeff 100644 (file)
--- a/test/Transforms/LoopUnroll/peel-loop-conditions.ll
+++ b/test/Transforms/LoopUnroll/peel-loop-conditions.ll
@@ -643,3 +643,4 @@ for.inc:
  for.end:
    ret void
  }
+; CHECK-NOT: llvm.loop.unroll.disable
+\ No newline at end of file
author	Serguei Katkov <serguei.katkov@azul.com>
	Fri, 2 Aug 2019 04:29:23 +0000 (04:29 +0000)
committer	Serguei Katkov <serguei.katkov@azul.com>
	Fri, 2 Aug 2019 04:29:23 +0000 (04:29 +0000)
include/llvm/Analysis/TargetTransformInfo.h		patch \| blob \| history
lib/Transforms/Scalar/LoopUnrollPass.cpp		patch \| blob \| history
lib/Transforms/Utils/LoopUnrollPeel.cpp		patch \| blob \| history
test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/LoopUnroll/peel-loop-conditions-pgo-2.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/LoopUnroll/peel-loop-conditions.ll		patch \| blob \| history