From be5ae1bbe6783dc43a632f92395ef3d443132fa9 Mon Sep 17 00:00:00 2001 From: Serguei Katkov Date: Fri, 2 Aug 2019 04:29:23 +0000 Subject: [PATCH] [Loop Peeling] Do not close further unroll/peel if profile based peeling was not used. Current peeling cost model can decide to peel off not all iterations but only some of them to eliminate conditions on phi. At the same time if any peeling happens the door for further unroll/peel optimizations on that loop closes because the part of the code thinks that if peeling happened it is profile based peeling and all iterations are peeled off. To resolve this inconsistency the patch provides the flag which states whether the full peeling basing on profile is enabled or not and peeling cost model is able to modify this field like it does not PeelCount. In a separate patch I will introduce an option to allow/disallow peeling basing on profile. To avoid infinite loop peeling the patch tracks the total number of peeled iteration through llvm.loop.peeled.count loop metadata. Reviewers: reames, fhahn Reviewed By: reames Subscribers: hiraditya, zzheng, dmgreen, llvm-commits Differential Revision: https://reviews.llvm.org/D64972 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@367647 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/TargetTransformInfo.h | 7 ++- lib/Transforms/Scalar/LoopUnrollPass.cpp | 3 +- lib/Transforms/Utils/LoopUnrollPeel.cpp | 35 ++++++++++++--- .../LoopUnroll/peel-loop-conditions-pgo-1.ll | 43 +++++++++++++++++++ .../LoopUnroll/peel-loop-conditions-pgo-2.ll | 43 +++++++++++++++++++ .../LoopUnroll/peel-loop-conditions.ll | 1 + 6 files changed, 124 insertions(+), 8 deletions(-) create mode 100644 test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll create mode 100644 test/Transforms/LoopUnroll/peel-loop-conditions-pgo-2.ll diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 7574b811bc1..b5015227230 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -469,12 +469,17 @@ public: bool Force; /// Allow using trip count upper bound to unroll loops. bool UpperBound; - /// Allow peeling off loop iterations for loops with low dynamic tripcount. + /// Allow peeling off loop iterations. bool AllowPeeling; /// Allow unrolling of all the iterations of the runtime loop remainder. bool UnrollRemainder; /// Allow unroll and jam. Used to enable unroll and jam for the target. bool UnrollAndJam; + /// Allow peeling basing on profile. Uses to enable peeling off all + /// iterations basing on provided profile. + /// If the value is true the peeling cost model can decide to peel only + /// some iterations and in this case it will set this to false. + bool PeelProfiledIterations; /// Threshold for unroll and jam, for inner loop size. The 'Threshold' /// value above is used during unroll and jam for the outer loop size. /// This value is used in the same manner to limit the size of the inner diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 2fa7436213d..081dc760c58 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -202,6 +202,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( UP.UpperBound = false; UP.AllowPeeling = true; UP.UnrollAndJam = false; + UP.PeelProfiledIterations = true; UP.UnrollAndJamInnerLoopThreshold = 60; // Override with any target specific settings @@ -1139,7 +1140,7 @@ static LoopUnrollResult tryToUnrollLoop( // If the loop was peeled, we already "used up" the profile information // we had, so we don't want to unroll or peel again. if (UnrollResult != LoopUnrollResult::FullyUnrolled && - (IsCountSetExplicitly || UP.PeelCount)) + (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount))) L->setLoopAlreadyUnrolled(); return UnrollResult; diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index e78a5708340..3fad421f58f 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -65,6 +65,8 @@ static cl::opt UnrollPeelMultiDeoptExit( "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden, cl::desc("Allow peeling of loops with multiple deopt exits.")); +static const char *PeeledCountMetaData = "llvm.loop.peeled.count"; + // Designates that a Phi is estimated to become invariant after an "infinite" // number of loop iterations (i.e. only may become an invariant if the loop is // fully unrolled). @@ -275,6 +277,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount << " iterations.\n"); UP.PeelCount = UnrollForcePeelCount; + UP.PeelProfiledIterations = true; return; } @@ -282,6 +285,13 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (!UP.AllowPeeling) return; + unsigned AlreadyPeeled = 0; + if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) + AlreadyPeeled = *Peeled; + // Stop if we already peeled off the maximum number of iterations. + if (AlreadyPeeled >= UnrollPeelMaxCount) + return; + // Here we try to get rid of Phis which become invariants after 1, 2, ..., N // iterations of the loop. For this we compute the number for iterations after // which every Phi is guaranteed to become an invariant, and try to peel the @@ -317,11 +327,14 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); // Consider max peel count limitation. assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); - LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount - << " iteration(s) to turn" - << " some Phis into invariants.\n"); - UP.PeelCount = DesiredPeelCount; - return; + if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) { + LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount + << " iteration(s) to turn" + << " some Phis into invariants.\n"); + UP.PeelCount = DesiredPeelCount; + UP.PeelProfiledIterations = false; + return; + } } } @@ -330,6 +343,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (TripCount) return; + // Do not apply profile base peeling if it is disabled. + if (!UP.PeelProfiledIterations) + return; // If we don't know the trip count, but have reason to believe the average // trip count is low, peeling should be beneficial, since we will usually // hit the peeled section. @@ -344,7 +360,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, << "\n"); if (*PeelCount) { - if ((*PeelCount <= UnrollPeelMaxCount) && + if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) && (LoopSize * (*PeelCount + 1) <= UP.Threshold)) { LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n"); @@ -352,6 +368,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, return; } LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); + LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n"); LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n"); @@ -738,5 +755,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, NumPeeled++; + // Update Metadata for count of peeled off iterations. + unsigned AlreadyPeeled = 0; + if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) + AlreadyPeeled = *Peeled; + addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount); + return true; } diff --git a/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll b/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll new file mode 100644 index 00000000000..0c56229d064 --- /dev/null +++ b/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -loop-unroll -loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s +; REQUIRES: asserts + +declare void @f1() +declare void @f2() + +; Check that we can peel off iterations that make conditions true. +; The second invocation of loop-unroll will do profile based peeling of +; remained iterations. +define void @test1(i32 %k) !prof !4 { +; CHECK: Loop Unroll: F[test1] Loop %for.body +; CHECK: PEELING loop %for.body with iteration count 2! +; CHECK: PEELING loop %for.body with iteration count 4! +; CHECK: llvm.loop.unroll.disable +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp ult i32 %i.05, 2 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + call void @f1() + br label %for.inc + +if.else: + call void @f2() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1, !prof !2 + +for.end: + ret void +} + +!1 = distinct !{!1} +!2 = !{!"branch_weights", i32 6, i32 1} +!4 = !{!"function_entry_count", i64 1} diff --git a/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-2.ll b/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-2.ll new file mode 100644 index 00000000000..6462283dfaf --- /dev/null +++ b/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-2.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -loop-unroll -loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s +; REQUIRES: asserts + +declare void @f1() +declare void @f2() + +; Check that we can peel off iterations that make conditions true. +; The second invocation of loop-unroll will NOT do profile based peeling of +; remained iterations because the total number of peeled iterations exceeds +; threashold specified with -unroll-peel-max-count=7. +define void @test2(i32 %k) !prof !4 { +; CHECK: Loop Unroll: F[test2] Loop %for.body +; CHECK: PEELING loop %for.body with iteration count 2! +; CHECK-NOT: llvm.loop.unroll.disable +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp ult i32 %i.05, 2 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + call void @f1() + br label %for.inc + +if.else: + call void @f2() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1, !prof !3 + +for.end: + ret void +} + +!1 = distinct !{!1} +!3 = !{!"branch_weights", i32 8, i32 1} +!4 = !{!"function_entry_count", i64 1} diff --git a/test/Transforms/LoopUnroll/peel-loop-conditions.ll b/test/Transforms/LoopUnroll/peel-loop-conditions.ll index a9937d9db99..0217e9384f9 100644 --- a/test/Transforms/LoopUnroll/peel-loop-conditions.ll +++ b/test/Transforms/LoopUnroll/peel-loop-conditions.ll @@ -643,3 +643,4 @@ for.inc: for.end: ret void } +; CHECK-NOT: llvm.loop.unroll.disable \ No newline at end of file -- 2.40.0