Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
OptimizationRemarkEmitter *ORE, unsigned &TripCount, unsigned MaxTripCount,
- unsigned &TripMultiple, unsigned LoopSize,
+ bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
// Check for explicit Count.
// Also we need to check if we exceed FullUnrollMaxCount.
// If using the upper bound to unroll, TripMultiple should be set to 1 because
// we do not know when loop may exit.
- // MaxTripCount and ExactTripCount cannot both be non zero since we only
+
+ // We can unroll by the upper bound amount if it's generally allowed or if
+ // we know that the loop is executed either the upper bound or zero times.
+ // (MaxOrZero unrolling keeps only the first loop test, so the number of
+ // loop tests remains the same compared to the non-unrolled version, whereas
+ // the generic upper bound unrolling keeps all but the last loop test so the
+ // number of loop tests goes up which may end up being worse on targets with
+ // constrained branch predictor resources so is controlled by an option.)
+ // In addition we only unroll small upper bounds.
+ unsigned FullUnrollMaxTripCount = MaxTripCount;
+ if (!(UP.UpperBound || MaxOrZero) ||
+ FullUnrollMaxTripCount > UnrollMaxUpperBound)
+ FullUnrollMaxTripCount = 0;
+
+ // FullUnrollMaxTripCount and ExactTripCount cannot both be non zero; we only
// compute the former when the latter is zero.
unsigned ExactTripCount = TripCount;
- assert((ExactTripCount == 0 || MaxTripCount == 0) &&
- "ExtractTripCount and MaxTripCount cannot both be non zero.");
- unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
+ assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) &&
+ "ExtractTripCount and UnrollByMaxCount cannot both be non zero.");
+
+ unsigned FullUnrollTripCount =
+ ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
UP.Count = FullUnrollTripCount;
if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
// When computing the unrolled size, note that BEInsns are not replicated
// like the rest of the loop body.
if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
- UseUpperBound = (MaxTripCount == FullUnrollTripCount);
+ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
TripCount = FullUnrollTripCount;
TripMultiple = UP.UpperBound ? 1 : TripMultiple;
return ExplicitUnroll;
unsigned Boost =
getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
- UseUpperBound = (MaxTripCount == FullUnrollTripCount);
+ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
TripCount = FullUnrollTripCount;
TripMultiple = UP.UpperBound ? 1 : TripMultiple;
return ExplicitUnroll;
"because "
"unrolled size is too large.";
});
+ LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
+ << "\n");
return ExplicitUnroll;
}
assert(TripCount == 0 &&
return false;
}
+ // Don't unroll a small upper bound loop unless user or TTI asked to do so.
+ if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) {
+ UP.Count = 0;
+ return false;
+ }
+
// Check if the runtime trip count is too small when profile is available.
if (L->getHeader()->getParent()->hasProfileData()) {
if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
if (UP.Count > UP.MaxCount)
UP.Count = UP.MaxCount;
- LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
+
+ if (MaxTripCount && UP.Count > MaxTripCount)
+ UP.Count = MaxTripCount;
+
+ LLVM_DEBUG(dbgs() << " runtime unrolling with count: " << UP.Count
<< "\n");
if (UP.Count < 2)
UP.Count = 0;
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
- unsigned MaxTripCount = 0;
unsigned TripMultiple = 1;
// If there are multiple exiting blocks but one of them is the latch, use the
// latch for the trip count estimation. Otherwise insist on a single exiting
// Try to find the trip count upper bound if we cannot find the exact trip
// count.
+ unsigned MaxTripCount = 0;
bool MaxOrZero = false;
if (!TripCount) {
MaxTripCount = SE.getSmallConstantMaxTripCount(L);
MaxOrZero = SE.isBackedgeTakenCountMaxOrZero(L);
- // We can unroll by the upper bound amount if it's generally allowed or if
- // we know that the loop is executed either the upper bound or zero times.
- // (MaxOrZero unrolling keeps only the first loop test, so the number of
- // loop tests remains the same compared to the non-unrolled version, whereas
- // the generic upper bound unrolling keeps all but the last loop test so the
- // number of loop tests goes up which may end up being worse on targets with
- // constrained branch predictor resources so is controlled by an option.)
- // In addition we only unroll small upper bounds.
- if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
- MaxTripCount = 0;
- }
}
// computeUnrollCount() decides whether it is beneficial to use upper bound to
// fully unroll the loop.
bool UseUpperBound = false;
bool IsCountSetExplicitly = computeUnrollCount(
- L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount,
+ L, TTI, DT, LI, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
TripMultiple, LoopSize, UP, UseUpperBound);
if (!UP.Count)
return LoopUnrollResult::Unmodified;
--- /dev/null
+; RUN: opt -S -loop-unroll -unroll-runtime %s -o - | FileCheck %s
+; RUN: opt -S -loop-unroll -unroll-runtime -unroll-max-upperbound=6 %s -o - | FileCheck %s --check-prefix=UPPER
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+@global = dso_local local_unnamed_addr global i32 0, align 4
+@global.1 = dso_local local_unnamed_addr global i8* null, align 4
+
+; Check that loop in hoge_3, with a runtime upperbound of 3, is not unrolled.
+; CHECK-LABEL: hoge_3
+; CHECK: loop:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1 %{{.*}}, label %loop
+; UPPER-LABEL: hoge_3
+; UPPER: loop:
+; UPPER: store
+; UPPER-NOT: store
+; UPPER: br i1 %{{.*}}, label %loop
+; Test input: a guarded single-block loop whose counter starts at the value
+; loaded from @global and advances by 8 while it stays below 17 (unsigned).
+; With that step the body runs at most three times (when the loaded value is 0).
+; Each iteration writes %arg through a pointer that moves forward one byte.
+define dso_local void @hoge_3(i8 %arg) {
+entry:
+ %x = load i32, i32* @global, align 4
+ %y = load i8*, i8** @global.1, align 4
+ ; Loop guard: skip the loop entirely unless the initial counter is below 17.
+ %0 = icmp ult i32 %x, 17
+ br i1 %0, label %loop, label %exit
+
+loop:
+ ; %iv is the counter and %ptr the running pointer; both are seeded from the
+ ; values loaded in the entry block and updated on the back edge.
+ %iv = phi i32 [ %x, %entry ], [ %iv.next, %loop ]
+ %ptr = phi i8* [ %y, %entry ], [ %ptr.next, %loop ]
+ ; Step of 8, marked as not wrapping in the unsigned sense.
+ %iv.next = add nuw i32 %iv, 8
+ %ptr.next = getelementptr inbounds i8, i8* %ptr, i32 1
+ ; The write targets the already-incremented pointer, not %ptr itself.
+ store i8 %arg, i8* %ptr.next, align 1
+ ; Latch: take the back edge while the incremented counter is still below 17.
+ %1 = icmp ult i32 %iv.next, 17
+ br i1 %1, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; Check that loop in hoge_5, with a runtime upperbound of 5, is unrolled when -unroll-max-upperbound=4
+; CHECK-LABEL: hoge_5
+; CHECK: loop:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1 %{{.*}}, label %loop
+; UPPER-LABEL: hoge_5
+; UPPER: loop:
+; UPPER: store
+; UPPER: store
+; UPPER: store
+; UPPER: br i1 %{{.*}}, label %loop
+; Test input: same shape as a guarded single-block counting loop, but the
+; counter loaded from @global advances by 4 while it stays below 17 (unsigned).
+; With that step the body runs at most five times (when the loaded value is 0).
+; Each iteration writes %arg through a pointer that moves forward one byte.
+define dso_local void @hoge_5(i8 %arg) {
+entry:
+ %x = load i32, i32* @global, align 4
+ %y = load i8*, i8** @global.1, align 4
+ ; Loop guard: skip the loop entirely unless the initial counter is below 17.
+ %0 = icmp ult i32 %x, 17
+ br i1 %0, label %loop, label %exit
+
+loop:
+ ; %iv is the counter and %ptr the running pointer; both are seeded from the
+ ; values loaded in the entry block and updated on the back edge.
+ %iv = phi i32 [ %x, %entry ], [ %iv.next, %loop ]
+ %ptr = phi i8* [ %y, %entry ], [ %ptr.next, %loop ]
+ ; Step of 4, marked as not wrapping in the unsigned sense.
+ %iv.next = add nuw i32 %iv, 4
+ %ptr.next = getelementptr inbounds i8, i8* %ptr, i32 1
+ ; The write targets the already-incremented pointer, not %ptr itself.
+ store i8 %arg, i8* %ptr.next, align 1
+ ; Latch: take the back edge while the incremented counter is still below 17.
+ %1 = icmp ult i32 %iv.next, 17
+ br i1 %1, label %loop, label %exit
+
+exit:
+ ret void
+}