bool UpperBound;
/// Allow peeling off loop iterations for loops with low dynamic tripcount.
bool AllowPeeling;
+ /// Allow unrolling of all the iterations of the remainder loop created by
+ /// runtime unrolling.
+ bool UnrollRemainder;
};
/// \brief Get target-customized preferences for the generic loop unrolling
bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
bool AllowRuntime, bool AllowExpensiveTripCount,
bool PreserveCondBr, bool PreserveOnlyFirst,
- unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, bool PreserveLCSSA);
+ unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder,
+ LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
+ bool PreserveLCSSA);
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool AllowExpensiveTripCount,
- bool UseEpilogRemainder, LoopInfo *LI,
+ bool UseEpilogRemainder, bool UnrollRemainder,
+ LoopInfo *LI,
ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE,
bool PreserveLCSSA);
void computePeelCount(Loop *L, unsigned LoopSize,
cl::desc("Allows loops to be peeled when the dynamic "
"trip count is known to be low."));
+static cl::opt<bool> UnrollUnrollRemainder( // Hidden command-line override.
+ "unroll-remainder", cl::Hidden, // If given, its value sets UP.UnrollRemainder.
+ cl::desc("Allow the loop remainder to be unrolled."));
+
// This option isn't ever intended to be enabled, it serves to allow
// experiments to check the assumptions about when this kind of revisit is
// necessary.
UP.Partial = false;
UP.Runtime = false;
UP.AllowRemainder = true;
+ UP.UnrollRemainder = false;
UP.AllowExpensiveTripCount = false;
UP.Force = false;
UP.UpperBound = false;
UP.UpperBound = false;
if (UnrollAllowPeeling.getNumOccurrences() > 0)
UP.AllowPeeling = UnrollAllowPeeling;
+ if (UnrollUnrollRemainder.getNumOccurrences() > 0)
+ UP.UnrollRemainder = UnrollUnrollRemainder;
// Apply user values provided by argument
if (UserThreshold.hasValue()) {
// Unroll the loop.
if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero,
- TripMultiple, UP.PeelCount, LI, &SE, &DT, &AC, &ORE,
+ TripMultiple, UP.PeelCount, UP.UnrollRemainder,
+ LI, &SE, &DT, &AC, &ORE,
PreserveLCSSA))
return false;
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
bool AllowRuntime, bool AllowExpensiveTripCount,
bool PreserveCondBr, bool PreserveOnlyFirst,
- unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI,
+ unsigned TripMultiple, unsigned PeelCount,
+ bool UnrollRemainder, LoopInfo *LI,
ScalarEvolution *SE, DominatorTree *DT,
AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
bool PreserveLCSSA) {
if (RuntimeTripCount && TripMultiple % Count != 0 &&
!UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
- EpilogProfitability, LI, SE, DT,
+ EpilogProfitability, UnrollRemainder,
+ LI, SE, DT, AC, ORE,
PreserveLCSSA)) {
if (Force)
RuntimeTripCount = false;
/// Return the new cloned loop that is created when CreateRemainderLoop is true.
static Loop *
CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
- const bool UseEpilogRemainder, BasicBlock *InsertTop,
+ const bool UseEpilogRemainder, const bool UnrollRemainder,
+ BasicBlock *InsertTop,
BasicBlock *InsertBot, BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
}
LLVMContext &Context = NewLoop->getHeader()->getContext();
- SmallVector<Metadata *, 1> DisableOperands;
- DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
- MDNode *DisableNode = MDNode::get(Context, DisableOperands);
- MDs.push_back(DisableNode);
+ if (!UnrollRemainder) { // Leave the clone unrollable when requested.
+ SmallVector<Metadata *, 1> DisableOperands;
+ DisableOperands.push_back(MDString::get(Context,
+ "llvm.loop.unroll.disable"));
+ MDNode *DisableNode = MDNode::get(Context, DisableOperands);
+ MDs.push_back(DisableNode); // Becomes an operand of NewLoopID below.
+ }
MDNode *NewLoopID = MDNode::get(Context, MDs);
// Set operand 0 to refer to the loop id itself.
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool AllowExpensiveTripCount,
bool UseEpilogRemainder,
+ bool UnrollRemainder,
LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, bool PreserveLCSSA) {
+ DominatorTree *DT, AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE,
+ bool PreserveLCSSA) {
DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
DEBUG(L->dump());
BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
Loop *remainderLoop = CloneLoopBlocks(
- L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot,
+ L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
+ InsertTop, InsertBot,
NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
// Insert the cloned blocks into the function.
formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
}
+ if (remainderLoop && UnrollRemainder) { // Skipped if no loop was cloned.
+ UnrollLoop(remainderLoop, /*Count*/Count - 1, /*TripCount*/Count - 1,
+ /*Force*/false, /*AllowRuntime*/false,
+ /*AllowExpensiveTripCount*/false, /*PreserveCondBr*/true,
+ /*PreserveOnlyFirst*/false, /*TripMultiple*/1,
+ /*PeelCount*/0, /*UnrollRemainder*/false, LI, SE, DT, AC, ORE,
+ PreserveLCSSA); // NOTE(review): result ignored; a failed unroll is silent.
+ }
+
NumRuntimeUnrolled++;
return true;
}
--- /dev/null
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 -unroll-remainder -instcombine | FileCheck %s
+
+; CHECK-LABEL: unroll
+define i32 @unroll(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
+entry:
+ %cmp9 = icmp eq i32 %N, 0
+ br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph
+
+for.body.lr.ph:
+ %wide.trip.count = zext i32 %N to i64
+ br label %for.body
+
+for.cond.cleanup:
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %c.0.lcssa
+
+; CHECK-LABEL: for.body.lr.ph
+; CHECK: [[COUNT:%[a-z.0-9]+]] = add nsw i64 %wide.trip.count, -1
+; CHECK: %xtraiter = and i64 %wide.trip.count, 3
+; CHECK: [[CMP:%[a-z.0-9]+]] = icmp ult i64 [[COUNT]], 3
+; CHECK: br i1 [[CMP]], label %[[CLEANUP:.*]], label %for.body.lr.ph.new
+
+; CHECK-LABEL: for.body.lr.ph.new:
+; CHECK: %unroll_iter = sub nsw i64 %wide.trip.count, %xtraiter
+; CHECK: br label %for.body
+
+; CHECK: [[CLEANUP]]:
+; CHECK: [[MOD:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 0
+; CHECK: br i1 [[MOD]], label %[[EXIT:.*]], label %[[EPIL_PEEL0_PRE:.*]]
+
+; CHECK: [[EPIL_PEEL0_PRE]]:
+; CHECK: br label %[[EPIL_PEEL0:.*]]
+
+; CHECK: [[EPIL_PEEL0]]:
+; CHECK: [[PEEL_CMP0:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 1
+; CHECK: br i1 [[PEEL_CMP0]], label %[[EPIL_EXIT:.*]], label %[[EPIL_PEEL1:.*]],
+
+; CHECK: [[EPIL_EXIT]]:
+; CHECK: br label %[[EXIT]]
+
+; CHECK: [[EXIT]]:
+; CHECK: ret i32
+
+; CHECK-LABEL: for.body:
+; CHECK: [[INDVAR0:%[a-z.0-9]+]] = phi i64 [ 0, %for.body.lr.ph
+; CHECK: [[ITER:%[a-z.0-9]+]] = phi i64 [ %unroll_iter
+; CHECK: or i64 [[INDVAR0]], 1
+; CHECK: or i64 [[INDVAR0]], 2
+; CHECK: or i64 [[INDVAR0]], 3
+; CHECK: add nsw i64 [[INDVAR0]], 4
+; CHECK: [[SUB:%[a-z.0-9]+]] = add i64 [[ITER]], -4
+; CHECK: [[ITER_CMP:%[a-z.0-9]+]] = icmp eq i64 [[SUB]], 0
+; CHECK: br i1 [[ITER_CMP]], label %[[LOOP_EXIT:.*]], label %for.body
+
+; CHECK: [[EPIL_PEEL1]]:
+; CHECK: [[PEEL_CMP1:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 2
+; CHECK: br i1 [[PEEL_CMP1]], label %[[EPIL_EXIT]], label %[[EPIL_PEEL2:.*]],
+
+; CHECK: [[EPIL_PEEL2]]:
+; CHECK: br label %[[EXIT]]
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %c.010 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx2, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, %c.010
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}