From 25333a14e77a601e603d08525bca1c89444c0e20 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 18 Dec 2018 17:16:05 +0000 Subject: [PATCH] [LoopUnroll] Honor '#pragma unroll' even with -fno-unroll-loops. When using clang with `-fno-unroll-loops` (implicitly added with `-O1`), the LoopUnrollPass is not not added to the (legacy) pass pipeline. This also means that it will not process any loop metadata such as llvm.loop.unroll.enable (which is generated by #pragma unroll or WarnMissedTransformationsPass emits a warning that a forced transformation has not been applied (see https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20181210/610833.html). Such explicit transformations should take precedence over disabling heuristics. This patch unconditionally adds LoopUnrollPass to the optimizing pipeline (that is, it is still not added with `-O0`), but passes a flag indicating whether automatic unrolling is dis-/enabled. This is the same approach as LoopVectorize uses. The new pass manager's pipeline builder has no option to disable unrolling, hence the problem does not apply. Differential Revision: https://reviews.llvm.org/D55716 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@349509 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/Scalar.h | 5 +- .../llvm/Transforms/Scalar/LoopUnrollPass.h | 16 +++++- lib/Transforms/IPO/PassManagerBuilder.cpp | 30 +++++------ lib/Transforms/Scalar/LoopUnrollPass.cpp | 54 ++++++++++++------- .../disable-loop-unrolling_forced.ll | 30 +++++++++++ 5 files changed, 98 insertions(+), 37 deletions(-) create mode 100644 test/Transforms/LoopUnroll/disable-loop-unrolling_forced.ll diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 6df2f9a379a..9019c3be494 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -183,11 +183,12 @@ Pass *createLoopInstSimplifyPass(); // // LoopUnroll - This pass is a simple loop unrolling pass. // -Pass *createLoopUnrollPass(int OptLevel = 2, int Threshold = -1, int Count = -1, +Pass *createLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false, + int Threshold = -1, int Count = -1, int AllowPartial = -1, int Runtime = -1, int UpperBound = -1, int AllowPeeling = -1); // Create an unrolling pass for full unrolling that uses exact trip count only. -Pass *createSimpleLoopUnrollPass(int OptLevel = 2); +Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false); //===----------------------------------------------------------------------===// // diff --git a/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/include/llvm/Transforms/Scalar/LoopUnrollPass.h index 20c9a26b98c..e38e983cc9e 100644 --- a/include/llvm/Transforms/Scalar/LoopUnrollPass.h +++ b/include/llvm/Transforms/Scalar/LoopUnrollPass.h @@ -24,8 +24,14 @@ class LPMUpdater; class LoopFullUnrollPass : public PassInfoMixin { const int OptLevel; + /// If false, use a cost model to determine whether unrolling of a loop is + /// profitable. If true, only loops that explicitly request unrolling via + /// metadata are considered. All other loops are skipped. + const bool OnlyWhenForced; + public: - explicit LoopFullUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {} + explicit LoopFullUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false) + : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {} PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); @@ -50,7 +56,13 @@ struct LoopUnrollOptions { Optional AllowUpperBound; int OptLevel; - LoopUnrollOptions(int OptLevel = 2) : OptLevel(OptLevel) {} + /// If false, use a cost model to determine whether unrolling of a loop is + /// profitable. If true, only loops that explicitly request unrolling via + /// metadata are considered. All other loops are skipped. + bool OnlyWhenForced; + + LoopUnrollOptions(int OptLevel = 2, bool OnlyWhenForced = false) + : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {} /// Enables or disables partial unrolling. When disabled only full unrolling /// is allowed. diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index e625433a8e4..268939e8247 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -378,8 +378,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses( if (EnableLoopInterchange) MPM.add(createLoopInterchangePass()); // Interchange loops - if (!DisableUnrollLoops) - MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, + DisableUnrollLoops)); // Unroll small loops addExtensionsToPM(EP_LoopOptimizerEnd, MPM); // This ends the loop pass pipelines. @@ -682,16 +682,17 @@ void PassManagerBuilder::populateModulePassManager( addExtensionsToPM(EP_Peephole, MPM); addInstructionCombiningPass(MPM); - if (!DisableUnrollLoops) { - if (EnableUnrollAndJam) { - // Unroll and Jam. We do this before unroll but need to be in a separate - // loop pass manager in order for the outer loop to be processed by - // unroll and jam before the inner loop is unrolled. - MPM.add(createLoopUnrollAndJamPass(OptLevel)); - } + if (EnableUnrollAndJam && !DisableUnrollLoops) { + // Unroll and Jam. We do this before unroll but need to be in a separate + // loop pass manager in order for the outer loop to be processed by + // unroll and jam before the inner loop is unrolled. + MPM.add(createLoopUnrollAndJamPass(OptLevel)); + } - MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops + MPM.add(createLoopUnrollPass(OptLevel, + DisableUnrollLoops)); // Unroll small loops + if (!DisableUnrollLoops) { // LoopUnroll may generate some redundency to cleanup. addInstructionCombiningPass(MPM); @@ -700,7 +701,7 @@ void PassManagerBuilder::populateModulePassManager( // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. MPM.add(createLICMPass()); - } + } MPM.add(createWarnMissedTransformationsPass()); @@ -872,12 +873,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { if (EnableLoopInterchange) PM.add(createLoopInterchangePass()); - if (!DisableUnrollLoops) - PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops + PM.add(createSimpleLoopUnrollPass(OptLevel, + DisableUnrollLoops)); // Unroll small loops PM.add(createLoopVectorizePass(true, LoopVectorize)); // The vectorizer may have significantly shortened a loop body; unroll again. - if (!DisableUnrollLoops) - PM.add(createLoopUnrollPass(OptLevel)); + PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops)); PM.add(createWarnMissedTransformationsPass()); diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index b7baba6b928..38b80f48ed0 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -965,13 +965,15 @@ static LoopUnrollResult tryToUnrollLoop( Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel, - Optional ProvidedCount, Optional ProvidedThreshold, - Optional ProvidedAllowPartial, Optional ProvidedRuntime, - Optional ProvidedUpperBound, Optional ProvidedAllowPeeling) { + bool OnlyWhenForced, Optional ProvidedCount, + Optional ProvidedThreshold, Optional ProvidedAllowPartial, + Optional ProvidedRuntime, Optional ProvidedUpperBound, + Optional ProvidedAllowPeeling) { LLVM_DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); - if (hasUnrollTransformation(L) & TM_Disable) + TransformationMode TM = hasUnrollTransformation(L); + if (TM & TM_Disable) return LoopUnrollResult::Unmodified; if (!L->isLoopSimplifyForm()) { LLVM_DEBUG( @@ -979,6 +981,11 @@ static LoopUnrollResult tryToUnrollLoop( return LoopUnrollResult::Unmodified; } + // When automtatic unrolling is disabled, do not unroll unless overridden for + // this loop. + if (OnlyWhenForced && !(TM & TM_Enable)) + return LoopUnrollResult::Unmodified; + unsigned NumInlineCandidates; bool NotDuplicatable; bool Convergent; @@ -1119,6 +1126,12 @@ public: static char ID; // Pass ID, replacement for typeid int OptLevel; + + /// If false, use a cost model to determine whether unrolling of a loop is + /// profitable. If true, only loops that explicitly request unrolling via + /// metadata are considered. All other loops are skipped. + bool OnlyWhenForced; + Optional ProvidedCount; Optional ProvidedThreshold; Optional ProvidedAllowPartial; @@ -1126,15 +1139,16 @@ public: Optional ProvidedUpperBound; Optional ProvidedAllowPeeling; - LoopUnroll(int OptLevel = 2, Optional Threshold = None, + LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false, + Optional Threshold = None, Optional Count = None, Optional AllowPartial = None, Optional Runtime = None, Optional UpperBound = None, Optional AllowPeeling = None) - : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)), - ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial), - ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound), - ProvidedAllowPeeling(AllowPeeling) { + : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced), + ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold), + ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime), + ProvidedUpperBound(UpperBound), ProvidedAllowPeeling(AllowPeeling) { initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -1157,8 +1171,8 @@ public: bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); LoopUnrollResult Result = tryToUnrollLoop( - L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, ProvidedCount, - ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, + L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced, + ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling); if (Result == LoopUnrollResult::FullyUnrolled) @@ -1188,14 +1202,16 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) -Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count, - int AllowPartial, int Runtime, int UpperBound, +Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced, + int Threshold, int Count, int AllowPartial, + int Runtime, int UpperBound, int AllowPeeling) { // TODO: It would make more sense for this function to take the optionals // directly, but that's dangerous since it would silently break out of tree // callers. return new LoopUnroll( - OptLevel, Threshold == -1 ? None : Optional(Threshold), + OptLevel, OnlyWhenForced, + Threshold == -1 ? None : Optional(Threshold), Count == -1 ? None : Optional(Count), AllowPartial == -1 ? None : Optional(AllowPartial), Runtime == -1 ? None : Optional(Runtime), @@ -1203,8 +1219,8 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count, AllowPeeling == -1 ? None : Optional(AllowPeeling)); } -Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) { - return createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0, 0); +Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced) { + return createLoopUnrollPass(OptLevel, OnlyWhenForced, -1, -1, 0, 0, 0, 0); } PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, @@ -1234,7 +1250,8 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, - /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None, + /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced, + /*Count*/ None, /*Threshold*/ None, /*AllowPartial*/ false, /*Runtime*/ false, /*UpperBound*/ false, /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified; @@ -1371,7 +1388,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, // flavors of unrolling during construction time (by setting UnrollOpts). LoopUnrollResult Result = tryToUnrollLoop( &L, DT, &LI, SE, TTI, AC, ORE, - /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*Count*/ None, + /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced, + /*Count*/ None, /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling); Changed |= Result != LoopUnrollResult::Unmodified; diff --git a/test/Transforms/LoopUnroll/disable-loop-unrolling_forced.ll b/test/Transforms/LoopUnroll/disable-loop-unrolling_forced.ll new file mode 100644 index 00000000000..9a0900df1a1 --- /dev/null +++ b/test/Transforms/LoopUnroll/disable-loop-unrolling_forced.ll @@ -0,0 +1,30 @@ +; RUN: opt -disable-loop-unrolling -O1 -S < %s | FileCheck %s +; +; Check loop unrolling metadata is honored despite automatic unrolling +; being disabled in the pass builder. +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK-LABEL: @forced( +; CHECK: load +; CHECK: load +define void @forced(i32* nocapture %a) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret void +} + +!0 = distinct !{!0, !{!"llvm.loop.unroll.enable"}, + !{!"llvm.loop.unroll.count", i32 2}} -- 2.50.1