From 52192eb65c45baf50cde8700bfc98df2795a4c3a Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Wed, 16 Oct 2019 09:09:55 +0000 Subject: [PATCH] [HardwareLoops] Optimisation remarks This adds the initial plumbing to support optimisation remarks in the IR hardware-loop pass. I have left a todo in a comment where we can improve the reporting, and will iterate on that now that we have this initial support in. Differential Revision: https://reviews.llvm.org/D68579 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374980 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/HardwareLoops.cpp | 104 ++++++++++++++---- test/CodeGen/ARM/O3-pipeline.ll | 3 + .../Transforms/HardwareLoops/ARM/structure.ll | 7 +- .../HardwareLoops/unconditional-latch.ll | 8 +- 4 files changed, 97 insertions(+), 25 deletions(-) diff --git a/lib/CodeGen/HardwareLoops.cpp b/lib/CodeGen/HardwareLoops.cpp index 6a0f98d2e2b..968177cd96f 100644 --- a/lib/CodeGen/HardwareLoops.cpp +++ b/lib/CodeGen/HardwareLoops.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -75,8 +76,44 @@ ForceGuardLoopEntry( STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); +#ifndef NDEBUG +static void debugHWLoopFailure(const StringRef DebugMsg, + Instruction *I) { + dbgs() << "HWLoops: " << DebugMsg; + if (I) + dbgs() << ' ' << *I; + else + dbgs() << '.'; + dbgs() << '\n'; +} +#endif + +static OptimizationRemarkAnalysis +createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) { + Value *CodeRegion = L->getHeader(); + DebugLoc DL = L->getStartLoc(); + + if (I) { + CodeRegion = I->getParent(); + // If there is no debug location attached to the instruction, revert back to + // using the loop's. 
+ if (I->getDebugLoc()) + DL = I->getDebugLoc(); + } + + OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion); + R << "hardware-loop not created: "; + return R; +} + namespace { + void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag, + OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) { + LLVM_DEBUG(debugHWLoopFailure(Msg, I)); + ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg); + } + using TTI = TargetTransformInfo; class HardwareLoops : public FunctionPass { @@ -97,6 +134,7 @@ namespace { AU.addRequired<ScalarEvolutionWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); } // Try to convert the given Loop into a hardware loop. @@ -110,6 +148,7 @@ namespace { ScalarEvolution *SE = nullptr; LoopInfo *LI = nullptr; const DataLayout *DL = nullptr; + OptimizationRemarkEmitter *ORE = nullptr; const TargetTransformInfo *TTI = nullptr; DominatorTree *DT = nullptr; bool PreserveLCSSA = false; @@ -143,8 +182,9 @@ namespace { public: HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE, - const DataLayout &DL) : - SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()), + const DataLayout &DL, + OptimizationRemarkEmitter *ORE) : + SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()), ExitCount(Info.ExitCount), CountType(Info.CountType), ExitBranch(Info.ExitBranch), @@ -157,6 +197,7 @@ namespace { private: ScalarEvolution &SE; const DataLayout &DL; + OptimizationRemarkEmitter *ORE = nullptr; Loop *L = nullptr; Module *M = nullptr; const SCEV *ExitCount = nullptr; @@ -182,6 +223,7 @@ bool HardwareLoops::runOnFunction(Function &F) { DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); DL = &F.getParent()->getDataLayout(); + ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); LibInfo = TLIP ? 
&TLIP->getTLI(F) : nullptr; PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); @@ -201,31 +243,39 @@ bool HardwareLoops::runOnFunction(Function &F) { // converted and the parent loop doesn't support containing a hardware loop. bool HardwareLoops::TryConvertLoop(Loop *L) { // Process nested loops first. - for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) - if (TryConvertLoop(*I)) + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { + if (TryConvertLoop(*I)) { + reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested", + ORE, L); return true; // Stop search. + } + } HardwareLoopInfo HWLoopInfo(L); - if (!HWLoopInfo.canAnalyze(*LI)) + if (!HWLoopInfo.canAnalyze(*LI)) { + reportHWLoopFailure("cannot analyze loop, irreducible control flow", + "HWLoopCannotAnalyze", ORE, L); return false; + } - if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) || - ForceHardwareLoops) { - - // Allow overriding of the counter width and loop decrement value. - if (CounterBitWidth.getNumOccurrences()) - HWLoopInfo.CountType = - IntegerType::get(M->getContext(), CounterBitWidth); + if (!ForceHardwareLoops && + !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) { + reportHWLoopFailure("it's not profitable to create a hardware-loop", + "HWLoopNotProfitable", ORE, L); + return false; + } - if (LoopDecrement.getNumOccurrences()) - HWLoopInfo.LoopDecrement = - ConstantInt::get(HWLoopInfo.CountType, LoopDecrement); + // Allow overriding of the counter width and loop decrement value. 
+ if (CounterBitWidth.getNumOccurrences()) + HWLoopInfo.CountType = + IntegerType::get(M->getContext(), CounterBitWidth); - MadeChange |= TryConvertLoop(HWLoopInfo); - return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop); - } + if (LoopDecrement.getNumOccurrences()) + HWLoopInfo.LoopDecrement = + ConstantInt::get(HWLoopInfo.CountType, LoopDecrement); - return false; + MadeChange |= TryConvertLoop(HWLoopInfo); + return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop); } bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { @@ -234,8 +284,13 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L); if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop, - ForceHardwareLoopPHI)) + ForceHardwareLoopPHI)) { + // TODO: there can be many reasons a loop is not considered a + // candidate, so we should let isHardwareLoopCandidate fill in the + // reason and then report a better message here. 
+ reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L); return false; + } assert( (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && @@ -249,7 +304,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { if (!Preheader) return false; - HardwareLoop HWLoop(HWLoopInfo, *SE, *DL); + HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE); HWLoop.Create(); ++NumHWLoops; return true; @@ -257,10 +312,13 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { void HardwareLoop::Create() { LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n"); - + Value *LoopCountInit = InitLoopCount(); - if (!LoopCountInit) + if (!LoopCountInit) { + reportHWLoopFailure("could not safely create a loop count expression", + "HWLoopNotSafe", ORE, L); return; + } InsertIterationSetup(LoopCountInit); diff --git a/test/CodeGen/ARM/O3-pipeline.ll b/test/CodeGen/ARM/O3-pipeline.ll index 6cc7e53aeff..cb6a005445b 100644 --- a/test/CodeGen/ARM/O3-pipeline.ll +++ b/test/CodeGen/ARM/O3-pipeline.ll @@ -52,6 +52,9 @@ ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Hardware Loop Insertion ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager diff --git a/test/Transforms/HardwareLoops/ARM/structure.ll b/test/Transforms/HardwareLoops/ARM/structure.ll index d413e2b3006..37af5c308e0 100644 --- a/test/Transforms/HardwareLoops/ARM/structure.ll +++ b/test/Transforms/HardwareLoops/ARM/structure.ll @@ -1,7 +1,8 @@ ; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops %s -S -o - | FileCheck %s -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-LLC +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi %s -o - -pass-remarks-analysis=hardware-loops 2>&1 | 
FileCheck %s --check-prefix=CHECK-LLC ; RUN: opt -mtriple=thumbv8.1m.main -loop-unroll -unroll-remainder=false -S < %s | llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL +; CHECK-LLC: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop ; CHECK-LABEL: early_exit ; CHECK-NOT: llvm.set.loop.iterations ; CHECK-NOT: llvm.loop.decrement @@ -46,6 +47,7 @@ do.end: ; CHECK-NOT: [[LOOP_DEC1:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1) ; CHECK-NOT: br i1 [[LOOP_DEC1]], label %while.cond1.preheader.us, label %while.end7 +; CHECK-LLC: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported ; CHECK-LLC: nested: ; CHECK-LLC-NOT: mov lr, r1 ; CHECK-LLC: dls lr, r1 @@ -176,6 +178,9 @@ while.end7: ret void } + +; CHECK-LLC: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate +; CHECK-LLC: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported ; CHECK-LABEL: not_rotated ; CHECK-NOT: call void @llvm.set.loop.iterations ; CHECK-NOT: call i32 @llvm.loop.decrement.i32 diff --git a/test/Transforms/HardwareLoops/unconditional-latch.ll b/test/Transforms/HardwareLoops/unconditional-latch.ll index 4a3cd989d94..9d02e1c27b6 100644 --- a/test/Transforms/HardwareLoops/unconditional-latch.ll +++ b/test/Transforms/HardwareLoops/unconditional-latch.ll @@ -1,6 +1,12 @@ ; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW ; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW -; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK 
--check-prefix=CHECK-LATCH +; +; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 \ +; RUN: -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true \ +; RUN: -hardware-loops -S -pass-remarks-analysis=hardware-loops %s -o - \ +; RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-LATCH + +; CHECK-LATCH: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate ; CHECK-LABEL: not_rotated ; CHECK-LATCH-NOT: call void @llvm.set.loop.iterations -- 2.40.0