From 875f0a3e72cdd47bbb597f87bbac6e6930ae1cee Mon Sep 17 00:00:00 2001 From: Dean Michael Berris Date: Thu, 4 May 2017 01:24:26 +0000 Subject: [PATCH] [XRay] Detect loops in functions being lowered Summary: This is an implementation of the loop detection logic that XRay needs to determine whether a function might take a non-trivial amount of time at runtime. Without this heuristic, XRay tends not to instrument short functions that contain loops whose running time may depend on inputs or external values. While this implementation does no analysis beyond determining whether there is a loop in the MachineFunction being code-generated, it paves the way for more sophisticated analysis of the function in the future (for example, determining whether the trip count of the loop might be constant, and making the decision based on that instead). This enables us to cover more functions with the default heuristics, and potentially to identify functions that have variable runtime latency just by looking for the presence of loops. 
Reviewers: chandlerc, rnk, pelikan Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D32274 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302103 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/XRayInstrumentation.cpp | 46 ++++++++++++++++--------- test/CodeGen/X86/xray-loop-detection.ll | 23 +++++++++++++ 2 files changed, 53 insertions(+), 16 deletions(-) create mode 100644 test/CodeGen/X86/xray-loop-detection.ll diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp index 7d2848bdc13..2df3602733f 100644 --- a/lib/CodeGen/XRayInstrumentation.cpp +++ b/lib/CodeGen/XRayInstrumentation.cpp @@ -18,6 +18,8 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetInstrInfo.h" @@ -33,6 +35,14 @@ struct XRayInstrumentation : public MachineFunctionPass { initializeXRayInstrumentationPass(*PassRegistry::getPassRegistry()); } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; private: @@ -43,7 +53,7 @@ private: // This is the approach to go on CPUs which have a single RET instruction, // like x86/x86_64. void replaceRetWithPatchableRet(MachineFunction &MF, - const TargetInstrInfo *TII); + const TargetInstrInfo *TII); // Prepend the original return instruction with the exit sled code ("patchable // function exit" pseudo-instruction), preserving the original return @@ -54,13 +64,12 @@ private: // have to call the trampoline and return from it to the original return // instruction of the function being instrumented. 
void prependRetWithPatchableExit(MachineFunction &MF, - const TargetInstrInfo *TII); + const TargetInstrInfo *TII); }; } // anonymous namespace -void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF, - const TargetInstrInfo *TII) -{ +void XRayInstrumentation::replaceRetWithPatchableRet( + MachineFunction &MF, const TargetInstrInfo *TII) { // We look for *all* terminators and returns, then replace those with // PATCHABLE_RET instructions. SmallVector Terminators; @@ -91,9 +100,8 @@ void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF, I->eraseFromParent(); } -void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF, - const TargetInstrInfo *TII) -{ +void XRayInstrumentation::prependRetWithPatchableExit( + MachineFunction &MF, const TargetInstrInfo *TII) { for (auto &MBB : MF) { for (auto &T : MBB.terminators()) { unsigned Opc = 0; @@ -106,7 +114,7 @@ void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF, if (Opc != 0) { // Prepend the return instruction with PATCHABLE_FUNCTION_EXIT or // PATCHABLE_TAIL_CALL . - BuildMI(MBB, T, T.getDebugLoc(),TII->get(Opc)); + BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc)); } } } @@ -125,8 +133,13 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { return false; // XRay threshold attribute not found. if (Attr.getValueAsString().getAsInteger(10, XRayThreshold)) return false; // Invalid value for threshold. - if (F.size() < XRayThreshold) - return false; // Function is too small. + + // Check if we have a loop. + // FIXME: Maybe make this smarter, and see whether the loops are dependent + // on inputs or side-effects? + MachineLoopInfo &MLI = getAnalysis(); + if (MLI.empty() && F.size() < XRayThreshold) + return false; // Function is too small and has no loops. 
} // We look for the first non-empty MachineBasicBlock, so that we can insert @@ -142,12 +155,10 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { if (!MF.getSubtarget().isXRaySupported()) { FirstMI.emitError("An attempt to perform XRay instrumentation for an" - " unsupported target."); + " unsupported target."); return false; } - // FIXME: Do the loop triviality analysis here or in an earlier pass. - // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the // MachineFunction. BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(), @@ -176,5 +187,8 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { char XRayInstrumentation::ID = 0; char &llvm::XRayInstrumentationID = XRayInstrumentation::ID; -INITIALIZE_PASS(XRayInstrumentation, "xray-instrumentation", "Insert XRay ops", - false, false) +INITIALIZE_PASS_BEGIN(XRayInstrumentation, "xray-instrumentation", + "Insert XRay ops", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(XRayInstrumentation, "xray-instrumentation", + "Insert XRay ops", false, false) diff --git a/test/CodeGen/X86/xray-loop-detection.ll b/test/CodeGen/X86/xray-loop-detection.ll new file mode 100644 index 00000000000..3cd6b4aa6f8 --- /dev/null +++ b/test/CodeGen/X86/xray-loop-detection.ll @@ -0,0 +1,23 @@ +; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -filetype=asm -o - -mtriple=x86_64-darwin-unknown < %s | FileCheck %s + +define i32 @foo(i32 %i) nounwind noinline uwtable "xray-instruction-threshold"="1" { +entry: + br label %Test +Test: + %indvar = phi i32 [0, %entry], [%nextindvar, %Inc] + %cond = icmp eq i32 %indvar, %i + br i1 %cond, label %Exit, label %Inc +Inc: + %nextindvar = add i32 %indvar, 1 + br label %Test +Exit: + %retval = phi i32 [%indvar, %Test] + ret i32 %retval +} + +; CHECK-LABEL: xray_sled_0: +; CHECK-NEXT: .ascii "\353\t" +; CHECK-NEXT: nopw 512(%rax,%rax) +; CHECK-LABEL: Ltmp0: + -- 2.40.0