From f0c173a27b7b0cc8de7678f8c479795feefd824b Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 2 Oct 2019 08:44:15 +0000 Subject: [PATCH] [AMDGPU] Make printf lowering faster when there are no printfs Summary: Printf lowering unconditionally visited every instruction in the module. To make it faster in the common case where there are no printfs, look up the printf function (if any) and iterate over its users instead. Reviewers: rampitec, kzhuravl, alex-t, arsenm Subscribers: jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68145 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373433 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AMDGPU/AMDGPUPrintfRuntimeBinding.cpp | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp index 261d6287763..5250bf455d7 100644 --- a/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp +++ b/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -30,7 +30,6 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" @@ -45,20 +44,13 @@ using namespace llvm; namespace { class LLVM_LIBRARY_VISIBILITY AMDGPUPrintfRuntimeBinding final - : public ModulePass, - public InstVisitor<AMDGPUPrintfRuntimeBinding> { + : public ModulePass { public: static char ID; explicit AMDGPUPrintfRuntimeBinding(); - void visitCallSite(CallSite CS) { - Function *F = CS.getCalledFunction(); - if (F && F->hasName() && F->getName() == "printf") - Printfs.push_back(CS.getInstruction()); - } - private: bool runOnModule(Module &M) override; void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers, @@ -80,7 +72,7 @@ private: const DataLayout *TD; const DominatorTree *DT; - SmallVector<Value *, 32> Printfs; + SmallVector<CallInst *, 32>
Printfs; }; } // namespace @@ -162,8 +154,7 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu( // NB: This is important for this string size to be divizable by 4 const char NonLiteralStr[4] = "???"; - for (auto P : Printfs) { - auto CI = cast<CallInst>(P); + for (auto CI : Printfs) { unsigned NumOps = CI->getNumArgOperands(); SmallString<16> OpConvSpecifiers; @@ -564,10 +555,8 @@ bool AMDGPUPrintfRuntimeBinding::lowerPrintfForGpu( } // erase the printf calls - for (auto P : Printfs) { - auto CI = cast<CallInst>(P); + for (auto CI : Printfs) CI->eraseFromParent(); - } Printfs.clear(); return true; @@ -578,7 +567,16 @@ bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) { if (TT.getArch() == Triple::r600) return false; - visit(M); + auto PrintfFunction = M.getFunction("printf"); + if (!PrintfFunction) + return false; + + for (auto &U : PrintfFunction->uses()) { + if (auto *CI = dyn_cast<CallInst>(U.getUser())) { + if (CI->isCallee(&U)) + Printfs.push_back(CI); + } + } if (Printfs.empty()) return false; -- 2.40.0