From: Dehao Chen Date: Tue, 28 Feb 2017 18:09:44 +0000 (+0000) Subject: Add function importing info from samplepgo profile to the module summary. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e26c421c66edc0e8f1f7e42f4f450f3b81f6e223;p=llvm Add function importing info from samplepgo profile to the module summary. Summary: For SamplePGO, the profile may contain cross-module inline stacks. As we need to make sure the profile annotation happens when all the hot inline stacks are expanded, we need to pass this info to the module importer so that it can import proper functions if necessary. This patch implemented this feature by emitting cross-module targets as part of function entry metadata. In the module-summary phase, the metadata is used to build call edges that points to functions need to be imported. Reviewers: mehdi_amini, tejohnson Reviewed By: tejohnson Subscribers: davidxl, llvm-commits Differential Revision: https://reviews.llvm.org/D30053 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296498 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/docs/BranchWeightMetadata.rst b/docs/BranchWeightMetadata.rst index 9e61d232d74..b941d0d1505 100644 --- a/docs/BranchWeightMetadata.rst +++ b/docs/BranchWeightMetadata.rst @@ -123,11 +123,11 @@ To allow comparing different functions during inter-procedural analysis and optimization, ``MD_prof`` nodes can also be assigned to a function definition. The first operand is a string indicating the name of the associated counter. -Currently, one counter is supported: "function_entry_count". This is a 64-bit -counter that indicates the number of times that this function was invoked (in -the case of instrumentation-based profiles). In the case of sampling-based -profiles, this counter is an approximation of how many times the function was -invoked. +Currently, one counter is supported: "function_entry_count". The second operand +is a 64-bit counter that indicates the number of times that this function was +invoked (in the case of instrumentation-based profiles). In the case of +sampling-based profiles, this operand is an approximation of how many times +the function was invoked. For example, in the code below, the instrumentation for function foo() indicates that it was called 2,590 times at runtime. @@ -138,3 +138,13 @@ indicates that it was called 2,590 times at runtime. ret i32 0 } !1 = !{!"function_entry_count", i64 2590} + +If "function_entry_count" has more than 2 operands, the later operands are +the GUID of the functions that needs to be imported by ThinLTO. This is only +set by sampling based profile. It is needed because the sampling based profile +was collected on a binary that had already imported and inlined these functions, +and we need to ensure the IR matches in the ThinLTO backends for profile +annotation. The reason why we cannot annotate this on the callsite is that it +can only goes down 1 level in the call chain. For the cases where +foo_in_a_cc()->bar_in_b_cc()->baz_in_c_cc(), we will need to go down 2 levels +in the call chain to import both bar_in_b_cc and baz_in_c_cc. diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index 0c839168221..f9e8fcc52c9 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -18,6 +18,7 @@ #ifndef LLVM_IR_FUNCTION_H #define LLVM_IR_FUNCTION_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" #include "llvm/ADT/StringRef.h" @@ -207,8 +208,11 @@ public: /// \brief Set the entry count for this function. /// /// Entry count is the number of times this function was executed based on - /// pgo data. - void setEntryCount(uint64_t Count); + /// pgo data. \p Imports points to a set of GUIDs that needs to be imported + /// by the function for sample PGO, to enable the same inlines as the + /// profiled optimized binary. + void setEntryCount(uint64_t Count, + const DenseSet *Imports = nullptr); /// \brief Get the entry count for this function. /// @@ -216,6 +220,10 @@ public: /// pgo data. Optional getEntryCount() const; + /// Returns the set of GUIDs that needs to be imported to the function for + /// sample PGO, to enable the same inlines as the profiled optimized binary. + DenseSet getImportGUIDs() const; + /// Set the section prefix for this function. void setSectionPrefix(StringRef Prefix); diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h index bab8728ed49..899976a87bc 100644 --- a/include/llvm/IR/MDBuilder.h +++ b/include/llvm/IR/MDBuilder.h @@ -15,7 +15,9 @@ #ifndef LLVM_IR_MDBUILDER_H #define LLVM_IR_MDBUILDER_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/DataTypes.h" #include @@ -63,8 +65,11 @@ public: /// Return metadata specifying that a branch or switch is unpredictable. MDNode *createUnpredictable(); - /// Return metadata containing the entry count for a function. - MDNode *createFunctionEntryCount(uint64_t Count); + /// Return metadata containing the entry \p Count for a function, and the + /// GUIDs stored in \p Imports that need to be imported for sample PGO, to + /// enable the same inlines as the profiled optimized binary + MDNode *createFunctionEntryCount(uint64_t Count, + const DenseSet *Imports); /// Return metadata containing the section prefix for a function. MDNode *createFunctionSectionPrefix(StringRef Prefix); diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h index b286df34957..1ae1cc48a8a 100644 --- a/include/llvm/ProfileData/SampleProf.h +++ b/include/llvm/ProfileData/SampleProf.h @@ -15,8 +15,11 @@ #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H_ #define LLVM_PROFILEDATA_SAMPLEPROF_H_ +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/raw_ostream.h" @@ -300,6 +303,20 @@ public: return Result; } + /// Recursively traverses all children, if the corresponding function is + /// not defined in module \p M, and its total sample is no less than + /// \p Threshold, add its corresponding GUID to \p S. + void findImportedFunctions(DenseSet &S, const Module *M, + uint64_t Threshold) const { + if (TotalSamples <= Threshold) + return; + Function *F = M->getFunction(Name); + if (!F || !F->getSubprogram()) + S.insert(Function::getGUID(Name)); + for (auto CS : CallsiteSamples) + CS.second.findImportedFunctions(S, M, Threshold); + } + /// Set the name of the function. void setName(StringRef FunctionName) { Name = FunctionName; } diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index 950b8987c5b..9bc55034c14 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -259,6 +259,11 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, } } + // Explicit add hot edges to enforce importing for designated GUIDs for + // sample PGO, to enable the same inlines as the profiled optimized binary. + for (auto &I : F.getImportGUIDs()) + CallGraphEdges[I].updateHotness(CalleeInfo::HotnessType::Hot); + bool NonRenamableLocal = isNonRenamableLocal(F); bool NotEligibleForImport = NonRenamableLocal || HasInlineAsmMaybeReferencingInternal || diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 0c6b3520dad..7b5d49c34b3 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -1259,9 +1259,10 @@ void Function::setValueSubclassDataBit(unsigned Bit, bool On) { setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit)); } -void Function::setEntryCount(uint64_t Count) { +void Function::setEntryCount(uint64_t Count, + const DenseSet *S) { MDBuilder MDB(getContext()); - setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count)); + setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count, S)); } Optional Function::getEntryCount() const { @@ -1278,6 +1279,18 @@ Optional Function::getEntryCount() const { return None; } +DenseSet Function::getImportGUIDs() const { + DenseSet R; + if (MDNode *MD = getMetadata(LLVMContext::MD_prof)) + if (MDString *MDS = dyn_cast(MD->getOperand(0))) + if (MDS->getString().equals("function_entry_count")) + for (unsigned i = 2; i < MD->getNumOperands(); i++) + R.insert(mdconst::extract(MD->getOperand(i)) + ->getValue() + .getZExtValue()); + return R; +} + void Function::setSectionPrefix(StringRef Prefix) { MDBuilder MDB(getContext()); setMetadata(LLVMContext::MD_section_prefix, diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp index f4bfd599215..b9c4f482adf 100644 --- a/lib/IR/MDBuilder.cpp +++ b/lib/IR/MDBuilder.cpp @@ -56,11 +56,16 @@ MDNode *MDBuilder::createUnpredictable() { return MDNode::get(Context, None); } -MDNode *MDBuilder::createFunctionEntryCount(uint64_t Count) { +MDNode *MDBuilder::createFunctionEntryCount( + uint64_t Count, const DenseSet *Imports) { Type *Int64Ty = Type::getInt64Ty(Context); - return MDNode::get(Context, - {createString("function_entry_count"), - createConstant(ConstantInt::get(Int64Ty, Count))}); + SmallVector Ops; + Ops.push_back(createString("function_entry_count")); + Ops.push_back(createConstant(ConstantInt::get(Int64Ty, Count))); + if (Imports) + for (auto ID : *Imports) + Ops.push_back(createConstant(ConstantInt::get(Int64Ty, ID))); + return MDNode::get(Context, Ops); } MDNode *MDBuilder::createFunctionSectionPrefix(StringRef Prefix) { diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 5b3ec41b635..90017354631 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -1653,8 +1653,8 @@ void Verifier::verifyFunctionMetadata( for (const auto &Pair : MDs) { if (Pair.first == LLVMContext::MD_prof) { MDNode *MD = Pair.second; - Assert(MD->getNumOperands() == 2, - "!prof annotations should have exactly 2 operands", MD); + Assert(MD->getNumOperands() >= 2, + "!prof annotations should have no less than 2 operands", MD); // Check first operand. Assert(MD->getOperand(0) != nullptr, "first operand should not be null", diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index ad057d50cc3..a9f5b0333c5 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -163,7 +163,8 @@ protected: ErrorOr getBlockWeight(const BasicBlock *BB); const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const; const FunctionSamples *findFunctionSamples(const Instruction &I) const; - bool inlineHotFunctions(Function &F); + bool inlineHotFunctions(Function &F, + DenseSet &ImportGUIDs); void printEdgeWeight(raw_ostream &OS, Edge E); void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); @@ -604,9 +605,12 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { /// it to direct call. Each indirect call is limited with a single target. /// /// \param F function to perform iterative inlining. +/// \param ImportGUIDs a set to be updated to include all GUIDs that come +/// from a different module but inlined in the profiled binary. /// /// \returns True if there is any inline happened. -bool SampleProfileLoader::inlineHotFunctions(Function &F) { +bool SampleProfileLoader::inlineHotFunctions( + Function &F, DenseSet &ImportGUIDs) { DenseSet PromotedInsns; bool Changed = false; LLVMContext &Ctx = F.getContext(); @@ -655,8 +659,12 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) { continue; } } - if (!CalledFunction || !CalledFunction->getSubprogram()) + if (!CalledFunction || !CalledFunction->getSubprogram()) { + findCalleeFunctionSamples(*I)->findImportedFunctions( + ImportGUIDs, F.getParent(), + Samples->getTotalSamples() * SampleProfileHotThreshold / 100); continue; + } DebugLoc DLoc = I->getDebugLoc(); uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples(); if (InlineFunction(CallSite(DI), IFI)) { @@ -1041,10 +1049,6 @@ void SampleProfileLoader::propagateWeights(Function &F) { bool Changed = true; unsigned I = 0; - // Add an entry count to the function using the samples gathered - // at the function entry. - F.setEntryCount(Samples->getHeadSamples() + 1); - // If BB weight is larger than its corresponding loop's header BB weight, // use the BB weight to replace the loop header BB weight. for (auto &BI : F) { @@ -1273,12 +1277,19 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { DEBUG(dbgs() << "Line number for the first instruction in " << F.getName() << ": " << getFunctionLoc(F) << "\n"); - Changed |= inlineHotFunctions(F); + DenseSet ImportGUIDs; + Changed |= inlineHotFunctions(F, ImportGUIDs); // Compute basic block weights. Changed |= computeBlockWeights(F); if (Changed) { + // Add an entry count to the function using the samples gathered at the + // function entry. Also sets the GUIDs that comes from a different + // module but inlined in the profiled binary. This is aiming at making + // the IR match the profiled binary before annotation. + F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs); + // Compute dominance and loop info needed for propagation. computeDominanceAndLoopInfo(F); diff --git a/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll b/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll index 9e6e72cda3a..a9f65c97610 100644 --- a/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll +++ b/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll @@ -10,7 +10,7 @@ ; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-LABEL: ; CHECK-LABEL: ; COMBINED: