optimization, ``MD_prof`` nodes can also be assigned to a function definition.
The first operand is a string indicating the name of the associated counter.
-Currently, one counter is supported: "function_entry_count". This is a 64-bit
-counter that indicates the number of times that this function was invoked (in
-the case of instrumentation-based profiles). In the case of sampling-based
-profiles, this counter is an approximation of how many times the function was
-invoked.
+Currently, one counter is supported: "function_entry_count". The second operand
+is a 64-bit counter that indicates the number of times that this function was
+invoked (in the case of instrumentation-based profiles). In the case of
+sampling-based profiles, this operand is an approximation of how many times
+the function was invoked.
For example, in the code below, the instrumentation for function foo()
indicates that it was called 2,590 times at runtime.
ret i32 0
}
!1 = !{!"function_entry_count", i64 2590}
+
+If "function_entry_count" has more than 2 operands, the later operands are
+the GUIDs of the functions that need to be imported by ThinLTO. They are only
+set by sampling-based profiles. They are needed because the sampling-based
+profile was collected on a binary that had already imported and inlined these
+functions, and we need to ensure the IR matches in the ThinLTO backends for
+profile annotation. The reason why we cannot annotate this on the callsite is
+that it can only go down 1 level in the call chain. For cases such as
+foo_in_a_cc()->bar_in_b_cc()->baz_in_c_cc(), we need to go down 2 levels
+in the call chain to import both bar_in_b_cc and baz_in_c_cc.
#ifndef LLVM_IR_FUNCTION_H
#define LLVM_IR_FUNCTION_H
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/StringRef.h"
/// \brief Set the entry count for this function.
///
/// Entry count is the number of times this function was executed based on
- /// pgo data.
- void setEntryCount(uint64_t Count);
+ /// pgo data. \p Imports, when non-null, points to a set of GUIDs that need
+ /// to be imported by the function for sample PGO, to enable the same inlines
+ /// as the profiled optimized binary. \p Imports defaults to null.
+ void setEntryCount(uint64_t Count,
+ const DenseSet<GlobalValue::GUID> *Imports = nullptr);
/// \brief Get the entry count for this function.
///
/// pgo data.
Optional<uint64_t> getEntryCount() const;
+ /// Returns the set of GUIDs that need to be imported to the function for
+ /// sample PGO, to enable the same inlines as the profiled optimized binary.
+ /// The set is empty when the function carries no "function_entry_count"
+ /// profile metadata or that metadata lists no GUIDs after the count.
+ DenseSet<GlobalValue::GUID> getImportGUIDs() const;
+
/// Set the section prefix for this function.
void setSectionPrefix(StringRef Prefix);
#ifndef LLVM_IR_MDBUILDER_H
#define LLVM_IR_MDBUILDER_H
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/DataTypes.h"
#include <utility>
/// Return metadata specifying that a branch or switch is unpredictable.
MDNode *createUnpredictable();
- /// Return metadata containing the entry count for a function.
- MDNode *createFunctionEntryCount(uint64_t Count);
+ /// Return metadata containing the entry \p Count for a function, and the
+ /// GUIDs stored in \p Imports that need to be imported for sample PGO, to
+ /// enable the same inlines as the profiled optimized binary. \p Imports may
+ /// be null, in which case only the count is recorded.
+ MDNode *createFunctionEntryCount(uint64_t Count,
+ const DenseSet<GlobalValue::GUID> *Imports);
/// Return metadata containing the section prefix for a function.
MDNode *createFunctionSectionPrefix(StringRef Prefix);
#ifndef LLVM_PROFILEDATA_SAMPLEPROF_H_
#define LLVM_PROFILEDATA_SAMPLEPROF_H_
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/raw_ostream.h"
return Result;
}
+  /// Recursively collects the GUIDs of hot functions that are not usable from
+  /// module \p M.
+  ///
+  /// If this function's total sample count is not strictly greater than
+  /// \p Threshold, nothing is added and its inlined call sites are not
+  /// visited. Otherwise, when \p M has no function with this name, or that
+  /// function has no subprogram attached, the GUID computed from the name is
+  /// inserted into \p S, and every call-site sample is processed the same way.
+  ///
+  /// \param S         set receiving the collected GUIDs.
+  /// \param M         module in which the functions are looked up by name.
+  /// \param Threshold total-sample count a function must exceed to qualify.
+  void findImportedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M,
+                             uint64_t Threshold) const {
+    if (TotalSamples <= Threshold)
+      return;
+    const Function *F = M->getFunction(Name);
+    if (!F || !F->getSubprogram())
+      S.insert(Function::getGUID(Name));
+    // Bind by const reference: iterating by value would copy every nested
+    // sample record (and, transitively, its own call-site map) on each level
+    // of the recursion.
+    for (const auto &CS : CallsiteSamples)
+      CS.second.findImportedFunctions(S, M, Threshold);
+  }
+
+
/// Set the name of the function.
void setName(StringRef FunctionName) { Name = FunctionName; }
}
}
+ // Explicit add hot edges to enforce importing for designated GUIDs for
+ // sample PGO, to enable the same inlines as the profiled optimized binary.
+ for (auto &I : F.getImportGUIDs())
+ CallGraphEdges[I].updateHotness(CalleeInfo::HotnessType::Hot);
+
bool NonRenamableLocal = isNonRenamableLocal(F);
bool NotEligibleForImport =
NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit));
}
-void Function::setEntryCount(uint64_t Count) {
+/// Attaches MD_prof metadata built by MDBuilder::createFunctionEntryCount
+/// from \p Count and the optional GUID set \p S to this function. \p S is
+/// forwarded unchanged and may be null.
+void Function::setEntryCount(uint64_t Count,
+ const DenseSet<GlobalValue::GUID> *S) {
MDBuilder MDB(getContext());
- setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count));
+ setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count, S));
}
Optional<uint64_t> Function::getEntryCount() const {
return None;
}
+/// Collects the GUIDs stored after the count in this function's
+/// "function_entry_count" MD_prof metadata.
+///
+/// Operand 0 of the node is the counter name and operand 1 is the count, so
+/// the GUIDs start at operand 2. An empty set is returned when there is no
+/// MD_prof node, when the node has no operands, when its first operand is
+/// null or is not the string "function_entry_count", or when nothing follows
+/// the count.
+DenseSet<GlobalValue::GUID> Function::getImportGUIDs() const {
+  DenseSet<GlobalValue::GUID> R;
+  const MDNode *MD = getMetadata(LLVMContext::MD_prof);
+  // An operand-less node (e.g. !{}) has no operand 0 to inspect.
+  if (!MD || MD->getNumOperands() == 0)
+    return R;
+  // dyn_cast_or_null tolerates a null first operand instead of asserting.
+  const auto *MDS = dyn_cast_or_null<MDString>(MD->getOperand(0));
+  if (!MDS || !MDS->getString().equals("function_entry_count"))
+    return R;
+  for (unsigned I = 2, E = MD->getNumOperands(); I < E; ++I)
+    R.insert(mdconst::extract<ConstantInt>(MD->getOperand(I))->getZExtValue());
+  return R;
+}
+
+
void Function::setSectionPrefix(StringRef Prefix) {
MDBuilder MDB(getContext());
setMetadata(LLVMContext::MD_section_prefix,
return MDNode::get(Context, None);
}
-MDNode *MDBuilder::createFunctionEntryCount(uint64_t Count) {
+/// Builds the node !{!"function_entry_count", i64 Count, i64 GUID...}.
+/// The GUID operands are emitted only when \p Imports is non-null, one i64
+/// constant per element of the set.
+MDNode *MDBuilder::createFunctionEntryCount(
+ uint64_t Count, const DenseSet<GlobalValue::GUID> *Imports) {
Type *Int64Ty = Type::getInt64Ty(Context);
- return MDNode::get(Context,
- {createString("function_entry_count"),
- createConstant(ConstantInt::get(Int64Ty, Count))});
+ SmallVector<Metadata *, 8> Ops;
+ Ops.push_back(createString("function_entry_count"));
+ Ops.push_back(createConstant(ConstantInt::get(Int64Ty, Count)));
+ // NOTE(review): GUID operands follow the set's iteration order, which is
+ // presumably not sorted; confirm no consumer relies on operand order.
+ if (Imports)
+ for (auto ID : *Imports)
+ Ops.push_back(createConstant(ConstantInt::get(Int64Ty, ID)));
+ return MDNode::get(Context, Ops);
}
MDNode *MDBuilder::createFunctionSectionPrefix(StringRef Prefix) {
for (const auto &Pair : MDs) {
if (Pair.first == LLVMContext::MD_prof) {
MDNode *MD = Pair.second;
- Assert(MD->getNumOperands() == 2,
- "!prof annotations should have exactly 2 operands", MD);
+ Assert(MD->getNumOperands() >= 2,
+ "!prof annotations should have no less than 2 operands", MD);
// Check first operand.
Assert(MD->getOperand(0) != nullptr, "first operand should not be null",
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
- bool inlineHotFunctions(Function &F);
+ bool inlineHotFunctions(Function &F,
+ DenseSet<GlobalValue::GUID> &ImportGUIDs);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
/// it to direct call. Each indirect call is limited with a single target.
///
/// \param F function to perform iterative inlining.
+/// \param ImportGUIDs a set to be updated to include all GUIDs that come
+/// from a different module but inlined in the profiled binary.
///
/// \returns True if there is any inline happened.
-bool SampleProfileLoader::inlineHotFunctions(Function &F) {
+bool SampleProfileLoader::inlineHotFunctions(
+ Function &F, DenseSet<GlobalValue::GUID> &ImportGUIDs) {
DenseSet<Instruction *> PromotedInsns;
bool Changed = false;
LLVMContext &Ctx = F.getContext();
continue;
}
}
- if (!CalledFunction || !CalledFunction->getSubprogram())
+ if (!CalledFunction || !CalledFunction->getSubprogram()) {
+ findCalleeFunctionSamples(*I)->findImportedFunctions(
+ ImportGUIDs, F.getParent(),
+ Samples->getTotalSamples() * SampleProfileHotThreshold / 100);
continue;
+ }
DebugLoc DLoc = I->getDebugLoc();
uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples();
if (InlineFunction(CallSite(DI), IFI)) {
bool Changed = true;
unsigned I = 0;
- // Add an entry count to the function using the samples gathered
- // at the function entry.
- F.setEntryCount(Samples->getHeadSamples() + 1);
-
// If BB weight is larger than its corresponding loop's header BB weight,
// use the BB weight to replace the loop header BB weight.
for (auto &BI : F) {
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
<< ": " << getFunctionLoc(F) << "\n");
- Changed |= inlineHotFunctions(F);
+ DenseSet<GlobalValue::GUID> ImportGUIDs;
+ Changed |= inlineHotFunctions(F, ImportGUIDs);
// Compute basic block weights.
Changed |= computeBlockWeights(F);
if (Changed) {
+ // Add an entry count to the function using the samples gathered at the
+ // function entry. Also sets the GUIDs that comes from a different
+ // module but inlined in the profiled binary. This is aiming at making
+ // the IR match the profiled binary before annotation.
+ F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs);
+
// Compute dominance and loop info needed for propagation.
computeDominanceAndLoopInfo(F);
; CHECK-NEXT: <VERSION
; See if the call to func is registered, using the expected callsite count
; and profile count, with value id matching the subsequent value symbol table.
-; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=[[HOT1:.*]] op5=3 op6=[[COLD:.*]] op7=1 op8=[[HOT2:.*]] op9=3 op10=[[NONE1:.*]] op11=2 op12=[[HOT3:.*]] op13=3 op14=[[NONE2:.*]] op15=2 op16=[[NONE3:.*]] op17=2/>
+; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=[[HOT1:.*]] op5=3 op6=[[COLD:.*]] op7=1 op8=[[HOT2:.*]] op9=3 op10=[[NONE1:.*]] op11=2 op12=[[HOT3:.*]] op13=3 op14=[[NONE2:.*]] op15=2 op16=[[NONE3:.*]] op17=2 op18=[[LEGACY:.*]] op19=3/>
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
; CHECK-LABEL: <VALUE_SYMTAB
; CHECK-NEXT: <FNENTRY {{.*}} record string = 'hot_function
; CHECK-DAG: <ENTRY abbrevid=6 op0=[[HOT1]] {{.*}} record string = 'hot1'
; CHECK-DAG: <ENTRY abbrevid=6 op0=[[HOT2]] {{.*}} record string = 'hot2'
; CHECK-DAG: <ENTRY abbrevid=6 op0=[[HOT3]] {{.*}} record string = 'hot3'
+; CHECK-DAG: <COMBINED_ENTRY abbrevid=11 op0=[[LEGACY]] op1=123/>
; CHECK-LABEL: </VALUE_SYMTAB>
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
!llvm.module.flags = !{!1}
-!20 = !{!"function_entry_count", i64 110}
+!20 = !{!"function_entry_count", i64 110, i64 123}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
--- /dev/null
+main:10000:0
+ 3: foo:1000
+ 3: bar:200
+ 4: baz:10
--- /dev/null
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/import.prof -S | FileCheck %s
+
+; Tests whether the functions in the inline stack are added to the
+; function_entry_count metadata.
+
+declare void @foo()
+
+define void @main() !dbg !7 {
+ call void @foo(), !dbg !18
+ ret void
+}
+
+; GUIDs of foo and bar should be included in the metadata to make sure hot
+; inline stacks are imported.
+; CHECK: !{!"function_entry_count", i64 1, i64 6699318081062747564, i64 -2012135647395072713}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, variables: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5 "}
+!15 = !DILexicalBlockFile(discriminator: 1, file: !1, scope: !7)
+!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
+!18 = !DILocation(line: 10, scope: !17)
}
!1 = !{!"function_entry_count"}
-; CHECK-NEXT: !prof annotations should have exactly 2 operands
+; CHECK-NEXT: !prof annotations should have no less than 2 operands
; CHECK-NEXT: !1 = !{!"function_entry_count"}
unreachable
}
-!0 = !{}
+!0 = !{!"function_entry_count", i64 100}