From ded3597f2c1234ea8df401b5d1fd5cf096e64108 Mon Sep 17 00:00:00 2001 From: Dehao Chen Date: Tue, 10 Oct 2017 21:13:50 +0000 Subject: [PATCH] Use the first instruction's count to estimate the function's entry frequency. Summary: In the current implementation, we only have accurate profile count for standalone symbols. For inlined functions, we do not have entry count data because it's not available in LBR. In this patch, we use the first instruction's frequency to estimate the function's entry count, especially for inlined functions. This may be inaccurate due to debug info in optimized code. However, this is a better estimate than the static 80/20 estimation we have in the current implementation. Reviewers: tejohnson, davidxl Reviewed By: tejohnson Subscribers: sanjoy, llvm-commits, aprantl Differential Revision: https://reviews.llvm.org/D38478 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315369 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ProfileData/SampleProf.h | 27 +++++++++++++++-- lib/Transforms/IPO/SampleProfile.cpp | 29 ++++++++++++------- .../Transforms/SampleProfile/indirect-call.ll | 4 +++ 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h index 3e17df14807..09e2723e591 100644 --- a/include/llvm/ProfileData/SampleProf.h +++ b/include/llvm/ProfileData/SampleProf.h @@ -296,10 +296,33 @@ public: /// Return the total number of samples collected inside the function. uint64_t getTotalSamples() const { return TotalSamples; } - /// Return the total number of samples collected at the head of the - /// function. + /// Return the total number of branch samples that have the function as the + /// branch target. This should be equivalent to the sample of the first + /// instruction of the symbol. 
But as we directly get this info for raw + /// profile without referring to potentially inaccurate debug info, this + /// gives more accurate profile data and is preferred for standalone symbols. uint64_t getHeadSamples() const { return TotalHeadSamples; } + /// Return the sample count of the first instruction of the function. + /// The function can be either a standalone symbol or an inlined function. + uint64_t getEntrySamples() const { + // Use either BodySamples or CallsiteSamples which ever has the smaller + // lineno. + if (!BodySamples.empty() && + (CallsiteSamples.empty() || + BodySamples.begin()->first < CallsiteSamples.begin()->first)) + return BodySamples.begin()->second.getSamples(); + if (!CallsiteSamples.empty()) { + uint64_t T = 0; + // An indirect callsite may be promoted to several inlined direct calls. + // We need to get the sum of them. + for (const auto &N_FS : CallsiteSamples.begin()->second) + T += N_FS.second.getEntrySamples(); + return T; + } + return 0; + } + /// Return all the samples collected in the body of the function. 
const BodySampleMap &getBodySamples() const { return BodySamples; } diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 6df17ec400c..46b9914347b 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -171,7 +171,7 @@ protected: ErrorOr getBlockWeight(const BasicBlock *BB); const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const; std::vector - findIndirectCallFunctionSamples(const Instruction &I) const; + findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; const FunctionSamples *findFunctionSamples(const Instruction &I) const; bool inlineCallInstruction(Instruction *I); bool inlineHotFunctions(Function &F, @@ -625,10 +625,11 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const { } /// Returns a vector of FunctionSamples that are the indirect call targets -/// of \p Inst. The vector is sorted by the total number of samples. +/// of \p Inst. The vector is sorted by the total number of samples. Stores +/// the total call count of the indirect call in \p Sum. 
std::vector SampleProfileLoader::findIndirectCallFunctionSamples( - const Instruction &Inst) const { + const Instruction &Inst, uint64_t &Sum) const { const DILocation *DIL = Inst.getDebugLoc(); std::vector R; @@ -640,16 +641,25 @@ SampleProfileLoader::findIndirectCallFunctionSamples( if (FS == nullptr) return R; + uint32_t LineOffset = getOffset(DIL); + uint32_t Discriminator = DIL->getBaseDiscriminator(); + + auto T = FS->findCallTargetMapAt(LineOffset, Discriminator); + Sum = 0; + if (T) + for (const auto &T_C : T.get()) + Sum += T_C.second; if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt( LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) { if (M->size() == 0) return R; for (const auto &NameFS : *M) { + Sum += NameFS.second.getEntrySamples(); R.push_back(&NameFS.second); } std::sort(R.begin(), R.end(), [](const FunctionSamples *L, const FunctionSamples *R) { - return L->getTotalSamples() > R->getTotalSamples(); + return L->getEntrySamples() > R->getEntrySamples(); }); } return R; @@ -764,7 +774,8 @@ bool SampleProfileLoader::inlineHotFunctions( if (CallSite(I).isIndirectCall()) { if (PromotedInsns.count(I)) continue; - for (const auto *FS : findIndirectCallFunctionSamples(*I)) { + uint64_t Sum; + for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) { if (IsThinLTOPreLink) { FS->findImportedFunctions(ImportGUIDs, F.getParent(), Samples->getTotalSamples() * @@ -786,12 +797,10 @@ bool SampleProfileLoader::inlineHotFunctions( !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && isLegalToPromote(I, R->getValue(), &Reason)) { - // The indirect target was promoted and inlined in the profile, - // as a result, we do not have profile info for the branch - // probability. We set the probability to 80% taken to indicate - // that the static call is likely taken. 
+ uint64_t C = FS->getEntrySamples(); Instruction *DI = promoteIndirectCall( - I, R->getValue(), 80, 100, false, ORE); + I, R->getValue(), C, Sum, false, ORE); + Sum -= C; PromotedInsns.insert(I); // If profile mismatches, we should not attempt to inline DI. if ((isa(DI) || isa(DI)) && diff --git a/test/Transforms/SampleProfile/indirect-call.ll b/test/Transforms/SampleProfile/indirect-call.ll index 8c80091c347..61a1bc51996 100644 --- a/test/Transforms/SampleProfile/indirect-call.ll +++ b/test/Transforms/SampleProfile/indirect-call.ll @@ -17,10 +17,12 @@ define void @test_inline(i64* (i32*)*, i32* %x) !dbg !6 { store i64* (i32*)* %0, i64* (i32*)** %2 %3 = load i64* (i32*)*, i64* (i32*)** %2 ; CHECK: icmp {{.*}} @foo_inline2 +; CHECK: br {{.*}} !prof ![[BR1:[0-9]+]] ; CHECK: if.true.direct_targ: ; CHECK-NOT: call ; CHECK: if.false.orig_indirect: ; CHECK: icmp {{.*}} @foo_inline1 +; CHECK: br {{.*}} !prof ![[BR2:[0-9]+]] ; CHECK: if.true.direct_targ1: ; CHECK-NOT: call ; CHECK: if.false.orig_indirect2: @@ -178,6 +180,8 @@ define void @test_direct() !dbg !22 { !4 = !DILocation(line: 4, scope: !3) !5 = !DILocation(line: 6, scope: !3) ; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398} +; CHECK: ![[BR1]] = !{!"branch_weights", i32 4000, i32 4000} +; CHECK: ![[BR2]] = !{!"branch_weights", i32 3000, i32 1000} ; CHECK: ![[VP]] = !{!"VP", i32 0, i64 1000, i64 -6391416044382067764, i64 1000} !6 = distinct !DISubprogram(name: "test_inline", scope: !1, file: !1, line: 6, unit: !0) !7 = !DILocation(line: 7, scope: !6) -- 2.40.0