From 87452e81d06b1b779be0a6cc7f2b123ad7863e71 Mon Sep 17 00:00:00 2001 From: Dehao Chen Date: Fri, 28 Jul 2017 01:02:54 +0000 Subject: [PATCH] Separate the ICP total threshold and remaining threshold. Summary: In the current implementation, isPromotionProfitable only checks if the call count to a direct target is no less than a certain percentage threshold of the remaining call counts that have not been promoted. This causes code size problems when the target count is small but greater than a large portion of the remaining counts. E.g. target1 takes 99.9%, while target2 takes 0.1%. Both targets will be promoted and inlined, making the function size too large, which potentially prevents it from being inlined further into its callers. This patch adds another percentage threshold against the total indirect call count. The target count now needs to be no less than both thresholds in order to be promoted speculatively. Reviewers: davidxl, tejohnson Reviewed By: tejohnson Subscribers: sanjoy, llvm-commits Differential Revision: https://reviews.llvm.org/D35962 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@309345 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Analysis/IndirectCallPromotionAnalysis.h | 8 +++-- .../IndirectCallPromotionAnalysis.cpp | 32 ++++++++++++------- .../PGOProfile/indirect_call_promotion.ll | 11 +++++-- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/include/llvm/Analysis/IndirectCallPromotionAnalysis.h b/include/llvm/Analysis/IndirectCallPromotionAnalysis.h index 007e4d8602f..8b1c10139de 100644 --- a/include/llvm/Analysis/IndirectCallPromotionAnalysis.h +++ b/include/llvm/Analysis/IndirectCallPromotionAnalysis.h @@ -27,10 +27,12 @@ private: // Allocate space to read the profile annotation. std::unique_ptr ValueDataArray; - // Count is the call count for the direct-call target and - // TotalCount is the call count for the indirect-call callsite. + // Count is the call count for the direct-call target. 
+ // TotalCount is the total call count for the indirect-call callsite. + // RemainingCount is the TotalCount minus promoted-direct-call count. // Return true we should promote this indirect-call target. - bool isPromotionProfitable(uint64_t Count, uint64_t TotalCount); + bool isPromotionProfitable(uint64_t Count, uint64_t TotalCount, + uint64_t RemainingCount); // Returns the number of profitable candidates to promote for the // current ValueDataArray and the given \p Inst. diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp index ed233d20153..7c16b301b47 100644 --- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -40,12 +40,20 @@ static cl::opt cl::desc("The minimum count to the direct call target " "for the promotion")); +// The percent threshold for the direct-call target (this call site vs the +// remaining call count) for it to be considered as the promotion target. +static cl::opt ICPRemainingPercentThreshold( + "icp-remaining-percent-threshold", cl::init(30), cl::Hidden, cl::ZeroOrMore, + cl::desc("The percentage threshold against remaining unpromoted indirect " + "call count for the promotion")); + // The percent threshold for the direct-call target (this call site vs the // total call count) for it to be considered as the promotion target. static cl::opt - ICPPercentThreshold("icp-percent-threshold", cl::init(30), cl::Hidden, - cl::ZeroOrMore, - cl::desc("The percentage threshold for the promotion")); + ICPTotalPercentThreshold("icp-total-percent-threshold", cl::init(5), + cl::Hidden, cl::ZeroOrMore, + cl::desc("The percentage threshold against total " + "count for the promotion")); // Set the maximum number of targets to promote for a single indirect-call // callsite. 
@@ -59,12 +67,11 @@ ICallPromotionAnalysis::ICallPromotionAnalysis() { } bool ICallPromotionAnalysis::isPromotionProfitable(uint64_t Count, - uint64_t TotalCount) { - if (Count < ICPCountThreshold) - return false; - - unsigned Percentage = (Count * 100) / TotalCount; - return (Percentage >= ICPPercentThreshold); + uint64_t TotalCount, + uint64_t RemainingCount) { + return Count >= ICPCountThreshold && + Count * 100 >= ICPRemainingPercentThreshold * RemainingCount && + Count * 100 >= ICPTotalPercentThreshold * TotalCount; } // Indirect-call promotion heuristic. The direct targets are sorted based on @@ -78,17 +85,18 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates( << "\n"); uint32_t I = 0; + uint64_t RemainingCount = TotalCount; for (; I < MaxNumPromotions && I < NumVals; I++) { uint64_t Count = ValueDataRef[I].Count; - assert(Count <= TotalCount); + assert(Count <= RemainingCount); DEBUG(dbgs() << " Candidate " << I << " Count=" << Count << " Target_func: " << ValueDataRef[I].Value << "\n"); - if (!isPromotionProfitable(Count, TotalCount)) { + if (!isPromotionProfitable(Count, TotalCount, RemainingCount)) { DEBUG(dbgs() << " Not promote: Cold target.\n"); return I; } - TotalCount -= Count; + RemainingCount -= Count; } return I; } diff --git a/test/Transforms/PGOProfile/indirect_call_promotion.ll b/test/Transforms/PGOProfile/indirect_call_promotion.ll index b892c130152..3a8206e5ddc 100644 --- a/test/Transforms/PGOProfile/indirect_call_promotion.ll +++ b/test/Transforms/PGOProfile/indirect_call_promotion.ll @@ -2,13 +2,20 @@ ; RUN: opt < %s -pgo-icall-prom -S -icp-samplepgo | FileCheck %s --check-prefix=ICALL-PROM ; RUN: opt < %s -pgo-icall-prom -S -icp-samplepgo | FileCheck %s --check-prefix=ICALL-PROM-SAMPLEPGO ; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM -; RUN: opt < %s -pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=0 -icp-percent-threshold=0 -icp-max-prom=4 2>&1 | FileCheck %s 
--check-prefix=PASS-REMARK -; RUN: opt < %s -passes=pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=0 -icp-percent-threshold=0 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS-REMARK +; RUN: opt < %s -pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=0 -icp-remaining-percent-threshold=0 -icp-total-percent-threshold=0 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS-REMARK +; RUN: opt < %s -passes=pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=0 -icp-remaining-percent-threshold=0 -icp-total-percent-threshold=0 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS-REMARK +; RUN: opt < %s -passes=pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=0 -icp-remaining-percent-threshold=0 -icp-total-percent-threshold=20 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS2-REMARK + ; PASS-REMARK: remark: :0:0: Promote indirect call to func4 with count 1030 out of 1600 ; PASS-REMARK: remark: :0:0: Promote indirect call to func2 with count 410 out of 570 ; PASS-REMARK: remark: :0:0: Promote indirect call to func3 with count 150 out of 160 ; PASS-REMARK: remark: :0:0: Promote indirect call to func1 with count 10 out of 10 +; PASS2-REMARK: remark: :0:0: Promote indirect call to func4 with count 1030 out of 1600 +; PASS2-REMARK: remark: :0:0: Promote indirect call to func2 with count 410 out of 570 +; PASS2-REMARK-NOT: remark: :0:0: Promote indirect call to func3 +; PASS2-REMARK-NOT: remark: :0:0: Promote indirect call to func1 + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -- 2.50.1