cl::desc("A count is cold if it is below the minimum count"
" to reach this percentile of total counts."));
+static cl::opt<bool> AccurateSampleProfile(
+ "accurate-sample-profile", cl::Hidden, cl::init(false),
+ cl::desc("If the sample profile is accurate, we will mark all un-sampled "
+ "callsite as cold. Otherwise, treat un-sampled callsites as if "
+ "we have no profile."));
+
// Find the minimum count to reach a desired percentile of counts.
static uint64_t getMinCountForPercentile(SummaryEntryVector &DS,
uint64_t Percentile) {
if (hasSampleProfile()) {
// In sample PGO mode, check if there is a profile metadata on the
// instruction. If it is present, determine hotness solely based on that,
- // since the sampled entry count may not be accurate.
+ // since the sampled entry count may not be accurate. If there is no
+ // profile annotated on the instruction, return None.
uint64_t TotalCount;
if (Inst->extractProfTotalWeight(TotalCount))
return TotalCount;
+ return None;
}
if (BFI)
return BFI->getBlockProfileCount(Inst->getParent());
bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,
BlockFrequencyInfo *BFI) {
auto C = getProfileCount(CS.getInstruction(), BFI);
- return C && isColdCount(*C);
+ if (C)
+ return isColdCount(*C);
+
+ // In SamplePGO, if the caller has been sampled, and there is no profile
+ // annotated on the callsite, we consider the callsite as cold.
+ // If there is no profile for the caller, and we know the profile is
+ // accurate, we consider the callsite as cold.
+ return (hasSampleProfile() &&
+ (CS.getCaller()->getEntryCount() || AccurateSampleProfile));
}
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
; Test to check the callgraph in summary when there is PGO
; RUN: opt -module-summary %s -o %t.o
; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
-; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph-profile-summary.ll -o %t2.o
+; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll -o %t2.o
; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o
; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED
; "hot3"
; CHECK-NEXT: <FUNCTION op0=20 op1=4
; "hot4"
-; CHECK-NEXT: <FUNCTION op0=24 op1=4
+; CHECK-NEXT: <FUNCTION op0=24 op1=5
; "cold"
-; CHECK-NEXT: <FUNCTION op0=28 op1=4
+; CHECK-NEXT: <FUNCTION op0=29 op1=5
; "none1"
-; CHECK-NEXT: <FUNCTION op0=32 op1=5
+; CHECK-NEXT: <FUNCTION op0=34 op1=5
; "none2"
-; CHECK-NEXT: <FUNCTION op0=37 op1=5
+; CHECK-NEXT: <FUNCTION op0=39 op1=5
; "none3"
-; CHECK-NEXT: <FUNCTION op0=42 op1=5
+; CHECK-NEXT: <FUNCTION op0=44 op1=5
+; CHECK-NEXT: <FUNCTION op0=49 op1=5
+
; CHECK-LABEL: <GLOBALVAL_SUMMARY_BLOCK
; CHECK-NEXT: <VERSION
-; CHECK-NEXT: <VALUE_GUID op0=25 op1=123/>
-; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123
-; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=1 op5=3 op6=5 op7=1 op8=2 op9=3 op10=4 op11=3 op12=6 op13=2 op14=3 op15=3 op16=7 op17=2 op18=8 op19=2 op20=25 op21=4/>
+; CHECK-NEXT: <VALUE_GUID op0=26 op1=123/>
+; op4=none1 op6=hot1 op8=cold1 op10=none2 op12=hot2 op14=cold2 op16=none3 op18=hot3 op20=cold3 op22=123
+; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=7 op5=0 op6=1 op7=3 op8=4 op9=1 op10=8 op11=0 op12=2 op13=3 op14=5 op15=1 op16=9 op17=0 op18=3 op19=3 op20=6 op21=1 op22=26 op23=4/>
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
; CHECK: <STRTAB_BLOCK
-; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3hot4coldnone1none2none3{{.*}}'
+; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3cold1cold2cold3none1none2none3{{.*}}'
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
; COMBINED-NEXT: <VERSION
; COMBINED-NEXT: <VALUE_GUID
; COMBINED-NEXT: <VALUE_GUID
; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
-; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op5=[[HOT1:.*]] op6=3 op7=[[COLD:.*]] op8=1 op9=[[HOT2:.*]] op10=3 op11=[[NONE1:.*]] op12=2 op13=[[HOT3:.*]] op14=3 op15=[[NONE2:.*]] op16=2 op17=[[NONE3:.*]] op18=2/>
+; COMBINED-NEXT: <COMBINED abbrevid=
+; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op5=[[NONE1:.*]] op6=0 op7=[[HOT1:.*]] op8=3 op9=[[COLD1:.*]] op10=1 op11=[[NONE2:.*]] op12=0 op13=[[HOT2:.*]] op14=3 op15=[[COLD2:.*]] op16=1 op17=[[NONE3:.*]] op18=0 op19=[[HOT3:.*]] op20=3 op21=[[COLD3:.*]] op22=1/>
; COMBINED_NEXT: <COMBINED abbrevid=
; COMBINED_NEXT: </GLOBALVAL_SUMMARY_BLOCK>
; This function have high profile count, so entry block is hot.
define void @hot_function(i1 %a, i1 %a2) !prof !20 {
entry:
- call void @hot1()
- br i1 %a, label %Cold, label %Hot, !prof !41
-Cold: ; 1/1000 goes here
- call void @cold()
- call void @hot2()
- call void @hot4(), !prof !15
- call void @none1()
- br label %exit
-Hot: ; 999/1000 goes here
- call void @hot2()
- call void @hot3()
- br i1 %a2, label %None1, label %None2, !prof !42
-None1: ; half goes here
call void @none1()
+ call void @hot1(), !prof !15
+ call void @cold1(), !prof !16
+ br i1 %a, label %Cold, label %Hot, !prof !41
+Cold: ; 1/1000 goes here
call void @none2()
+ call void @hot2(), !prof !15
+ call void @cold2(), !prof !16
br label %exit
-None2: ; half goes here
+Hot: ; 999/1000 goes here
call void @none3()
+ call void @hot3(), !prof !15
+ call void @cold3(), !prof !16
br label %exit
exit:
ret void
declare void @hot1() #1
declare void @hot2() #1
declare void @hot3() #1
-declare void @hot4() #1
-declare void @cold() #1
+declare void @cold1() #1
+declare void @cold2() #1
+declare void @cold3() #1
declare void @none1() #1
declare void @none2() #1
declare void @none3() #1
-
!41 = !{!"branch_weights", i32 1, i32 1000}
-!42 = !{!"branch_weights", i32 1, i32 1}
-
-
!llvm.module.flags = !{!1}
!20 = !{!"function_entry_count", i64 110, i64 123}
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}
!15 = !{!"branch_weights", i32 100}
+!16 = !{!"branch_weights", i32 1}
CallSite CS1(BB1->getFirstNonPHI());
auto *CI2 = BB2->getFirstNonPHI();
+ // Manually attach branch weights metadata to the call instruction.
+ SmallVector<uint32_t, 1> Weights;
+ Weights.push_back(1000);
+ MDBuilder MDB(M->getContext());
+ CI2->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
CallSite CS2(CI2);
- EXPECT_TRUE(PSI.isHotCallSite(CS1, &BFI));
- EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI));
+ EXPECT_FALSE(PSI.isHotCallSite(CS1, &BFI));
+ EXPECT_TRUE(PSI.isHotCallSite(CS2, &BFI));
// Test that CS2 is considered hot when it gets an MD_prof metadata with
// weights that exceed the hot count threshold.
- MDBuilder MDB(M->getContext());
CI2->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights({400}));
EXPECT_TRUE(PSI.isHotCallSite(CS2, &BFI));
}