return None;
assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
"We can only get profile count for call/invoke instruction.");
- // Check if there is a profile metadata on the instruction. If it is present,
- // determine hotness solely based on that.
- uint64_t TotalCount;
- if (Inst->extractProfTotalWeight(TotalCount))
- return TotalCount;
+ if (computeSummary() && Summary->getKind() == ProfileSummary::PSK_Sample) {
+ // In sample PGO mode, check if there is a profile metadata on the
+ // instruction. If it is present, determine hotness solely based on that,
+ // since the sampled entry count may not be accurate.
+ uint64_t TotalCount;
+ if (Inst->extractProfTotalWeight(TotalCount))
+ return TotalCount;
+ }
if (BFI)
return BFI->getBlockProfileCount(Inst->getParent());
return None;
; CHECK-NEXT: <VERSION
; CHECK-NEXT: <VALUE_GUID op0=25 op1=123/>
; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123
-; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=1 op5=3 op6=5 op7=1 op8=2 op9=3 op10=4 op11=3 op12=6 op13=2 op14=3 op15=3 op16=7 op17=2 op18=8 op19=2 op20=25 op21=3/>
+; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=1 op5=3 op6=5 op7=1 op8=2 op9=3 op10=4 op11=1 op12=6 op13=2 op14=3 op15=3 op16=7 op17=2 op18=8 op19=2 op20=25 op21=3/>
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
; CHECK: <STRTAB_BLOCK
--- /dev/null
+; Test to check the callgraph in summary when there is PGO
+; RUN: opt -module-summary %s -o %t.o
+; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
+; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph-profile-summary.ll -o %t2.o
+; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o
+; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED
+
+
+; CHECK: <SOURCE_FILENAME
+; "hot_function"
+; CHECK-NEXT: <FUNCTION op0=0 op1=12
+; "hot1"
+; CHECK-NEXT: <FUNCTION op0=12 op1=4
+; "hot2"
+; CHECK-NEXT: <FUNCTION op0=16 op1=4
+; "hot3"
+; CHECK-NEXT: <FUNCTION op0=20 op1=4
+; "hot4"
+; CHECK-NEXT: <FUNCTION op0=24 op1=4
+; "cold"
+; CHECK-NEXT: <FUNCTION op0=28 op1=4
+; "none1"
+; CHECK-NEXT: <FUNCTION op0=32 op1=5
+; "none2"
+; CHECK-NEXT: <FUNCTION op0=37 op1=5
+; "none3"
+; CHECK-NEXT: <FUNCTION op0=42 op1=5
+; CHECK-LABEL: <GLOBALVAL_SUMMARY_BLOCK
+; CHECK-NEXT: <VERSION
+; CHECK-NEXT: <VALUE_GUID op0=25 op1=123/>
+; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123
+; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=1 op5=3 op6=5 op7=1 op8=2 op9=3 op10=4 op11=3 op12=6 op13=2 op14=3 op15=3 op16=7 op17=2 op18=8 op19=2 op20=25 op21=3/>
+; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
+
+; CHECK: <STRTAB_BLOCK
+; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3hot4coldnone1none2none3'
+
+; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
+; COMBINED-NEXT: <VERSION
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <COMBINED abbrevid=
+; COMBINED-NEXT: <COMBINED abbrevid=
+; COMBINED-NEXT: <COMBINED abbrevid=
+; COMBINED-NEXT: <COMBINED abbrevid=
+; COMBINED-NEXT: <COMBINED abbrevid=
+; COMBINED-NEXT: <COMBINED abbrevid=
+; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op5=[[HOT1:.*]] op6=3 op7=[[COLD:.*]] op8=1 op9=[[HOT2:.*]] op10=3 op11=[[NONE1:.*]] op12=2 op13=[[HOT3:.*]] op14=3 op15=[[NONE2:.*]] op16=2 op17=[[NONE3:.*]] op18=2/>
+; COMBINED_NEXT: <COMBINED abbrevid=
+; COMBINED_NEXT: </GLOBALVAL_SUMMARY_BLOCK>
+
+
+; ModuleID = 'thinlto-function-summary-callgraph.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; This function have high profile count, so entry block is hot.
+define void @hot_function(i1 %a, i1 %a2) !prof !20 {
+entry:
+ call void @hot1()
+ br i1 %a, label %Cold, label %Hot, !prof !41
+Cold: ; 1/1000 goes here
+ call void @cold()
+ call void @hot2()
+ call void @hot4(), !prof !15
+ call void @none1()
+ br label %exit
+Hot: ; 999/1000 goes here
+ call void @hot2()
+ call void @hot3()
+ br i1 %a2, label %None1, label %None2, !prof !42
+None1: ; half goes here
+ call void @none1()
+ call void @none2()
+ br label %exit
+None2: ; half goes here
+ call void @none3()
+ br label %exit
+exit:
+ ret void
+}
+
+declare void @hot1() #1
+declare void @hot2() #1
+declare void @hot3() #1
+declare void @hot4() #1
+declare void @cold() #1
+declare void @none1() #1
+declare void @none2() #1
+declare void @none3() #1
+
+
+!41 = !{!"branch_weights", i32 1, i32 1000}
+!42 = !{!"branch_weights", i32 1, i32 1}
+
+
+
+!llvm.module.flags = !{!1}
+!20 = !{!"function_entry_count", i64 110, i64 123}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"SampleProfile"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"branch_weights", i32 100}
--- /dev/null
+; RUN: opt < %s -codegenprepare -S | FileCheck %s
+
+target triple = "x86_64-pc-linux-gnu"
+
+; This tests that hot/cold functions get correct section prefix assigned
+
+; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
+; The entry is hot
+define void @hot_func() !prof !15 {
+ ret void
+}
+
+; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
+; The sum of 2 callsites are hot
+define void @hot_call_func() !prof !16 {
+ call void @hot_func(), !prof !17
+ call void @hot_func(), !prof !17
+ ret void
+}
+
+; CHECK-NOT: normal_func{{.*}}!section_prefix
+; The sum of all callsites are neither hot or cold
+define void @normal_func() !prof !16 {
+ call void @hot_func(), !prof !17
+ call void @hot_func(), !prof !18
+ call void @hot_func(), !prof !18
+ ret void
+}
+
+; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
+; The entry and the callsite are both cold
+define void @cold_func() !prof !16 {
+ call void @hot_func(), !prof !18
+ ret void
+}
+
+; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"}
+; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"SampleProfile"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"function_entry_count", i64 1000}
+!16 = !{!"function_entry_count", i64 1}
+!17 = !{!"branch_weights", i32 80}
+!18 = !{!"branch_weights", i32 1}
ret void
}
-; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
-; The sum of 2 callsites are hot
-define void @hot_call_func() !prof !16 {
+; For instrumentation based PGO, we should only look at entry counts,
+; not call site VP metadata (which can exist on value profiled memcpy,
+; or possibly left behind after static analysis based devirtualization).
+; CHECK: cold_func1{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
+define void @cold_func1() !prof !16 {
call void @hot_func(), !prof !17
call void @hot_func(), !prof !17
ret void
}
-; CHECK-NOT: normal_func{{.*}}!section_prefix
-; The sum of all callsites are neither hot or cold
-define void @normal_func() !prof !16 {
+; CHECK: cold_func2{{.*}}!section_prefix
+define void @cold_func2() !prof !16 {
call void @hot_func(), !prof !17
call void @hot_func(), !prof !18
call void @hot_func(), !prof !18
ret void
}
-; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
-; The entry and the callsite are both cold
-define void @cold_func() !prof !16 {
+; CHECK: cold_func3{{.*}}!section_prefix ![[COLD_ID]]
+define void @cold_func3() !prof !16 {
call void @hot_func(), !prof !18
ret void
}
@func = global void ()* null
; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]]
-define void @callee(i32 %n) !prof !1 {
+define void @callee(i32 %n) !prof !15 {
%cond = icmp sle i32 %n, 10
br i1 %cond, label %cond_true, label %cond_false
cond_true:
; ext1 is optimized away, thus not updated.
; CHECK: call void @ext1(), !prof ![[COUNT_CALLEE1:[0-9]*]]
- call void @ext1(), !prof !2
+ call void @ext1(), !prof !16
ret void
cond_false:
; ext is cloned and updated.
; CHECK: call void @ext(), !prof ![[COUNT_CALLEE:[0-9]*]]
- call void @ext(), !prof !2
+ call void @ext(), !prof !16
%f = load void ()*, void ()** @func
; CHECK: call void %f(), !prof ![[COUNT_IND_CALLEE:[0-9]*]]
- call void %f(), !prof !4
+ call void %f(), !prof !18
ret void
}
define void @caller() {
; CHECK: call void @ext(), !prof ![[COUNT_CALLER:[0-9]*]]
; CHECK: call void %f.i(), !prof ![[COUNT_IND_CALLER:[0-9]*]]
- call void @callee(i32 15), !prof !3
+ call void @callee(i32 15), !prof !17
ret void
}
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"MaxFunctionCount", i32 2000}
-!1 = !{!"function_entry_count", i64 1000}
-!2 = !{!"branch_weights", i64 2000}
-!3 = !{!"branch_weights", i64 400}
-!4 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"SampleProfile"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 2000}
+!8 = !{!"NumCounts", i64 2}
+!9 = !{!"NumFunctions", i64 2}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"function_entry_count", i64 1000}
+!16 = !{!"branch_weights", i64 2000}
+!17 = !{!"branch_weights", i64 400}
+!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20}
attributes #0 = { alwaysinline }
; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600}
; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000}
EXPECT_TRUE(PSI.isHotCallSite(CS1, &BFI));
EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI));
+
+ // Test that adding an MD_prof metadata with a hot count on CS2 does not
+ // change its hotness as it has no effect in instrumented profiling.
+ MDBuilder MDB(M->getContext());
+ CI2->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights({400}));
+ EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI));
}
TEST_F(ProfileSummaryInfoTest, SampleProf) {