assert(ResolvedCalleeSummary->instCount() <= NewThreshold &&
"selectCallee() didn't honor the threshold");
+ auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
+ // Adjust the threshold for next level of imported functions.
+ // The threshold is different for hot callsites because we can then
+ // inline chains of hot calls.
+ if (IsHotCallsite)
+ return Threshold * ImportHotInstrFactor;
+ return Threshold * ImportInstrFactor;
+ };
+
+ bool IsHotCallsite = Edge.second.Hotness == CalleeInfo::HotnessType::Hot;
+ const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);
+
auto ExportModulePath = ResolvedCalleeSummary->modulePath();
auto &ProcessedThreshold = ImportList[ExportModulePath][GUID];
/// Since the traversal of the call graph is DFS, we can revisit a function
/// a second time with a higher threshold. In this case, it is added back to
/// the worklist with the new threshold.
- if (ProcessedThreshold && ProcessedThreshold >= Threshold) {
+ if (ProcessedThreshold && ProcessedThreshold >= AdjThreshold) {
DEBUG(dbgs() << "ignored! Target was already seen with Threshold "
<< ProcessedThreshold << "\n");
continue;
}
+ bool PreviouslyImported = ProcessedThreshold != 0;
// Mark this function as imported in this module, with the current Threshold
- ProcessedThreshold = Threshold;
+ ProcessedThreshold = AdjThreshold;
// Make exports in the source module.
if (ExportLists) {
auto &ExportList = (*ExportLists)[ExportModulePath];
ExportList.insert(GUID);
- // Mark all functions and globals referenced by this function as exported
- // to the outside if they are defined in the same source module.
- for (auto &Edge : ResolvedCalleeSummary->calls()) {
- auto CalleeGUID = Edge.first.getGUID();
- exportGlobalInModule(Index, ExportModulePath, CalleeGUID, ExportList);
- }
- for (auto &Ref : ResolvedCalleeSummary->refs()) {
- auto GUID = Ref.getGUID();
- exportGlobalInModule(Index, ExportModulePath, GUID, ExportList);
+ if (!PreviouslyImported) {
+ // This is the first time this function was exported from its source
+ // module, so mark all functions and globals it references as exported
+ // to the outside if they are defined in the same source module.
+ for (auto &Edge : ResolvedCalleeSummary->calls()) {
+ auto CalleeGUID = Edge.first.getGUID();
+ exportGlobalInModule(Index, ExportModulePath, CalleeGUID, ExportList);
+ }
+ for (auto &Ref : ResolvedCalleeSummary->refs()) {
+ auto GUID = Ref.getGUID();
+ exportGlobalInModule(Index, ExportModulePath, GUID, ExportList);
+ }
}
}
- auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
- // Adjust the threshold for next level of imported functions.
- // The threshold is different for hot callsites because we can then
- // inline chains of hot calls.
- if (IsHotCallsite)
- return Threshold * ImportHotInstrFactor;
- return Threshold * ImportInstrFactor;
- };
-
- bool IsHotCallsite = Edge.second.Hotness == CalleeInfo::HotnessType::Hot;
-
// Insert the newly imported function to the worklist.
- Worklist.emplace_back(ResolvedCalleeSummary,
- GetAdjustedThreshold(Threshold, IsHotCallsite));
+ Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
}
}
--- /dev/null
+; ModuleID = 'thinlto-function-summary-callgraph-profile-summary2.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+define void @hot() #1 !prof !28 {
+ call void @calledFromHot()
+ ret void
+}
+
+; 9 instructions so it is above decayed cold threshold of 7 and below
+; decayed hot threshold of 10.
+define void @calledFromHot() !prof !28 {
+ %b = alloca i32, align 4
+ store i32 1, i32* %b, align 4
+ store i32 1, i32* %b, align 4
+ store i32 1, i32* %b, align 4
+ store i32 1, i32* %b, align 4
+ store i32 1, i32* %b, align 4
+ store i32 1, i32* %b, align 4
+ store i32 1, i32* %b, align 4
+ ret void
+}
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 222}
+!5 = !{!"MaxCount", i64 110}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 110}
+!8 = !{!"NumCounts", i64 4}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 110, i32 2}
+!13 = !{i32 999000, i64 2, i32 4}
+!14 = !{i32 999999, i64 2, i32 4}
+!28 = !{!"function_entry_count", i64 110}
+!29 = !{!"function_entry_count", i64 1}
--- /dev/null
+; Test to check that callee reached from cold and then hot path gets
+; hot thresholds.
+; RUN: opt -module-summary %s -o %t.bc
+; RUN: opt -module-summary %p/Inputs/hotness_based_import2.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; Test with limit set to 10 and multipliers set to 1. Since cold call to
+; hot is first in the other module, we'll first add calledFromHot to worklist
+; with threshold decayed by default 0.7 factor. Test ensures that when we
+; encounter it again from hot path, we re-enqueue with higher non-decayed
+; threshold which will allow it to be imported.
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -import-instr-limit=10 -import-hot-multiplier=1.0 -import-cold-multiplier=1.0 -S | FileCheck %s --check-prefix=CHECK
+; CHECK-DAG: define available_externally void @hot()
+; CHECK-DAG: define available_externally void @calledFromHot()
+
+; ModuleID = 'thinlto-function-summary-callgraph.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; This function has a high profile count, so entry block is hot.
+define void @hot_function(i1 %a, i1 %a2) !prof !28 {
+entry:
+ call void @hot()
+ ret void
+}
+
+; This function has a low profile count, so entry block is cold.
+define void @cold_function(i1 %a, i1 %a2) !prof !29 {
+entry:
+ call void @hot()
+ ret void
+}
+
+declare void @hot() #1
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 222}
+!5 = !{!"MaxCount", i64 110}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 110}
+!8 = !{!"NumCounts", i64 4}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 110, i32 2}
+!13 = !{i32 999000, i64 2, i32 4}
+!14 = !{i32 999999, i64 2, i32 4}
+!28 = !{!"function_entry_count", i64 110}
+!29 = !{!"function_entry_count", i64 1}