[ThinLTO] Ensure callees get hot threshold when first seen on cold path

This is split out from D27696, since it turned out to be a bug fix and
not part of the NFC efficiency change.

Keep the same adjusted (possibly decayed) threshold in both the worklist
and the ImportList. Otherwise if we encountered it first along a cold
path, the callee would be added to the worklist with a lower decayed
threshold than when it is later encountered along a hot path. But the
logic uses the threshold recorded in the ImportList entry to check if
we should re-add it, and without this patch the threshold recorded there
is the same along both paths so we don't re-add it. Using the
same possibly decayed threshold in the ImportList ensures we re-add it
later with the higher non-decayed hot path threshold.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289843 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Teresa Johnson 2016-12-15 18:21:01 +00:00
parent 69a678c244
commit 260d27c8d8
3 changed files with 130 additions and 31 deletions

View File

@ -316,35 +316,6 @@ static void computeImportForFunction(
assert(ResolvedCalleeSummary->instCount() <= NewThreshold && assert(ResolvedCalleeSummary->instCount() <= NewThreshold &&
"selectCallee() didn't honor the threshold"); "selectCallee() didn't honor the threshold");
auto ExportModulePath = ResolvedCalleeSummary->modulePath();
auto &ProcessedThreshold = ImportList[ExportModulePath][GUID];
/// Since the traversal of the call graph is DFS, we can revisit a function
/// a second time with a higher threshold. In this case, it is added back to
/// the worklist with the new threshold.
if (ProcessedThreshold && ProcessedThreshold >= Threshold) {
DEBUG(dbgs() << "ignored! Target was already seen with Threshold "
<< ProcessedThreshold << "\n");
continue;
}
// Mark this function as imported in this module, with the current Threshold
ProcessedThreshold = Threshold;
// Make exports in the source module.
if (ExportLists) {
auto &ExportList = (*ExportLists)[ExportModulePath];
ExportList.insert(GUID);
// Mark all functions and globals referenced by this function as exported
// to the outside if they are defined in the same source module.
for (auto &Edge : ResolvedCalleeSummary->calls()) {
auto CalleeGUID = Edge.first.getGUID();
exportGlobalInModule(Index, ExportModulePath, CalleeGUID, ExportList);
}
for (auto &Ref : ResolvedCalleeSummary->refs()) {
auto GUID = Ref.getGUID();
exportGlobalInModule(Index, ExportModulePath, GUID, ExportList);
}
}
auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) { auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
// Adjust the threshold for next level of imported functions. // Adjust the threshold for next level of imported functions.
// The threshold is different for hot callsites because we can then // The threshold is different for hot callsites because we can then
@ -355,10 +326,43 @@ static void computeImportForFunction(
}; };
bool IsHotCallsite = Edge.second.Hotness == CalleeInfo::HotnessType::Hot; bool IsHotCallsite = Edge.second.Hotness == CalleeInfo::HotnessType::Hot;
const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);
auto ExportModulePath = ResolvedCalleeSummary->modulePath();
auto &ProcessedThreshold = ImportList[ExportModulePath][GUID];
/// Since the traversal of the call graph is DFS, we can revisit a function
/// a second time with a higher threshold. In this case, it is added back to
/// the worklist with the new threshold.
if (ProcessedThreshold && ProcessedThreshold >= AdjThreshold) {
DEBUG(dbgs() << "ignored! Target was already seen with Threshold "
<< ProcessedThreshold << "\n");
continue;
}
bool PreviouslyImported = ProcessedThreshold != 0;
// Mark this function as imported in this module, with the current Threshold
ProcessedThreshold = AdjThreshold;
// Make exports in the source module.
if (ExportLists) {
auto &ExportList = (*ExportLists)[ExportModulePath];
ExportList.insert(GUID);
if (!PreviouslyImported) {
// This is the first time this function was exported from its source
// module, so mark all functions and globals it references as exported
// to the outside if they are defined in the same source module.
for (auto &Edge : ResolvedCalleeSummary->calls()) {
auto CalleeGUID = Edge.first.getGUID();
exportGlobalInModule(Index, ExportModulePath, CalleeGUID, ExportList);
}
for (auto &Ref : ResolvedCalleeSummary->refs()) {
auto GUID = Ref.getGUID();
exportGlobalInModule(Index, ExportModulePath, GUID, ExportList);
}
}
}
// Insert the newly imported function to the worklist. // Insert the newly imported function to the worklist.
Worklist.emplace_back(ResolvedCalleeSummary, Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
GetAdjustedThreshold(Threshold, IsHotCallsite));
} }
} }

View File

@ -0,0 +1,42 @@
; ModuleID = 'thinlto-function-summary-callgraph-profile-summary2.ll'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @hot() #1 !prof !28 {
call void @calledFromHot()
ret void
}
; 9 instructions, so it is above the decayed cold threshold of 7 but within
; the non-decayed hot threshold of 10.
define void @calledFromHot() !prof !28 {
%b = alloca i32, align 4
store i32 1, i32* %b, align 4
store i32 1, i32* %b, align 4
store i32 1, i32* %b, align 4
store i32 1, i32* %b, align 4
store i32 1, i32* %b, align 4
store i32 1, i32* %b, align 4
store i32 1, i32* %b, align 4
ret void
}
!llvm.module.flags = !{!1}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}
!4 = !{!"TotalCount", i64 222}
!5 = !{!"MaxCount", i64 110}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 110}
!8 = !{!"NumCounts", i64 4}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 110, i32 2}
!13 = !{i32 999000, i64 2, i32 4}
!14 = !{i32 999999, i64 2, i32 4}
!28 = !{!"function_entry_count", i64 110}
!29 = !{!"function_entry_count", i64 1}

View File

@ -0,0 +1,53 @@
; Test to check that callee reached from cold and then hot path gets
; hot thresholds.
; RUN: opt -module-summary %s -o %t.bc
; RUN: opt -module-summary %p/Inputs/hotness_based_import2.ll -o %t2.bc
; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
; Test with the limit set to 10 and the multipliers set to 1. Since the cold
; call to hot is first in the other module, we'll first add calledFromHot to
; the worklist with a threshold decayed by the default 0.7 factor. The test
; ensures that when we encounter it again from the hot path, we re-enqueue it
; with the higher non-decayed threshold, which will allow it to be imported.
; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -import-instr-limit=10 -import-hot-multiplier=1.0 -import-cold-multiplier=1.0 -S | FileCheck %s --check-prefix=CHECK
; CHECK-DAG: define available_externally void @hot()
; CHECK-DAG: define available_externally void @calledFromHot()
; ModuleID = 'thinlto-function-summary-callgraph.ll'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; This function has a high profile count, so entry block is hot.
define void @hot_function(i1 %a, i1 %a2) !prof !28 {
entry:
call void @hot()
ret void
}
; This function has a low profile count, so entry block is cold.
define void @cold_function(i1 %a, i1 %a2) !prof !29 {
entry:
call void @hot()
ret void
}
declare void @hot() #1
!llvm.module.flags = !{!1}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}
!4 = !{!"TotalCount", i64 222}
!5 = !{!"MaxCount", i64 110}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 110}
!8 = !{!"NumCounts", i64 4}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 110, i32 2}
!13 = !{i32 999000, i64 2, i32 4}
!14 = !{i32 999999, i64 2, i32 4}
!28 = !{!"function_entry_count", i64 110}
!29 = !{!"function_entry_count", i64 1}