mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-19 08:24:12 +00:00
Include GUIDs from the same module when computing GUIDs that need to be imported.
Summary: In the compile phase of SamplePGO+ThinLTO, ICP is not invoked. Instead, indirect call targets will be included as function metadata for ThinIndex to build the call graph. This should not only include functions defined in other modules, but also functions defined in the same module, otherwise ThinIndex may find the callee dead and eliminate it, while ICP in backend will revive the symbol, which leads to an undefined symbol. Reviewers: tejohnson Reviewed By: tejohnson Subscribers: sanjoy, llvm-commits, mehdi_amini Differential Revision: https://reviews.llvm.org/D39480 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317118 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4d7894c6d8
commit
2db2d47e39
@ -352,17 +352,15 @@ public:
|
||||
return Result;
|
||||
}
|
||||
|
||||
/// Recursively traverses all children, if the corresponding function is
|
||||
/// not defined in module \p M, and its total sample is no less than
|
||||
/// \p Threshold, add its corresponding GUID to \p S. Also traverse the
|
||||
/// BodySamples to add hot CallTarget's GUID to \p S.
|
||||
void findImportedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M,
|
||||
uint64_t Threshold) const {
|
||||
/// Recursively traverses all children, if the total sample count of the
|
||||
/// corresponding function is no less than \p Threshold, add its corresponding
|
||||
/// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
|
||||
/// to \p S.
|
||||
void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M,
|
||||
uint64_t Threshold) const {
|
||||
if (TotalSamples <= Threshold)
|
||||
return;
|
||||
Function *F = M->getFunction(Name);
|
||||
if (!F || !F->getSubprogram())
|
||||
S.insert(Function::getGUID(Name));
|
||||
S.insert(Function::getGUID(Name));
|
||||
// Import hot CallTargets, which may not be available in IR because full
|
||||
// profile annotation cannot be done until backend compilation in ThinLTO.
|
||||
for (const auto &BS : BodySamples)
|
||||
@ -374,7 +372,7 @@ public:
|
||||
}
|
||||
for (const auto &CS : CallsiteSamples)
|
||||
for (const auto &NameFS : CS.second)
|
||||
NameFS.second.findImportedFunctions(S, M, Threshold);
|
||||
NameFS.second.findInlinedFunctions(S, M, Threshold);
|
||||
}
|
||||
|
||||
/// Set the name of the function.
|
||||
|
@ -200,7 +200,7 @@ protected:
|
||||
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
|
||||
bool inlineCallInstruction(Instruction *I);
|
||||
bool inlineHotFunctions(Function &F,
|
||||
DenseSet<GlobalValue::GUID> &ImportGUIDs);
|
||||
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
|
||||
void printEdgeWeight(raw_ostream &OS, Edge E);
|
||||
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
|
||||
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
|
||||
@ -766,12 +766,12 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
|
||||
/// it to direct call. Each indirect call is limited with a single target.
|
||||
///
|
||||
/// \param F function to perform iterative inlining.
|
||||
/// \param ImportGUIDs a set to be updated to include all GUIDs that come
|
||||
/// from a different module but inlined in the profiled binary.
|
||||
/// \param InlinedGUIDs a set to be updated to include all GUIDs that are
|
||||
/// inlined in the profiled binary.
|
||||
///
|
||||
/// \returns True if there is any inline happened.
|
||||
bool SampleProfileLoader::inlineHotFunctions(
|
||||
Function &F, DenseSet<GlobalValue::GUID> &ImportGUIDs) {
|
||||
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
|
||||
DenseSet<Instruction *> PromotedInsns;
|
||||
bool Changed = false;
|
||||
while (true) {
|
||||
@ -804,9 +804,9 @@ bool SampleProfileLoader::inlineHotFunctions(
|
||||
uint64_t Sum;
|
||||
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
|
||||
if (IsThinLTOPreLink) {
|
||||
FS->findImportedFunctions(ImportGUIDs, F.getParent(),
|
||||
Samples->getTotalSamples() *
|
||||
SampleProfileHotThreshold / 100);
|
||||
FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
|
||||
Samples->getTotalSamples() *
|
||||
SampleProfileHotThreshold / 100);
|
||||
continue;
|
||||
}
|
||||
auto CalleeFunctionName = FS->getName();
|
||||
@ -844,8 +844,8 @@ bool SampleProfileLoader::inlineHotFunctions(
|
||||
if (inlineCallInstruction(I))
|
||||
LocalChanged = true;
|
||||
} else if (IsThinLTOPreLink) {
|
||||
findCalleeFunctionSamples(*I)->findImportedFunctions(
|
||||
ImportGUIDs, F.getParent(),
|
||||
findCalleeFunctionSamples(*I)->findInlinedFunctions(
|
||||
InlinedGUIDs, F.getParent(),
|
||||
Samples->getTotalSamples() * SampleProfileHotThreshold / 100);
|
||||
}
|
||||
}
|
||||
@ -1455,18 +1455,19 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
|
||||
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
|
||||
<< ": " << getFunctionLoc(F) << "\n");
|
||||
|
||||
DenseSet<GlobalValue::GUID> ImportGUIDs;
|
||||
Changed |= inlineHotFunctions(F, ImportGUIDs);
|
||||
DenseSet<GlobalValue::GUID> InlinedGUIDs;
|
||||
Changed |= inlineHotFunctions(F, InlinedGUIDs);
|
||||
|
||||
// Compute basic block weights.
|
||||
Changed |= computeBlockWeights(F);
|
||||
|
||||
if (Changed) {
|
||||
// Add an entry count to the function using the samples gathered at the
|
||||
// function entry. Also sets the GUIDs that comes from a different
|
||||
// module but inlined in the profiled binary. This is aiming at making
|
||||
// the IR match the profiled binary before annotation.
|
||||
F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs);
|
||||
// function entry.
|
||||
// Sets the GUIDs that are inlined in the profiled binary. This is used
|
||||
// for ThinLink to make correct liveness analysis, and also make the IR
|
||||
// match the profiled binary before annotation.
|
||||
F.setEntryCount(Samples->getHeadSamples() + 1, &InlinedGUIDs);
|
||||
|
||||
// Compute dominance and loop info needed for propagation.
|
||||
computeDominanceAndLoopInfo(F);
|
||||
|
@ -6,3 +6,6 @@ test:10000:0
|
||||
1: 1000
|
||||
4: foo2:1000
|
||||
1: 1000 foo3:1000
|
||||
test_liveness:10000:0
|
||||
1: foo:1000
|
||||
1: foo_available:1000
|
@ -1,10 +1,15 @@
|
||||
; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file=%S/Inputs/import.prof -S | FileCheck %s
|
||||
; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file=%S/Inputs/function_metadata.prof -S | FileCheck %s
|
||||
|
||||
; Tests whether the functions in the inline stack are added to the
|
||||
; function_entry_count metadata.
|
||||
|
||||
declare void @foo()
|
||||
|
||||
define void @foo_available() !dbg !11 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define void @test({{.*}} !prof ![[ENTRY_TEST:[0-9]+]]
|
||||
define void @test(void ()*) !dbg !7 {
|
||||
%2 = alloca void ()*
|
||||
store void ()* %0, void ()** %2
|
||||
@ -15,9 +20,20 @@ define void @test(void ()*) !dbg !7 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define void @test_liveness({{.*}} !prof ![[ENTRY_TEST_LIVENESS:[0-9]+]]
|
||||
define void @test_liveness() !dbg !12 {
|
||||
call void @foo(), !dbg !20
|
||||
ret void
|
||||
}
|
||||
|
||||
; GUIDs of foo, bar, foo1, foo2 and foo3 should be included in the metadata to
|
||||
; make sure hot inline stacks are imported.
|
||||
; CHECK: !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -7908226060800700466, i64 -2012135647395072713}
|
||||
; CHECK: ![[ENTRY_TEST]] = !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -7908226060800700466, i64 -2012135647395072713}
|
||||
|
||||
; Check GUIDs for both foo and foo_available are included in the metadata to
|
||||
; make sure the liveness analysis can capture the dependency from test_liveness
|
||||
; to foo_available.
|
||||
; CHECK: ![[ENTRY_TEST_LIVENESS]] = !{!"function_entry_count", i64 1, i64 4005816710939881937, i64 6699318081062747564}
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!8, !9}
|
||||
@ -31,7 +47,10 @@ define void @test(void ()*) !dbg !7 {
|
||||
!8 = !{i32 2, !"Dwarf Version", i32 4}
|
||||
!9 = !{i32 1, !"Debug Info Version", i32 3}
|
||||
!10 = !{!"clang version 3.5 "}
|
||||
!11 = distinct !DISubprogram(name: "foo_available", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, variables: !2)
|
||||
!12 = distinct !DISubprogram(name: "test_liveness", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, variables: !2)
|
||||
!15 = !DILexicalBlockFile(discriminator: 1, file: !1, scope: !7)
|
||||
!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
|
||||
!18 = !DILocation(line: 10, scope: !17)
|
||||
!19 = !DILocation(line: 11, scope: !17)
|
||||
!20 = !DILocation(line: 8, scope: !12)
|
Loading…
x
Reference in New Issue
Block a user