[ThinLTO] For SamplePGO, need to handle ICP targets consistently in thin link

Summary:
SamplePGO indirect call profiles record the target as the original GUID
for statics. The importer had special handling to map to the normal GUID
in that case. The dead global analysis needs the same treatment or
inconsistencies arise, resulting in linker unsats due to some dead
symbols being exported and kept, leaving in references to other dead
symbols that are removed.

This can happen when a SamplePGO profile collected by one binary is used
for a different binary, so the indirect call profiles may not accurately
reflect live targets.

Reviewers: danielcdh

Subscribers: mehdi_amini, inglorion, llvm-commits, eraman

Differential Revision: https://reviews.llvm.org/D37783

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313151 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Teresa Johnson 2017-09-13 15:16:38 +00:00
parent 66341b5d6a
commit 32ce91cfc3
4 changed files with 154 additions and 11 deletions

View File

@ -187,6 +187,21 @@ selectCallee(const ModuleSummaryIndex &Index,
using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */,
GlobalValue::GUID>;
static ValueInfo
updateValueInfoForIndirectCalls(const ModuleSummaryIndex &Index, ValueInfo VI) {
if (!VI.getSummaryList().empty())
return VI;
// For SamplePGO, the indirect call targets for local functions will
// have its original name annotated in profile. We try to find the
// corresponding PGOFuncName as the GUID.
// FIXME: Consider updating the edges in the graph after building
// it, rather than needing to perform this mapping on each walk.
auto GUID = Index.getGUIDFromOriginalID(VI.getGUID());
if (GUID == 0)
return nullptr;
return Index.getValueInfo(GUID);
}
/// Compute the list of functions to import for a given caller. Mark these
/// imported functions and the symbols they reference in their source module as
/// exported from their source module.
@ -201,17 +216,9 @@ static void computeImportForFunction(
DEBUG(dbgs() << " edge -> " << VI.getGUID() << " Threshold:" << Threshold
<< "\n");
if (VI.getSummaryList().empty()) {
// For SamplePGO, the indirect call targets for local functions will
// have its original name annotated in profile. We try to find the
// corresponding PGOFuncName as the GUID.
auto GUID = Index.getGUIDFromOriginalID(VI.getGUID());
if (GUID == 0)
continue;
VI = Index.getValueInfo(GUID);
if (!VI)
continue;
}
VI = updateValueInfoForIndirectCalls(Index, VI);
if (!VI)
continue;
if (DefinedGVSummaries.count(VI.getGUID())) {
DEBUG(dbgs() << "ignored! Target already in destination module.\n");
@ -461,6 +468,17 @@ void llvm::computeDeadSymbols(
for (auto &S : VI.getSummaryList())
if (S->isLive())
return;
// FIXME: If we knew which edges were created for indirect call profiles,
// we could skip them here. Any that are live should be reached via
// other edges, e.g. reference edges. Otherwise, using a profile collected
// on a slightly different binary might provoke preserving, importing
// and ultimately promoting calls to functions not linked into this
// binary, which increases the binary size unnecessarily. Note that
// if this code changes, the importer needs to change so that edges
// to functions marked dead are skipped.
VI = updateValueInfoForIndirectCalls(Index, VI);
if (!VI)
return;
for (auto &S : VI.getSummaryList())
S->setLive(true);
++LiveSymbols;

View File

@ -0,0 +1,21 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: norecurse nounwind readnone uwtable
define internal void @_ZL3foov() #1 {
entry:
call void @_ZL3barv()
ret void
}
declare void @_ZL3barv()
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3}
!llvm.ident = !{!31}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 297016)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "b.cc", directory: "/ssd/llvm/abc/small")
!2 = !{}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!31 = !{!"clang version 5.0.0 (trunk 297016)"}

View File

@ -0,0 +1,28 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @_ZL3barv() #1 {
entry:
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
call void @dummy()
ret void
}
define internal void @dummy() {
entry:
ret void
}
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3}
!llvm.ident = !{!31}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 297016)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "c.cc", directory: "/ssd/llvm/abc/small")
!2 = !{}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!31 = !{!"clang version 5.0.0 (trunk 297016)"}

View File

@ -0,0 +1,76 @@
; Checks if indirect calls to static target functions that are actually
; dead in the new binary target (due to a profile collected from a slightly
; different binary) are properly traversed during ThinLTO liveness analysis.
; If the liveness analysis is changed to ignore indirect edges and the
; importer is changed to check liveness before importing, this test will
; need adjustment (in that case _ZL3foov should not be imported/promoted,
; and _ZL3barv can be internalized/removed).
; Do setup work for all below tests: generate bitcode and combined index
; RUN: opt -module-summary %s -o %t.bc
; RUN: opt -module-summary %p/Inputs/thinlto_samplepgo_icp2a.ll -o %t2a.bc
; RUN: opt -module-summary %p/Inputs/thinlto_samplepgo_icp2b.ll -o %t2b.bc
; Use -import-instr-limit=5 so that we don't import _ZL3barv, which would
; hide the problem.
; RUN: llvm-lto2 run -save-temps -import-instr-limit=5 -o %t3 %t.bc %t2a.bc %t2b.bc -r %t.bc,fptr,plx -r %t.bc,main,plx -r %t2a.bc,_ZL3barv,l -r %t2b.bc,_ZL3barv,pl -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS2
; IMPORTS2-NOT: Import _ZL3barv
; IMPORTS2: Import _ZL3foov.llvm.0
; IMPORTS2-NOT: Import _ZL3barv
; RUN: llvm-nm %t3.2 | FileCheck %s --check-prefix=NM
; NM: _ZL3barv
; RUN: llvm-dis < %t3.2.2.internalize.bc | FileCheck %s --check-prefix=INTERNALIZE
; INTERNALIZE: define void @_ZL3barv
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@fptr = local_unnamed_addr global void ()* null, align 8
; Function Attrs: norecurse uwtable
define i32 @main() local_unnamed_addr #0 !prof !34 {
entry:
%0 = load void ()*, void ()** @fptr, align 8
; ICALL-PROM: br i1 %{{[0-9]+}}, label %if.true.direct_targ, label %if.false.orig_indirect
tail call void %0(), !prof !40
ret i32 0
}
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3,!4}
!llvm.ident = !{!31}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 297016)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "main.cc", directory: ".")
!2 = !{}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"ProfileSummary", !5}
!5 = !{!6, !7, !8, !9, !10, !11, !12, !13}
!6 = !{!"ProfileFormat", !"SampleProfile"}
!7 = !{!"TotalCount", i64 3003}
!8 = !{!"MaxCount", i64 3000}
!9 = !{!"MaxInternalCount", i64 0}
!10 = !{!"MaxFunctionCount", i64 0}
!11 = !{!"NumCounts", i64 3}
!12 = !{!"NumFunctions", i64 1}
!13 = !{!"DetailedSummary", !14}
!14 = !{!15, !16, !17, !18, !19, !20, !20, !21, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30}
!15 = !{i32 10000, i64 3000, i32 1}
!16 = !{i32 100000, i64 3000, i32 1}
!17 = !{i32 200000, i64 3000, i32 1}
!18 = !{i32 300000, i64 3000, i32 1}
!19 = !{i32 400000, i64 3000, i32 1}
!20 = !{i32 500000, i64 3000, i32 1}
!21 = !{i32 600000, i64 3000, i32 1}
!22 = !{i32 700000, i64 3000, i32 1}
!23 = !{i32 800000, i64 3000, i32 1}
!24 = !{i32 900000, i64 3000, i32 1}
!25 = !{i32 950000, i64 3000, i32 1}
!26 = !{i32 990000, i64 3000, i32 1}
!27 = !{i32 999000, i64 3000, i32 1}
!28 = !{i32 999900, i64 2, i32 2}
!29 = !{i32 999990, i64 2, i32 2}
!30 = !{i32 999999, i64 2, i32 2}
!31 = !{!"clang version 5.0.0 (trunk 297016)"}
!34 = !{!"function_entry_count", i64 1}
!40 = !{!"VP", i32 0, i64 3000, i64 -8789629626369651636, i64 3000}