SamplePGO ThinLTO ICP fix for local functions.

Summary:
In SamplePGO, if the profile is collected from non-LTO binary, and used to drive ThinLTO, the indirect call promotion may fail because ThinLTO adjusts local function names to avoid conflicts. There are two places of where the mismatch can happen:

1. thin-link prepends SourceFileName to front of FuncName to build the GUID (GlobalValue::getGlobalIdentifier). Unlike instrumentation FDO, SamplePGO does not use the PGOFuncName scheme and therefore the indirect call target profile data contains a hash of the OriginalName.
2. backend compiler promotes some local functions to global and appends .llvm.{$ModuleHash} to the end of the FuncName to derive PromotedFunctionName

This patch tries at the best effort to find the GUID from the original local function name (in profile), and use that in ICP promotion, and in SamplePGO matching that happens in the backend after importing/inlining:

1. in thin-link, it builds the map from OriginalName to GUID so that when thin-link reads in indirect call target profile (represented by OriginalName), it knows which GUID to import.
2. in backend compiler, if sample profile reader cannot find a profile match for PromotedFunctionName, it will try to find if there is a match for OriginalFunctionName.
3. in backend compiler, we build symbol table entry for OriginalFunctionName and pointer to the same symbol of PromotedFunctionName, so that ICP can find the correct target to promote.

Reviewers: mehdi_amini, tejohnson

Reviewed By: tejohnson

Subscribers: llvm-commits, Prazek

Differential Revision: https://reviews.llvm.org/D30754

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297757 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dehao Chen 2017-03-14 17:33:01 +00:00
parent 77960c2b12
commit cd2a5b62d1
9 changed files with 158 additions and 6 deletions

View File

@ -162,7 +162,7 @@ private:
protected:
/// GlobalValueSummary constructor.
GlobalValueSummary(SummaryKind K, GVFlags Flags, std::vector<ValueInfo> Refs)
: Kind(K), Flags(Flags), RefEdgeList(std::move(Refs)) {}
: Kind(K), Flags(Flags), OriginalName(0), RefEdgeList(std::move(Refs)) {}
public:
virtual ~GlobalValueSummary() = default;
@ -528,6 +528,10 @@ private:
// FIXME: Add bitcode read/write support for this field.
std::map<std::string, TypeIdSummary> TypeIdMap;
/// Mapping from original ID to GUID. If original ID can map to multiple
/// GUIDs, it will be mapped to 0.
std::map<GlobalValue::GUID, GlobalValue::GUID> OidGuidMap;
// YAML I/O support.
friend yaml::MappingTraits<ModuleSummaryIndex>;
@ -555,9 +559,17 @@ public:
return GlobalValueMap.find(ValueGUID);
}
/// Return the GUID for \p OriginalId in the OidGuidMap.
GlobalValue::GUID getGUIDFromOriginalID(GlobalValue::GUID OriginalID) const {
const auto I = OidGuidMap.find(OriginalID);
return I == OidGuidMap.end() ? 0 : I->second;
}
/// Add a global value summary for a value of the given name.
void addGlobalValueSummary(StringRef ValueName,
std::unique_ptr<GlobalValueSummary> Summary) {
addOriginalName(GlobalValue::getGUID(ValueName),
Summary->getOriginalName());
GlobalValueMap[GlobalValue::getGUID(ValueName)].push_back(
std::move(Summary));
}
@ -565,9 +577,21 @@ public:
/// Add a global value summary for a value of the given GUID.
void addGlobalValueSummary(GlobalValue::GUID ValueGUID,
std::unique_ptr<GlobalValueSummary> Summary) {
addOriginalName(ValueGUID, Summary->getOriginalName());
GlobalValueMap[ValueGUID].push_back(std::move(Summary));
}
/// Add an original name for the value of the given GUID.
void addOriginalName(GlobalValue::GUID ValueGUID,
GlobalValue::GUID OrigGUID) {
if (OrigGUID == 0 || ValueGUID == OrigGUID)
return;
if (OidGuidMap.count(OrigGUID) && OidGuidMap[OrigGUID] != ValueGUID)
OidGuidMap[OrigGUID] = 0;
else
OidGuidMap[OrigGUID] = ValueGUID;
}
/// Find the summary for global \p GUID in module \p ModuleId, or nullptr if
/// not found.
GlobalValueSummary *findSummaryInModule(GlobalValue::GUID ValueGUID,

View File

@ -283,7 +283,12 @@ public:
/// \brief Return the samples collected for function \p F.
FunctionSamples *getSamplesFor(const Function &F) {
return &Profiles[F.getName()];
// The function name may have been updated by adding suffix. In sample
// profile, the function names are all stripped, so we need to strip
// the function name suffix before matching with profile.
if (Profiles.count(F.getName().split('.').first))
return &Profiles[(F.getName().split('.').first)];
return nullptr;
}
/// \brief Return all the profiles.

View File

@ -4846,6 +4846,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
// Keep around the last seen summary to be used when we see an optional
// "OriginalName" attachement.
GlobalValueSummary *LastSeenSummary = nullptr;
GlobalValue::GUID LastSeenGUID = 0;
bool Combined = false;
// We can expect to see any number of type ID information records before
@ -5014,6 +5015,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
PendingTypeTestAssumeConstVCalls.clear();
PendingTypeCheckedLoadConstVCalls.clear();
LastSeenSummary = FS.get();
LastSeenGUID = GUID;
FS->setModulePath(ModuleIdMap[ModuleId]);
TheIndex.addGlobalValueSummary(GUID, std::move(FS));
Combined = true;
@ -5040,6 +5042,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
AS->setAliasee(AliaseeInModule);
GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first;
LastSeenGUID = GUID;
TheIndex.addGlobalValueSummary(GUID, std::move(AS));
Combined = true;
break;
@ -5056,6 +5059,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
LastSeenSummary = FS.get();
FS->setModulePath(ModuleIdMap[ModuleId]);
GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first;
LastSeenGUID = GUID;
TheIndex.addGlobalValueSummary(GUID, std::move(FS));
Combined = true;
break;
@ -5066,8 +5070,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
if (!LastSeenSummary)
return error("Name attachment that does not follow a combined record");
LastSeenSummary->setOriginalName(OriginalName);
TheIndex.addOriginalName(LastSeenGUID, OriginalName);
// Reset the LastSeenSummary
LastSeenSummary = nullptr;
LastSeenGUID = 0;
break;
}
case bitc::FS_TYPE_TESTS: {

View File

@ -3708,9 +3708,16 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
for (auto &EI : FS->calls()) {
// If this GUID doesn't have a value id, it doesn't have a function
// summary and we don't need to record any calls to it.
if (!hasValueId(EI.first.getGUID()))
continue;
NameVals.push_back(getValueId(EI.first.getGUID()));
GlobalValue::GUID GUID = EI.first.getGUID();
if (!hasValueId(GUID)) {
// For SamplePGO, the indirect call targets for local functions will
// have its original name annotated in profile. We try to find the
// corresponding PGOFuncName as the GUID.
GUID = Index.getGUIDFromOriginalID(GUID);
if (GUID == 0 || !hasValueId(GUID))
continue;
}
NameVals.push_back(getValueId(GUID));
if (HasProfileData)
NameVals.push_back(static_cast<uint8_t>(EI.second.Hotness));
}

View File

@ -298,6 +298,17 @@ void InstrProfSymtab::create(Module &M, bool InLTO) {
const std::string &PGOFuncName = getPGOFuncName(F, InLTO);
addFuncName(PGOFuncName);
MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
// In ThinLTO, local function may have been promoted to global and have
// suffix added to the function name. We need to add the stripped function
// name to the symbol table so that we can find a match from profile.
if (InLTO) {
auto pos = PGOFuncName.find('.');
if (pos != std::string::npos) {
const std::string &OtherFuncName = PGOFuncName.substr(0, pos);
addFuncName(OtherFuncName);
MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
}
}
}
finalizeSymtab();

View File

@ -197,6 +197,15 @@ static void computeImportForFunction(
auto GUID = Edge.first.getGUID();
DEBUG(dbgs() << " edge -> " << GUID << " Threshold:" << Threshold << "\n");
if (Index.findGlobalValueSummaryList(GUID) == Index.end()) {
// For SamplePGO, the indirect call targets for local functions will
// have its original name annotated in profile. We try to find the
// corresponding PGOFuncName as the GUID.
GUID = Index.getGUIDFromOriginalID(GUID);
if (GUID == 0)
continue;
}
if (DefinedGVSummaries.count(GUID)) {
DEBUG(dbgs() << "ignored! Target already in destination module.\n");
continue;

View File

@ -1383,7 +1383,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
bool SampleProfileLoader::runOnFunction(Function &F) {
F.setEntryCount(0);
Samples = Reader->getSamplesFor(F);
if (!Samples->empty())
if (Samples && !Samples->empty())
return emitAnnotations(F);
return false;
}

View File

@ -0,0 +1,27 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@fptr = external local_unnamed_addr global void ()*, align 8
; Function Attrs: norecurse nounwind uwtable
define void @_Z6updatei(i32 %i) local_unnamed_addr #0 {
entry:
store void ()* @_ZL3foov, void ()** @fptr, align 8
ret void
}
; Function Attrs: norecurse nounwind readnone uwtable
define internal void @_ZL3foov() #1 {
entry:
ret void
}
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3}
!llvm.ident = !{!31}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 297016)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "b.cc", directory: "/ssd/llvm/abc/small")
!2 = !{}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!31 = !{!"clang version 5.0.0 (trunk 297016)"}

View File

@ -0,0 +1,63 @@
; Do setup work for all below tests: generate bitcode and combined index
; RUN: opt -module-summary %s -o %t.bc
; RUN: opt -module-summary %p/Inputs/thinlto_samplepgo_icp.ll -o %t2.bc
; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
; Checks if calls to static target functions are properly imported and promoted
; by ICP. Note that the GUID in the profile is from the oroginal name.
; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -o %t4.bc -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS
; IMPORTS: Import _ZL3foov.llvm.0
; RUN: opt %t4.bc -icp-lto -pgo-icall-prom -S -icp-count-threshold=1 | FileCheck %s --check-prefix=ICALL-PROM
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@fptr = local_unnamed_addr global void ()* null, align 8
; Function Attrs: norecurse uwtable
define i32 @main() local_unnamed_addr #0 !prof !34 {
entry:
%0 = load void ()*, void ()** @fptr, align 8
; ICALL-PROM: br i1 %{{[0-9]+}}, label %if.true.direct_targ, label %if.false.orig_indirect
tail call void %0(), !prof !40
ret i32 0
}
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3,!4}
!llvm.ident = !{!31}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 297016)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "main.cc", directory: ".")
!2 = !{}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"ProfileSummary", !5}
!5 = !{!6, !7, !8, !9, !10, !11, !12, !13}
!6 = !{!"ProfileFormat", !"SampleProfile"}
!7 = !{!"TotalCount", i64 3003}
!8 = !{!"MaxCount", i64 3000}
!9 = !{!"MaxInternalCount", i64 0}
!10 = !{!"MaxFunctionCount", i64 0}
!11 = !{!"NumCounts", i64 3}
!12 = !{!"NumFunctions", i64 1}
!13 = !{!"DetailedSummary", !14}
!14 = !{!15, !16, !17, !18, !19, !20, !20, !21, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30}
!15 = !{i32 10000, i64 3000, i32 1}
!16 = !{i32 100000, i64 3000, i32 1}
!17 = !{i32 200000, i64 3000, i32 1}
!18 = !{i32 300000, i64 3000, i32 1}
!19 = !{i32 400000, i64 3000, i32 1}
!20 = !{i32 500000, i64 3000, i32 1}
!21 = !{i32 600000, i64 3000, i32 1}
!22 = !{i32 700000, i64 3000, i32 1}
!23 = !{i32 800000, i64 3000, i32 1}
!24 = !{i32 900000, i64 3000, i32 1}
!25 = !{i32 950000, i64 3000, i32 1}
!26 = !{i32 990000, i64 3000, i32 1}
!27 = !{i32 999000, i64 3000, i32 1}
!28 = !{i32 999900, i64 2, i32 2}
!29 = !{i32 999990, i64 2, i32 2}
!30 = !{i32 999999, i64 2, i32 2}
!31 = !{!"clang version 5.0.0 (trunk 297016)"}
!34 = !{!"function_entry_count", i64 1}
!40 = !{!"VP", i32 0, i64 3000, i64 -8789629626369651636, i64 3000}