[ThinLTO] Perform profile-guided indirect call promotion

Summary:
To enable profile-guided indirect call promotion in ThinLTO mode, we
simply add call graph edges for each profitable target from the profile
to the summaries, then the summary-guided importing will consider the
callee for importing as usual.

Also we need to enable the indirect call promotion pass creation in the
PassManagerBuilder when PerformThinLTO=true (we are in the ThinLTO
backend), so that the newly imported functions are considered for
promotion in the backends.

The IC promotion profiles refer to callees by GUID, which required
adding GUIDs to the per-module VST in bitcode (and assigning them
valueIds similar to how they are assigned valueIds in the combined
index).

Reviewers: mehdi_amini, xur

Subscribers: mehdi_amini, davidxl, llvm-commits

Differential Revision: http://reviews.llvm.org/D21932

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275707 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Teresa Johnson 2016-07-17 14:47:01 +00:00
parent 350718ee80
commit e2f34269f3
6 changed files with 143 additions and 13 deletions

View File

@ -80,6 +80,7 @@ struct ValueInfo {
assert(Kind == VI_Value && "Not a Value type");
return TheValue.V;
}
bool isGUID() const { return Kind == VI_GUID; }
};
/// \brief Function and variable summary information to aid decisions and
@ -261,6 +262,14 @@ public:
CallGraphEdgeList.push_back(std::make_pair(CalleeGUID, Info));
}
/// Record a call graph edge from this function to each function GUID recorded
/// in \p CallGraphEdges.
void
addCallGraphEdges(DenseMap<GlobalValue::GUID, CalleeInfo> &CallGraphEdges) {
for (auto &EI : CallGraphEdges)
addCallGraphEdge(EI.first, EI.second);
}
/// Record a call graph edge from this function to the function identified
/// by \p CalleeV, with \p CalleeInfo including the cumulative profile
/// count (across all calls from this function) or 0 if no PGO.

View File

@ -16,6 +16,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
@ -73,7 +74,9 @@ void ModuleSummaryIndexBuilder::computeFunctionSummary(
// Map from callee ValueId to profile count. Used to accumulate profile
// counts for all static calls to a given callee.
DenseMap<const Value *, CalleeInfo> CallGraphEdges;
DenseMap<GlobalValue::GUID, CalleeInfo> IndirectCallEdges;
DenseSet<const Value *> RefEdges;
ICallPromotionAnalysis ICallAnalysis;
SmallPtrSet<const User *, 8> Visited;
for (const BasicBlock &BB : F)
@ -83,13 +86,29 @@ void ModuleSummaryIndexBuilder::computeFunctionSummary(
if (auto CS = ImmutableCallSite(&I)) {
auto *CalledFunction = CS.getCalledFunction();
if (CalledFunction && CalledFunction->hasName() &&
!CalledFunction->isIntrinsic()) {
auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
auto *CalleeId =
M->getValueSymbolTable().lookup(CalledFunction->getName());
CallGraphEdges[CalleeId] +=
(ScaledCount ? ScaledCount.getValue() : 0);
// Check if this is a direct call to a known function.
if (CalledFunction) {
if (CalledFunction->hasName() && !CalledFunction->isIntrinsic()) {
auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
auto *CalleeId =
M->getValueSymbolTable().lookup(CalledFunction->getName());
CallGraphEdges[CalleeId] +=
(ScaledCount ? ScaledCount.getValue() : 0);
}
} else {
// Otherwise, check for an indirect call (call to a non-const value
// that isn't an inline assembly call).
const CallInst *CI = dyn_cast<CallInst>(&I);
if (CS.getCalledValue() && !isa<Constant>(CS.getCalledValue()) &&
!(CI && CI->isInlineAsm())) {
uint32_t NumVals, NumCandidates;
uint64_t TotalCount;
auto CandidateProfileData =
ICallAnalysis.getPromotionCandidatesForInstruction(
&I, NumVals, TotalCount, NumCandidates);
for (auto &Candidate : CandidateProfileData)
IndirectCallEdges[Candidate.Value] += Candidate.Count;
}
}
}
findRefEdges(&I, RefEdges, Visited);
@ -99,6 +118,7 @@ void ModuleSummaryIndexBuilder::computeFunctionSummary(
std::unique_ptr<FunctionSummary> FuncSummary =
llvm::make_unique<FunctionSummary>(Flags, NumInsts);
FuncSummary->addCallGraphEdges(CallGraphEdges);
FuncSummary->addCallGraphEdges(IndirectCallEdges);
FuncSummary->addRefEdges(RefEdges);
Index->addGlobalValueSummary(F.getName(), std::move(FuncSummary));
}

View File

@ -119,6 +119,14 @@ class ModuleBitcodeWriter : public BitcodeWriter {
/// The start bit of the module block, for use in generating a module hash
uint64_t BitcodeStartBit = 0;
/// Map that holds the correspondence between GUIDs in the summary index,
/// that came from indirect call profiles, and a value id generated by this
/// class to use in the VST and summary block records.
std::map<GlobalValue::GUID, unsigned> GUIDToValueIdMap;
/// Tracks the last value id recorded in the GUIDToValueMap.
unsigned GlobalValueId;
public:
/// Constructs a ModuleBitcodeWriter object for the given Module,
/// writing to the provided \p Buffer.
@ -132,6 +140,25 @@ public:
// will start at the bitcode, and we need the offset of the VST
// to line up.
BitcodeStartBit = Stream.GetCurrentBitNo();
// Assign ValueIds to any callee values in the index that came from
// indirect call profiles and were recorded as a GUID not a Value*
// (which would have been assigned an ID by the ValueEnumerator).
// The starting ValueId is just after the number of values in the
// ValueEnumerator, so that they can be emitted in the VST.
GlobalValueId = VE.getValues().size();
if (Index)
for (const auto &GUIDSummaryLists : *Index)
// Examine all summaries for this GUID.
for (auto &Summary : GUIDSummaryLists.second)
if (auto FS = dyn_cast<FunctionSummary>(Summary.get()))
// For each call in the function summary, see if the call
// is to a GUID (which means it is for an indirect call,
// otherwise we would have a Value for it). If so, synthesize
// a value id.
for (auto &CallEdge : FS->calls())
if (CallEdge.first.isGUID())
assignValueId(CallEdge.first.getGUID());
}
private:
@ -260,6 +287,22 @@ private:
unsigned FSModRefsAbbrev);
void writePerModuleGlobalValueSummary();
void writeModuleHash(size_t BlockStartPos);
void assignValueId(GlobalValue::GUID ValGUID) {
GUIDToValueIdMap[ValGUID] = ++GlobalValueId;
}
unsigned getValueId(GlobalValue::GUID ValGUID) {
const auto &VMI = GUIDToValueIdMap.find(ValGUID);
assert(VMI != GUIDToValueIdMap.end());
return VMI->second;
}
// Helper to get the valueId for the type of value recorded in VI.
unsigned getValueId(ValueInfo VI) {
if (VI.isGUID())
return getValueId(VI.getGUID());
return VE.getValueID(VI.getValue());
}
std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; }
};
/// Class to manage the bitcode writing for a combined index.
@ -2707,6 +2750,7 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
unsigned FnEntry8BitAbbrev;
unsigned FnEntry7BitAbbrev;
unsigned FnEntry6BitAbbrev;
unsigned GUIDEntryAbbrev;
if (IsModuleLevel && hasVSTOffsetPlaceholder()) {
// 8-bit fixed-width VST_CODE_FNENTRY function strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
@ -2734,11 +2778,19 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv);
// FIXME: Change the name of this record as it is now used by
// the per-module index as well.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid
GUIDEntryAbbrev = Stream.EmitAbbrev(Abbv);
}
// FIXME: Set up the abbrev, we know how many values there are!
// FIXME: We know if the type names can use 7-bit ascii.
SmallVector<unsigned, 64> NameVals;
SmallVector<uint64_t, 64> NameVals;
for (const ValueName &Name : VST) {
// Figure out the encoding to use for the name.
@ -2799,6 +2851,16 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
Stream.EmitRecord(Code, NameVals, AbbrevToUse);
NameVals.clear();
}
// Emit any GUID valueIDs created for indirect call edges into the
// module-level VST.
if (IsModuleLevel && hasVSTOffsetPlaceholder())
for (const auto &GI : valueIds()) {
NameVals.push_back(GI.second);
NameVals.push_back(GI.first);
Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals,
GUIDEntryAbbrev);
NameVals.clear();
}
Stream.ExitBlock();
}
@ -3220,12 +3282,11 @@ void ModuleBitcodeWriter::writePerModuleFunctionSummaryRecord(
std::sort(Calls.begin(), Calls.end(),
[this](const FunctionSummary::EdgeTy &L,
const FunctionSummary::EdgeTy &R) {
return VE.getValueID(L.first.getValue()) <
VE.getValueID(R.first.getValue());
return getValueId(L.first) < getValueId(R.first);
});
bool HasProfileData = F.getEntryCount().hasValue();
for (auto &ECI : Calls) {
NameVals.push_back(VE.getValueID(ECI.first.getValue()));
NameVals.push_back(getValueId(ECI.first));
assert(ECI.second.CallsiteCount > 0 && "Expected at least one callsite");
NameVals.push_back(ECI.second.CallsiteCount);
if (HasProfileData)

View File

@ -407,10 +407,11 @@ void PassManagerBuilder::populateModulePassManager(
/// PGO instrumentation is added during the compile phase for ThinLTO, do
/// not run it a second time
addPGOInstrPasses(MPM);
// Indirect call promotion that promotes intra-module targets only.
MPM.add(createPGOIndirectCallPromotionLegacyPass());
}
// Indirect call promotion that promotes intra-module targets only.
MPM.add(createPGOIndirectCallPromotionLegacyPass());
if (EnableNonLTOGlobalsModRef)
// We add a module alias analysis pass here. In part due to bugs in the
// analysis infrastructure this "works" in that the analysis stays alive

View File

@ -0,0 +1,7 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @a() {
entry:
ret void
}

View File

@ -0,0 +1,32 @@
; Do setup work for all below tests: generate bitcode and combined index
; RUN: opt -module-summary %s -o %t.bc
; RUN: opt -module-summary %p/Inputs/thinlto_indirect_call_promotion.ll -o %t2.bc
; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -o %t4.bc -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS
; IMPORTS: Import a
; RUN: opt %t4.bc -pgo-icall-prom -S -icp-count-threshold=1 | FileCheck %s --check-prefix=ICALL-PROM
; RUN: opt %t4.bc -pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=1 2>&1 | FileCheck %s --check-prefix=PASS-REMARK
; PASS-REMARK: Promote indirect call to a with count 1 out of 1
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@foo = external local_unnamed_addr global void ()*, align 8
define i32 @main() local_unnamed_addr {
entry:
%0 = load void ()*, void ()** @foo, align 8
; ICALL-PROM: br i1 %{{[0-9]+}}, label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
tail call void %0(), !prof !1
ret i32 0
}
!1 = !{!"VP", i32 0, i64 1, i64 -6289574019528802036, i64 1}
; Should not have a VP annotation on new indirect call (check before and after
; branch_weights annotation).
; ICALL-PROM-NOT: !"VP"
; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 0}
; ICALL-PROM-NOT: !"VP"