[BOLT] Major overhaul of profiling in BOLT

Summary:
Profile reading was tightly coupled with building the CFG. Since I plan
to move to a new profile format that will be associated with the CFG,
it is critical to decouple the two phases.

We now read the profile right after the CFG is constructed, but
before it is "canonicalized", i.e. conditional tail calls (CTCs) will
still be there.

After reading the profile, we run a post-processing pass that fixes
the CFG and does some extra processing for debug info, such as the
inference of fall-throughs, which is still required with the current
format.
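To make the new ordering concrete, here is a minimal sketch of the
per-function flow. Only buildCFG(), readProfile() and postProcessCFG()
are part of this change; the driver loop itself is hypothetical, since
the actual call sites live in RewriteInstance and are not shown in this
diff:

  // Hypothetical driver; BinaryFunctions is assumed to be the usual
  // address-to-function map.
  for (auto &BFI : BinaryFunctions) {
    BinaryFunction &Function = BFI.second;
    if (!Function.buildCFG())   // CFG with CTCs intact, "Offset" annotations set
      continue;
    Function.readProfile();     // match profile data against the raw CFG
  }
  for (auto &BFI : BinaryFunctions) {
    // Canonicalize: remove CTCs, infer fall-throughs, fix branches.
    BFI.second.postProcessCFG();
  }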

Another good reason for decoupling is that we can use the profile
together with the CFG to more accurately record fall-through branches
during aggregation.
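As a sketch of how an aggregator could use the CFG-aware interface
added here (LBREntry and getFallthroughsInTrace() are declared below;
filling in the LBR entries from perf data is assumed):

  // Two consecutive LBR entries executed back-to-back form a trace;
  // recover all fall-through pairs inside it for accurate accounting.
  LBREntry First, Second;  // populated by the aggregator (assumed)
  if (auto FTs = BF.getFallthroughsInTrace(First, Second))
    for (const auto &FT : *FTs)
      outs() << "fall-through: 0x" << Twine::utohexstr(FT.first)
             << " -> 0x" << Twine::utohexstr(FT.second) << '\n';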

At the moment we use "Offset" annotations to facilitate locating
the instructions that correspond to profile entries. This might not be
super efficient. However, once we switch to the new profile format,
the offsets will no longer be needed. We might keep them for
the aggregator, but if we have to trust the LBR data, even that might
not be strictly necessary.
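Both sides of the mechanism appear in this diff: the annotation is
attached during disassembly, and profile reading later locates an
instruction inside a basic block by the same tag:

  // Disassembly time: record the input offset on the instruction.
  MIA->addAnnotation(Ctx.get(), Instruction, "Offset", Offset);

  // Profile-reading time (see getInstructionAtOffset() below):
  constexpr auto InvalidOffset = std::numeric_limits<uint64_t>::max();
  if (Offset == BC.MIA->getAnnotationWithDefault<uint64_t>(Inst, "Offset",
                                                           InvalidOffset))
    return &Inst;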

I've tried to make the changes while keeping backwards compatibility.
This makes it easier to verify the correctness of the changes, but it
also means that we lose some accuracy of the profile.

Some refactoring is included.

Flag "-prof-compat-mode" (on by default) is used for bug-level
backwards compatibility. Disable it for more accurate tracing.
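For reference, this guard from recordTrace() in the new
BinaryFunctionProfile.cpp below shows the kind of behavior the flag
preserves: in compat mode a fall-through is recorded only after a
conditional jump, mirroring the old profile semantics:

  // Skip recording fall-throughs that don't follow a conditional jump
  // when bug-level compatibility is requested.
  if (!opts::CompatMode ||
      (BB->succ_size() == 2 &&
       BB->getConditionalSuccessor(false) == NextBB)) {
    auto &BI = BB->getBranchInfo(*NextBB);
    BI.Count += Count;
  }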

(cherry picked from FBD6506156)
Maksim Panchenko, 2017-11-28 09:57:21 -08:00
commit d15b93bade (parent b6f7c68a6c)
20 changed files with 1340 additions and 1068 deletions

BinaryBasicBlock.cpp

@@ -97,11 +97,12 @@ bool BinaryBasicBlock::validateSuccessorInvariants() {
case 0:
Valid = !CondBranch && !UncondBranch;
break;
case 1:
Valid = !CondBranch ||
(CondBranch &&
!Function->getBasicBlockForLabel(BC.MIA->getTargetSymbol(*CondBranch)));
case 1: {
const bool HasCondBlock = CondBranch &&
Function->getBasicBlockForLabel(BC.MIA->getTargetSymbol(*CondBranch));
Valid = !CondBranch || !HasCondBlock;
break;
}
case 2:
Valid =
(CondBranch &&
@@ -121,7 +122,7 @@ bool BinaryBasicBlock::validateSuccessorInvariants() {
<< Twine::utohexstr(BC.MIA->getJumpTable(*Inst)) << "\n";
JT->print(errs());
}
dump();
getFunction()->dump();
}
return Valid;
}
@@ -452,5 +453,18 @@ uint64_t BinaryBasicBlock::estimateSize() const {
return Function->getBinaryContext().computeCodeSize(begin(), end());
}
BinaryBasicBlock::BinaryBranchInfo &
BinaryBasicBlock::getBranchInfo(const BinaryBasicBlock &Succ) {
auto BI = branch_info_begin();
for (auto BB : successors()) {
if (&Succ == BB)
return *BI;
++BI;
}
llvm_unreachable("Invalid successor");
return *BI;
}
} // namespace bolt
} // namespace llvm

BinaryBasicBlock.h

@@ -363,15 +363,14 @@ public:
return BranchInfo[Condition == true ? 0 : 1];
};
BinaryBranchInfo &getBranchInfo(const BinaryBasicBlock &Succ) {
auto BI = branch_info_begin();
for (auto BB : successors()) {
if (&Succ == BB)
return *BI;
++BI;
}
llvm_unreachable("Invalid successor");
return *BI;
BinaryBranchInfo &getBranchInfo(const BinaryBasicBlock &Succ);
void setSuccessorBranchInfo(const BinaryBasicBlock &Succ,
uint64_t Count,
uint64_t MispredictedCount) {
auto &BI = getBranchInfo(Succ);
BI.Count = Count;
BI.MispredictedCount = MispredictedCount;
}
/// Try to compute the taken and misprediction frequencies for the given

BinaryContext.cpp

@@ -27,8 +27,6 @@ namespace opts {
extern cl::OptionCategory BoltCategory;
extern cl::opt<BinaryFunction::ReorderType> ReorderFunctions;
static cl::opt<bool>
PrintDebugInfo("print-debug-info",
cl::desc("print debug info when printing functions"),
@@ -215,16 +213,14 @@ std::vector<BinaryFunction *> BinaryContext::getSortedFunctions(
return &BFI.second;
});
if (opts::ReorderFunctions != BinaryFunction::RT_NONE) {
std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
[](const BinaryFunction *A, const BinaryFunction *B) {
if (A->hasValidIndex() && B->hasValidIndex()) {
return A->getIndex() < B->getIndex();
} else {
return A->hasValidIndex();
}
});
}
std::stable_sort(SortedFunctions.begin(), SortedFunctions.end(),
[](const BinaryFunction *A, const BinaryFunction *B) {
if (A->hasValidIndex() && B->hasValidIndex()) {
return A->getIndex() < B->getIndex();
} else {
return A->hasValidIndex();
}
});
return SortedFunctions;
}

BinaryContext.h

@@ -169,6 +169,9 @@ public:
/// Number of functions with profile information
uint64_t NumProfiledFuncs{0};
/// Total hotness score according to profiling data for this binary.
uint64_t TotalScore{0};
/// Track next available address for new allocatable sections. RewriteInstance
/// sets this prior to running BOLT passes, so layout passes are aware of the
/// final addresses functions will have.

BinaryFunction.cpp

@@ -13,7 +13,6 @@
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#include "DataReader.h"
#include "Passes/MCF.h"
#include "llvm/ADT/edit_distance.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
@@ -52,7 +51,6 @@ extern cl::OptionCategory BoltRelocCategory;
extern bool shouldProcess(const BinaryFunction &);
extern cl::opt<bool> UpdateDebugSections;
extern cl::opt<IndirectCallPromotionType> IndirectCallPromotion;
extern cl::opt<unsigned> Verbosity;
static cl::opt<bool>
@@ -61,27 +59,6 @@ AlignBlocks("align-blocks",
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
static cl::opt<MCFCostFunction>
DoMCF("mcf",
cl::desc("solve a min cost flow problem on the CFG to fix edge counts "
"(default=disable)"),
cl::init(MCF_DISABLE),
cl::values(
clEnumValN(MCF_DISABLE, "none",
"disable MCF"),
clEnumValN(MCF_LINEAR, "linear",
"cost function is inversely proportional to edge count"),
clEnumValN(MCF_QUADRATIC, "quadratic",
"cost function is inversely proportional to edge count squared"),
clEnumValN(MCF_LOG, "log",
"cost function is inversely proportional to log of edge count"),
clEnumValN(MCF_BLAMEFTS, "blamefts",
"tune cost to blame fall-through edges for surplus flow"),
clEnumValEnd),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltOptCategory));
static cl::opt<bool>
DotToolTipCode("dot-tooltip-code",
cl::desc("add basic block instructions as tool tips on nodes"),
@@ -1185,21 +1162,13 @@ void BinaryFunction::disassemble(ArrayRef<uint8_t> FunctionData) {
}
BC.MIA->replaceBranchTarget(Instruction, TargetSymbol, &*Ctx);
// Record call offset for profile matching.
if (IsCall) {
MIA->addAnnotation(Ctx.get(), Instruction, "Offset", Offset);
}
if (IsCondBranch) {
// Add fallthrough branch info.
FTBranches.emplace_back(Offset, Offset + Size);
if (IsCall) {
MIA->setConditionalTailCall(Instruction, TargetAddress);
}
// Mark CTC.
if (IsCondBranch && IsCall) {
MIA->setConditionalTailCall(Instruction, TargetAddress);
}
} else {
// Could not evaluate branch. Should be an indirect call or an
// indirect branch. Bail out on the latter case.
MIA->addAnnotation(Ctx.get(), Instruction, "Offset", Offset);
if (MIA->isIndirectBranch(Instruction)) {
auto Result = processIndirectBranch(Instruction, Size, Offset);
switch (Result) {
@@ -1255,6 +1224,9 @@ add_instruction:
findDebugLineInformationForInstructionAt(AbsoluteInstrAddr, ULT));
}
// Record offset of the instruction for profile matching.
MIA->addAnnotation(Ctx.get(), Instruction, "Offset", Offset);
if (MemData && !emptyRange(MemData->getMemInfoRange(Offset))) {
MIA->addAnnotation(Ctx.get(), Instruction, "MemDataOffset", Offset);
}
@@ -1563,9 +1535,6 @@ bool BinaryFunction::buildCFG() {
// e.g. exit(3), etc. Otherwise we'll see false fall-through
// blocks.
// Possibly assign/re-assign branch profile data.
matchProfileData();
for (auto &Branch : TakenBranches) {
DEBUG(dbgs() << "registering branch [0x" << Twine::utohexstr(Branch.first)
<< "] -> [0x" << Twine::utohexstr(Branch.second) << "]\n");
@@ -1574,124 +1543,15 @@ bool BinaryFunction::buildCFG() {
auto *ToBB = getBasicBlockAtOffset(Branch.second);
assert(ToBB && "cannot find BB containing TO branch");
if (!BranchData) {
FromBB->addSuccessor(ToBB);
continue;
}
auto BranchInfoOrErr = BranchData->getBranch(Branch.first, Branch.second);
if (!BranchInfoOrErr) {
FromBB->addSuccessor(ToBB);
continue;
}
const BranchInfo &BInfo = BranchInfoOrErr.get();
FromBB->addSuccessor(ToBB, BInfo.Branches, BInfo.Mispreds);
// Populate profile counts for the jump table.
auto *LastInstr = FromBB->getLastNonPseudoInstr();
if (!LastInstr)
continue;
auto JTAddress = BC.MIA->getJumpTable(*LastInstr);
if (!JTAddress)
continue;
auto *JT = getJumpTableContainingAddress(JTAddress);
if (!JT)
continue;
JT->Count += BInfo.Branches;
if (opts::IndirectCallPromotion < ICP_JUMP_TABLES &&
opts::JumpTables < JTS_AGGRESSIVE)
continue;
if (JT->Counts.empty())
JT->Counts.resize(JT->Entries.size());
auto EI = JT->Entries.begin();
auto Delta = (JTAddress - JT->Address) / JT->EntrySize;
EI += Delta;
while (EI != JT->Entries.end()) {
if (ToBB->getLabel() == *EI) {
assert(Delta < JT->Counts.size());
JT->Counts[Delta].Mispreds += BInfo.Mispreds;
JT->Counts[Delta].Count += BInfo.Branches;
}
++Delta;
++EI;
// A label marks the start of another jump table.
if (JT->Labels.count(Delta * JT->EntrySize))
break;
}
FromBB->addSuccessor(ToBB);
}
for (auto &Branch : FTBranches) {
DEBUG(dbgs() << "registering fallthrough [0x"
<< Twine::utohexstr(Branch.first) << "] -> [0x"
<< Twine::utohexstr(Branch.second) << "]\n");
auto *FromBB = getBasicBlockContainingOffset(Branch.first);
assert(FromBB && "cannot find BB containing FROM branch");
// Try to find the destination basic block. If the jump instruction was
// followed by a no-op then the destination offset recorded in FTBranches
// will point to that no-op but the destination basic block will start
// after the no-op due to ignoring no-ops when creating basic blocks.
// So we have to skip any no-ops when trying to find the destination
// basic block.
auto *ToBB = getBasicBlockAtOffset(Branch.second);
if (ToBB == nullptr) {
auto I = Instructions.find(Branch.second), E = Instructions.end();
while (ToBB == nullptr && I != E && MIA->isNoop(I->second)) {
++I;
if (I == E)
break;
ToBB = getBasicBlockAtOffset(I->first);
}
if (ToBB == nullptr) {
// We have a fall-through that does not point to another BB, ignore it
// as it may happen in cases where we have a BB finished by two
// branches.
// This can also happen when we delete a branch past the end of a
// function in case of a call to __builtin_unreachable().
continue;
}
}
// Does not add a successor if we can't find profile data; leave it to the
// inference pass to guess its frequency
if (BranchData) {
auto BranchInfoOrErr = BranchData->getBranch(Branch.first, Branch.second);
if (BranchInfoOrErr) {
const BranchInfo &BInfo = BranchInfoOrErr.get();
FromBB->addSuccessor(ToBB, BInfo.Branches, BInfo.Mispreds);
}
}
}
if (BranchData) {
for (auto BB : BasicBlocks) {
auto *CTCInstr = BB->getLastNonPseudoInstr();
if (!CTCInstr || !MIA->getConditionalTailCall(*CTCInstr))
continue;
auto OffsetOrErr =
MIA->tryGetAnnotationAs<uint64_t>(*CTCInstr, "Offset");
assert(OffsetOrErr && "offset not set for conditional tail call");
auto BranchInfoOrErr = BranchData->getDirectCallBranch(*OffsetOrErr);
if (!BranchInfoOrErr)
continue;
MIA->addAnnotation(BC.Ctx.get(), *CTCInstr, "CTCTakenCount",
BranchInfoOrErr->Branches);
MIA->addAnnotation(BC.Ctx.get(), *CTCInstr, "CTCMispredCount",
BranchInfoOrErr->Mispreds);
}
}
// Add fall-through branches (except for non-taken conditional branches with
// profile data, which were already accounted for in TakenBranches).
// Add fall-through branches.
PrevBB = nullptr;
bool IsPrevFT = false; // Is previous block a fall-through.
for (auto BB : BasicBlocks) {
if (IsPrevFT) {
PrevBB->addSuccessor(BB, BinaryBasicBlock::COUNT_NO_PROFILE,
BinaryBasicBlock::COUNT_INFERRED);
PrevBB->addSuccessor(BB);
}
if (BB->empty()) {
IsPrevFT = true;
@@ -1703,29 +1563,18 @@ bool BinaryFunction::buildCFG() {
assert(LastInstr &&
"should have non-pseudo instruction in non-empty block");
const auto IsCondTailCall = MIA->getConditionalTailCall(*LastInstr);
if (BB->succ_size() == 0) {
if (IsCondTailCall) {
// Conditional tail call without profile data for non-taken branch.
IsPrevFT = true;
} else {
// Unless the last instruction is a terminator, control will fall
// through to the next basic block.
IsPrevFT = !MIA->isTerminator(*LastInstr);
}
// Since there are no existing successors, we know the last instruction is
// not a conditional branch. Thus if it's a terminator, it shouldn't be a
// fall-through.
//
// Conditional tail call is a special case since we don't add a taken
// branch successor for it.
IsPrevFT = !MIA->isTerminator(*LastInstr) ||
MIA->getConditionalTailCall(*LastInstr);
} else if (BB->succ_size() == 1) {
if (IsCondTailCall) {
// Conditional tail call with data for non-taken branch. A fall-through
// edge has already been added in the CFG.
IsPrevFT = false;
} else {
// Fall-through should be added if the last instruction is a conditional
// jump, since there was no profile data for the non-taken branch.
IsPrevFT = MIA->isConditionalBranch(*LastInstr);
}
IsPrevFT = MIA->isConditionalBranch(*LastInstr);
} else {
// Ends with 2 branches, with an indirect jump or it is a conditional
// branch whose frequency has been inferred from LBR.
IsPrevFT = false;
}
@@ -1734,26 +1583,20 @@ bool BinaryFunction::buildCFG() {
if (!IsPrevFT) {
// Possibly a call that does not return.
DEBUG(dbgs() << "last block was marked as a fall-through\n");
DEBUG(dbgs() << "last block was marked as a fall-through in " << *this
<< '\n');
}
// Assign landing pads and throwers info.
recomputeLandingPads();
// Infer frequency for non-taken branches
if (hasValidProfile() && opts::DoMCF != MCF_DISABLE) {
// Convert COUNT_NO_PROFILE to 0
removeTagsFromProfile();
solveMCF(*this, opts::DoMCF);
} else if (hasValidProfile()) {
inferFallThroughCounts();
} else {
clearProfile();
}
// Assign CFI information to each BB entry.
annotateCFIState();
// Set the basic block layout to the original order.
// Annotate invoke instructions with GNU_args_size data.
propagateGnuArgsSizeInfo();
// Set the basic block layout to the original order and set end offsets.
PrevBB = nullptr;
for (auto BB : BasicBlocks) {
BasicBlocksLayout.emplace_back(BB);
@@ -1763,33 +1606,37 @@ bool BinaryFunction::buildCFG() {
}
PrevBB->setEndOffset(getSize());
// Convert conditional tail call branches to conditional branches that jump
// to a tail call.
// TODO: make a separate pass
removeConditionalTailCalls();
updateLayoutIndices();
// Make any necessary adjustments for indirect branches.
if (!postProcessIndirectBranches()) {
if (opts::Verbosity) {
errs() << "BOLT-WARNING: failed to post-process indirect branches for "
<< *this << '\n';
// Update the state.
CurrentState = State::CFG;
return true;
}
void BinaryFunction::postProcessCFG() {
if (isSimple() && !BasicBlocks.empty()) {
// Convert conditional tail call branches to conditional branches that jump
// to a tail call.
removeConditionalTailCalls();
// Make any necessary adjustments for indirect branches.
if (!postProcessIndirectBranches()) {
if (opts::Verbosity) {
errs() << "BOLT-WARNING: failed to post-process indirect branches for "
<< *this << '\n';
}
// In relocation mode we want to keep processing the function but avoid
// optimizing it.
setSimple(false);
} else {
postProcessProfile();
// Eliminate inconsistencies between branch instructions and CFG.
postProcessBranches();
}
// In relocation mode we want to keep processing the function but avoid
// optimizing it.
setSimple(false);
}
// Eliminate inconsistencies between branch instructions and CFG.
postProcessBranches();
// If our profiling data comes from samples instead of LBR entries,
// now is the time to read this data and attach it to BBs. At this point,
// conditional tail calls are converted into a branch and a new basic block,
// making it slightly different than the original binary where profiled data
// was collected. However, this shouldn't matter for plain sampling events.
if (!BC.DR.hasLBR())
readSampleData();
// Clean-up memory taken by instructions and labels.
//
// NB: don't clear Labels list as we may need them if we mark the function
@@ -1797,19 +1644,20 @@ bool BinaryFunction::buildCFG() {
clearList(Instructions);
clearList(OffsetToCFI);
clearList(TakenBranches);
clearList(FTBranches);
clearList(IgnoredBranches);
clearList(EntryOffsets);
// Update the state.
CurrentState = State::CFG;
// Remove "Offset" annotations from instructions that don't need those.
for (auto *BB : layout()) {
for (auto &Inst : *BB) {
if (BC.MIA->isCall(Inst) || BC.MIA->isIndirectBranch(Inst))
continue;
BC.MIA->removeAnnotation(Inst, "Offset");
}
}
// Annotate invoke instructions with GNU_args_size data.
propagateGnuArgsSizeInfo();
assert(validateCFG() && "Invalid CFG detected after disassembly");
return true;
assert((!isSimple() || validateCFG())
&& "Invalid CFG detected after post-processing CFG");
}
void BinaryFunction::removeTagsFromProfile() {
@@ -1826,57 +1674,6 @@ void BinaryFunction::removeTagsFromProfile() {
}
}
void BinaryFunction::readSampleData() {
auto SampleDataOrErr = BC.DR.getFuncSampleData(getNames());
if (!SampleDataOrErr)
return;
// Non-LBR mode territory
// First step is to assign BB execution count based on samples from perf
ProfileMatchRatio = 1.0f;
removeTagsFromProfile();
bool NormalizeByInsnCount =
BC.DR.usesEvent("cycles") || BC.DR.usesEvent("instructions");
bool NormalizeByCalls = BC.DR.usesEvent("branches");
static bool NagUser{true};
if (NagUser) {
outs() << "BOLT-INFO: operating with non-LBR profiling data.\n";
if (NormalizeByInsnCount) {
outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
} else if (NormalizeByCalls) {
outs() << "BOLT-INFO: normalizing samples by branches.\n";
}
NagUser = false;
}
uint64_t LastOffset = getSize();
uint64_t TotalEntryCount{0};
for (auto I = BasicBlockOffsets.rbegin(), E = BasicBlockOffsets.rend();
I != E; ++I) {
uint64_t CurOffset = I->first;
// Always work with samples multiplied by 1000 to avoid losing them if we
// later need to normalize numbers
uint64_t NumSamples =
SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000;
if (NormalizeByInsnCount && I->second->getNumNonPseudos())
NumSamples /= I->second->getNumNonPseudos();
else if (NormalizeByCalls) {
uint32_t NumCalls = I->second->getNumCalls();
NumSamples /= NumCalls + 1;
}
I->second->setExecutionCount(NumSamples);
if (I->second->isEntryPoint())
TotalEntryCount += NumSamples;
LastOffset = CurOffset;
}
ExecutionCount = TotalEntryCount;
estimateEdgeCounts(BC, *this);
if (opts::DoMCF != MCF_DISABLE)
solveMCF(*this, opts::DoMCF);
}
void BinaryFunction::addEntryPoint(uint64_t Address) {
assert(containsAddress(Address) && "address does not belong to the function");
@@ -1930,377 +1727,7 @@ void BinaryFunction::addEntryPoint(uint64_t Address) {
}
}
bool BinaryFunction::fetchProfileForOtherEntryPoints() {
if (!BranchData)
return false;
// Check if we are missing profiling data for secondary entry points
bool First{true};
bool Updated{false};
for (auto BB : BasicBlocks) {
if (First) {
First = false;
continue;
}
if (BB->isEntryPoint()) {
uint64_t EntryAddress = BB->getOffset() + getAddress();
// Look for branch data associated with this entry point
std::vector<std::string> Names;
std::multimap<uint64_t, std::string>::iterator I, E;
for (std::tie(I, E) = BC.GlobalAddresses.equal_range(EntryAddress);
I != E; ++I) {
Names.push_back(I->second);
}
if (!Names.empty()) {
if (FuncBranchData *Data = BC.DR.getFuncBranchData(Names)) {
BranchData->appendFrom(*Data, BB->getOffset());
Data->Used = true;
Updated = true;
}
}
}
}
return Updated;
}
void BinaryFunction::matchProfileMemData() {
const auto AllMemData = BC.DR.getFuncMemDataRegex(getNames());
for (auto *NewMemData : AllMemData) {
// Prevent functions from sharing the same profile.
if (NewMemData->Used)
continue;
if (MemData)
MemData->Used = false;
// Update function profile data with the new set.
MemData = NewMemData;
MemData->Used = true;
break;
}
}
void BinaryFunction::matchProfileData() {
// This functionality is available for LBR-mode only
// TODO: Implement evaluateProfileData() for samples, checking whether
// sample addresses match instruction addresses in the function
if (!BC.DR.hasLBR())
return;
if (BranchData) {
ProfileMatchRatio = evaluateProfileData(*BranchData);
if (ProfileMatchRatio == 1.0f) {
if (fetchProfileForOtherEntryPoints()) {
ProfileMatchRatio = evaluateProfileData(*BranchData);
ExecutionCount = BranchData->ExecutionCount;
}
return;
}
}
// Check if the function name can fluctuate between several compilations
// possibly triggered by minor unrelated code changes in the source code
// of the input binary.
const auto HasVolatileName = [this]() {
for (const auto Name : getNames()) {
if (getLTOCommonName(Name))
return true;
}
return false;
}();
if (!HasVolatileName)
return;
// Check for a profile that matches with 100% confidence.
const auto AllBranchData = BC.DR.getFuncBranchDataRegex(getNames());
for (auto *NewBranchData : AllBranchData) {
// Prevent functions from sharing the same profile.
if (NewBranchData->Used)
continue;
if (evaluateProfileData(*NewBranchData) != 1.0f)
continue;
if (BranchData)
BranchData->Used = false;
// Update function profile data with the new set.
BranchData = NewBranchData;
ExecutionCount = NewBranchData->ExecutionCount;
ProfileMatchRatio = 1.0f;
BranchData->Used = true;
break;
}
}
float BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
// Until we define a minimal profile, we consider an empty branch data to be
// a valid profile. It could happen to a function without branches when we
// still have an EntryData for execution count.
if (BranchData.Data.empty()) {
return 1.0f;
}
BranchListType ProfileBranches(BranchData.Data.size());
std::transform(BranchData.Data.begin(),
BranchData.Data.end(),
ProfileBranches.begin(),
[](const BranchInfo &BI) {
return std::make_pair(BI.From.Offset,
BI.To.Name == BI.From.Name ?
BI.To.Offset : -1U);
});
BranchListType LocalProfileBranches;
std::copy_if(ProfileBranches.begin(),
ProfileBranches.end(),
std::back_inserter(LocalProfileBranches),
[](const std::pair<uint32_t, uint32_t> &Branch) {
return Branch.second != -1U;
});
// Profile referencing external functions.
BranchListType ExternProfileBranches;
std::copy_if(ProfileBranches.begin(),
ProfileBranches.end(),
std::back_inserter(ExternProfileBranches),
[](const std::pair<uint32_t, uint32_t> &Branch) {
return Branch.second == -1U;
});
std::sort(LocalProfileBranches.begin(), LocalProfileBranches.end());
BranchListType FunctionBranches = TakenBranches;
FunctionBranches.insert(FunctionBranches.end(),
FTBranches.begin(),
FTBranches.end());
FunctionBranches.insert(FunctionBranches.end(),
IgnoredBranches.begin(),
IgnoredBranches.end());
std::sort(FunctionBranches.begin(), FunctionBranches.end());
BranchListType DiffBranches; // Branches in profile without a match.
std::set_difference(LocalProfileBranches.begin(),
LocalProfileBranches.end(),
FunctionBranches.begin(),
FunctionBranches.end(),
std::back_inserter(DiffBranches));
// Branches without a match in CFG.
BranchListType OrphanBranches;
// Eliminate recursive calls and returns from recursive calls from the list
// of branches that have no match. They are not considered local branches.
auto isRecursiveBranch = [&](std::pair<uint32_t, uint32_t> &Branch) {
auto SrcInstrI = Instructions.find(Branch.first);
if (SrcInstrI == Instructions.end())
return false;
// Check if it is a recursive call.
const auto &SrcInstr = SrcInstrI->second;
if ((BC.MIA->isCall(SrcInstr) || BC.MIA->isIndirectBranch(SrcInstr)) &&
Branch.second == 0)
return true;
auto DstInstrI = Instructions.find(Branch.second);
if (DstInstrI == Instructions.end())
return false;
// Check if it is a return from a recursive call.
bool IsSrcReturn = BC.MIA->isReturn(SrcInstr);
// "rep ret" is considered to be 2 different instructions.
if (!IsSrcReturn && BC.MIA->isPrefix(SrcInstr)) {
auto SrcInstrSuccessorI = SrcInstrI;
++SrcInstrSuccessorI;
assert(SrcInstrSuccessorI != Instructions.end() &&
"unexpected prefix instruction at the end of function");
IsSrcReturn = BC.MIA->isReturn(SrcInstrSuccessorI->second);
}
if (IsSrcReturn && Branch.second != 0) {
// Make sure the destination follows the call instruction.
auto DstInstrPredecessorI = DstInstrI;
--DstInstrPredecessorI;
assert(DstInstrPredecessorI != Instructions.end() && "invalid iterator");
if (BC.MIA->isCall(DstInstrPredecessorI->second))
return true;
}
return false;
};
std::remove_copy_if(DiffBranches.begin(),
DiffBranches.end(),
std::back_inserter(OrphanBranches),
isRecursiveBranch);
// Check all external branches.
std::copy_if(ExternProfileBranches.begin(),
ExternProfileBranches.end(),
std::back_inserter(OrphanBranches),
[&](const std::pair<uint32_t, uint32_t> &Branch) {
auto II = Instructions.find(Branch.first);
if (II == Instructions.end())
return true;
const auto &Instr = II->second;
// Check for calls, tail calls, rets and indirect branches.
// When matching profiling info, we did not reach the stage
// when we identify tail calls, so they are still represented
// by regular branch instructions and we need isBranch() here.
if (BC.MIA->isCall(Instr) ||
BC.MIA->isBranch(Instr) ||
BC.MIA->isReturn(Instr))
return false;
// Check for "rep ret"
if (BC.MIA->isPrefix(Instr)) {
++II;
if (II != Instructions.end() && BC.MIA->isReturn(II->second))
return false;
}
return true;
});
const float MatchRatio =
(float) (ProfileBranches.size() - OrphanBranches.size()) /
(float) ProfileBranches.size();
if (opts::Verbosity >= 2 && !OrphanBranches.empty()) {
errs() << "BOLT-WARNING: profile branches match only "
<< format("%.1f%%", MatchRatio * 100.0f) << " ("
<< (ProfileBranches.size() - OrphanBranches.size()) << '/'
<< ProfileBranches.size() << ") for function "
<< *this << '\n';
DEBUG(
for (auto &OBranch : OrphanBranches)
errs() << "\t0x" << Twine::utohexstr(OBranch.first) << " -> 0x"
<< Twine::utohexstr(OBranch.second) << " (0x"
<< Twine::utohexstr(OBranch.first + getAddress()) << " -> 0x"
<< Twine::utohexstr(OBranch.second + getAddress()) << ")\n";
);
}
return MatchRatio;
}
void BinaryFunction::clearProfile() {
// Keep function execution profile the same. Only clear basic block and edge
// counts.
for (auto *BB : BasicBlocks) {
BB->ExecutionCount = 0;
for (auto &BI : BB->branch_info()) {
BI.Count = 0;
BI.MispredictedCount = 0;
}
}
}
void BinaryFunction::inferFallThroughCounts() {
assert(!BasicBlocks.empty() && "basic block list should not be empty");
assert(BranchData && "cannot infer counts without branch data");
// Compute preliminary execution count for each basic block
for (auto CurBB : BasicBlocks) {
CurBB->ExecutionCount = 0;
}
for (auto CurBB : BasicBlocks) {
auto SuccBIIter = CurBB->branch_info_begin();
for (auto Succ : CurBB->successors()) {
if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
++SuccBIIter;
}
}
// Set entry BBs to zero; we'll update their execution count next with entry
// data (we maintain a separate data structure for branches to function entry
// points)
for (auto BB : BasicBlocks) {
if (BB->isEntryPoint())
BB->ExecutionCount = 0;
}
// Update execution counts of landing pad blocks and entry BBs
// There is a slight skew introduced here as branches originating from RETs
// may be accounted for in the execution count of an entry block if the last
// instruction in a predecessor fall-through block is a call. This situation
// should rarely happen because there are few multiple-entry functions.
for (const auto &I : BranchData->EntryData) {
BinaryBasicBlock *BB = getBasicBlockAtOffset(I.To.Offset);
if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
BB->setExecutionCount(BB->getExecutionCount() + I.Branches);
}
}
// Work on a basic block at a time, propagating frequency information
// forwards.
// It is important to walk in the layout order.
for (auto BB : BasicBlocks) {
uint64_t BBExecCount = BB->getExecutionCount();
// Propagate this information to successors, filling in fall-through edges
// with frequency information
if (BB->succ_size() == 0)
continue;
// Calculate frequency of outgoing branches from this node according to
// LBR data.
uint64_t ReportedBranches = 0;
for (const auto &SuccBI : BB->branch_info()) {
if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
ReportedBranches += SuccBI.Count;
}
// Get taken count of conditional tail call if the block ends with one.
uint64_t CTCTakenCount = 0;
const auto CTCInstr = BB->getLastNonPseudoInstr();
if (CTCInstr && BC.MIA->getConditionalTailCall(*CTCInstr)) {
CTCTakenCount =
BC.MIA->getAnnotationWithDefault<uint64_t>(*CTCInstr, "CTCTakenCount");
}
// Calculate frequency of throws from this node according to LBR data
// for branching into associated landing pads. Since it is possible
// for a landing pad to be associated with more than one basic block,
// we may overestimate the frequency of throws for such blocks.
uint64_t ReportedThrows = 0;
for (const auto *LP: BB->landing_pads()) {
ReportedThrows += LP->getExecutionCount();
}
const uint64_t TotalReportedJumps =
ReportedBranches + CTCTakenCount + ReportedThrows;
// Infer the frequency of the fall-through edge, representing not taking the
// branch.
uint64_t Inferred = 0;
if (BBExecCount > TotalReportedJumps)
Inferred = BBExecCount - TotalReportedJumps;
DEBUG(
if (opts::Verbosity >= 1 && BBExecCount < TotalReportedJumps)
errs()
<< "BOLT-WARNING: Fall-through inference is slightly inconsistent. "
"exec frequency is less than the outgoing edges frequency ("
<< BBExecCount << " < " << ReportedBranches
<< ") for BB at offset 0x"
<< Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';
);
if (BB->succ_size() <= 2) {
// If there is an FT it will be the last successor.
auto &SuccBI = *BB->branch_info_rbegin();
auto &Succ = *BB->succ_rbegin();
if (SuccBI.Count == BinaryBasicBlock::COUNT_NO_PROFILE) {
SuccBI.Count = Inferred;
Succ->ExecutionCount += Inferred;
}
}
}
return;
}
void BinaryFunction::removeConditionalTailCalls() {
CurrentState = State::CFG;
// Blocks to be appended at the end.
std::vector<std::unique_ptr<BinaryBasicBlock>> NewBlocks;
@@ -2373,6 +1800,9 @@ void BinaryFunction::removeConditionalTailCalls() {
// Swap edges as the TailCallBB corresponds to the taken branch.
BB.swapConditionalSuccessors();
}
// This branch is no longer a conditional tail call.
BC.MIA->unsetConditionalTailCall(*CTCInstr);
}
insertBasicBlocks(std::prev(end()),
@@ -3068,11 +2498,12 @@ void BinaryFunction::fixBranches() {
// terminator) or more than 2 (switch table) don't require branch
// instruction adjustments.
}
assert(validateCFG() && "Invalid CFG detected after fixing branches");
assert((!isSimple() || validateCFG())
&& "Invalid CFG detected after fixing branches");
}
void BinaryFunction::propagateGnuArgsSizeInfo() {
assert(CurrentState == State::CFG && "unexpected function state");
assert(CurrentState == State::Disassembled && "unexpected function state");
if (!hasEHRanges() || !usesGnuArgsSize())
return;
@@ -3145,68 +2576,6 @@ void BinaryFunction::postProcessBranches() {
assert(validateCFG() && "invalid CFG");
}
void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
// No reason to merge invalid or empty profiles into BF.
if (!hasValidProfile())
return;
// Update function execution count.
if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) {
BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());
}
// Since we are merging a valid profile, the new profile should be valid too.
// It has either already been valid, or it has been cleaned up.
BF.ProfileMatchRatio = 1.0f;
// Update basic block and edge counts.
auto BBMergeI = BF.begin();
for (BinaryBasicBlock *BB : BasicBlocks) {
BinaryBasicBlock *BBMerge = &*BBMergeI;
assert(getIndex(BB) == BF.getIndex(BBMerge));
// Update basic block count.
if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
BBMerge->setExecutionCount(
BBMerge->getKnownExecutionCount() + BB->getExecutionCount());
}
// Update edge count for successors of this basic block.
auto BBMergeSI = BBMerge->succ_begin();
auto BIMergeI = BBMerge->branch_info_begin();
auto BII = BB->branch_info_begin();
for (const auto *BBSucc : BB->successors()) {
(void)BBSucc;
assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));
// At this point no branch count should be set to COUNT_NO_PROFILE.
assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"unexpected unknown branch profile");
assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"unexpected unknown branch profile");
BIMergeI->Count += BII->Count;
// When we merge inferred and real fall-through branch data, the merged
// data is considered inferred.
if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
BIMergeI->MispredictedCount += BII->MispredictedCount;
} else {
BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
}
++BBMergeSI;
++BII;
++BIMergeI;
}
assert(BBMergeSI == BBMerge->succ_end());
++BBMergeI;
}
assert(BBMergeI == BF.end());
}
BinaryFunction::BasicBlockOrderType BinaryFunction::dfs() const {
BasicBlockOrderType DFS;
unsigned Index = 0;
@@ -4058,6 +3427,28 @@ DWARFAddressRangesVector BinaryFunction::translateInputToOutputRanges(
return MergedRanges;
}
MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) {
if (CurrentState == State::Disassembled) {
auto II = Instructions.find(Offset);
return (II == Instructions.end()) ? nullptr : &II->second;
} else if (CurrentState == State::CFG) {
auto *BB = getBasicBlockContainingOffset(Offset);
if (!BB)
return nullptr;
for (auto &Inst : *BB) {
constexpr auto InvalidOffset = std::numeric_limits<uint64_t>::max();
if (Offset == BC.MIA->getAnnotationWithDefault<uint64_t>(Inst, "Offset",
InvalidOffset))
return &Inst;
}
return nullptr;
} else {
llvm_unreachable("invalid CFG state to use getInstructionAtOffset()");
}
}
DWARFDebugLoc::LocationList BinaryFunction::translateInputToOutputLocationList(
const DWARFDebugLoc::LocationList &InputLL,
uint64_t BaseAddress) const {
@@ -4331,60 +3722,6 @@ DynoStats BinaryFunction::getDynoStats() const {
return Stats;
}
Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
BinaryFunction::getFallthroughsInTrace(uint64_t From, uint64_t To) const {
SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
if (CurrentState != State::Disassembled)
return NoneType();
// Get iterators and validate trace start/end
auto FromIter = Instructions.find(From);
if (FromIter == Instructions.end())
return NoneType();
auto ToIter = Instructions.find(To);
if (ToIter == Instructions.end())
return NoneType();
// Trace needs to go forward
if (FromIter->first > ToIter->first)
return NoneType();
// Trace needs to finish in a branch
if (!BC.MIA->isBranch(ToIter->second) && !BC.MIA->isCall(ToIter->second) &&
!BC.MIA->isReturn(ToIter->second)) {
// Check for "rep ret"
if (!BC.MIA->isPrefix(ToIter->second)) {
return NoneType();
} else {
++ToIter;
if (!BC.MIA->isReturn(ToIter->second))
return NoneType();
}
}
// Analyze intermediate instructions
for (; FromIter != ToIter; ++FromIter) {
// This operates under an assumption that we collect all branches in LBR
// No unconditional branches in the middle of the trace
if (BC.MIA->isUnconditionalBranch(FromIter->second) ||
BC.MIA->isReturn(FromIter->second) ||
BC.MIA->isCall(FromIter->second))
return NoneType();
if (!BC.MIA->isConditionalBranch(FromIter->second))
continue;
const uint64_t Src = FromIter->first;
auto Next = std::next(FromIter);
const uint64_t Dst = Next->first;
Res.push_back(std::make_pair(Src, Dst));
}
return Res;
}
void DynoStats::print(raw_ostream &OS, const DynoStats *Other) const {
auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat,
uint64_t OtherStat) {

BinaryFunction.h

@@ -172,7 +172,7 @@ public:
enum class State : char {
Empty = 0, /// Function body is empty.
Disassembled, /// Function has been disassembled.
CFG, /// Control flow graph have been built.
CFG, /// Control flow graph has been built.
CFG_Finalized, /// CFG is finalized. No optimizations allowed.
Emitted, /// Instructions have been emitted to output.
};
@@ -186,16 +186,6 @@ public:
ST_ALL, /// Split all functions
};
enum ReorderType : char {
RT_NONE = 0,
RT_EXEC_COUNT,
RT_HFSORT,
RT_HFSORT_PLUS,
RT_PETTIS_HANSEN,
RT_RANDOM,
RT_USER
};
/// Branch statistics for jump table entries.
struct JumpInfo {
uint64_t Mispreds{0};
@@ -447,7 +437,6 @@ private:
using BranchListType = std::vector<std::pair<uint32_t, uint32_t>>;
BranchListType TakenBranches; /// All local taken branches.
BranchListType FTBranches; /// All fall-through branches.
BranchListType IgnoredBranches; /// Branches ignored for CFG purposes.
/// Map offset in the function to a label.
@@ -754,13 +743,8 @@ private:
}
/// Return instruction at a given offset in the function. Valid before
/// CFG is constructed.
MCInst *getInstructionAtOffset(uint64_t Offset) {
assert(CurrentState == State::Disassembled &&
"can only call function in Disassembled state");
auto II = Instructions.find(Offset);
return (II == Instructions.end()) ? nullptr : &II->second;
}
/// CFG is constructed or while instruction offsets are available in CFG.
MCInst *getInstructionAtOffset(uint64_t Offset);
/// Analyze and process indirect branch \p Instruction before it is
/// added to Instructions list.
@@ -1480,6 +1464,13 @@ public:
ProfileMatchRatio == 1.0f;
}
/// Mark this function as having a valid profile.
void markProfiled() {
if (ExecutionCount == COUNT_NO_PROFILE)
ExecutionCount = 0;
ProfileMatchRatio = 1.0f;
}
void addCFIInstruction(uint64_t Offset, MCCFIInstruction &&Inst) {
assert(!Instructions.empty());
@@ -1809,6 +1800,12 @@ public:
/// State::CFG. Returns false if CFG cannot be built.
bool buildCFG();
/// Read any kind of profile information available for the function.
void readProfile();
/// Perform post-processing of the CFG.
void postProcessCFG();
/// Verify that any assumptions we've made about indirect branches were
/// correct and also make any necessary changes to unknown indirect branches.
///
@@ -2022,9 +2019,41 @@ public:
return UnitLineTable;
}
/// Scan from - to offsets for conditional jumps
/// Update function execution profile with a recorded trace.
/// A trace is a region of code executed between two LBR entries supplied in
/// execution order.
///
/// Return true if the trace is valid, false otherwise.
bool recordTrace(
const LBREntry &First,
const LBREntry &Second,
uint64_t Count = 1,
SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches = nullptr);
/// Update function profile with a taken branch.
/// \p Count could be 0 if verification of the branch is required.
///
/// Return true if the branch is valid, false otherwise.
bool recordBranch(uint64_t From, uint64_t To, uint64_t Count = 1,
uint64_t Mispreds = 0);
/// Record external entry into the function.
///
/// Return true if the entry is valid, false otherwise.
bool recordEntry(uint64_t To, bool Mispred, uint64_t Count = 1);
/// Record exit from a function via a call or return.
///
/// Return true if the exit point is valid, false otherwise.
bool recordExit(uint64_t From, bool Mispred, uint64_t Count = 1);
/// Finalize profile for the function.
void postProcessProfile();
/// Return a vector of offsets corresponding to a trace in a function
/// (see recordTrace() above).
Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
getFallthroughsInTrace(uint64_t From, uint64_t To) const;
getFallthroughsInTrace(const LBREntry &First, const LBREntry &Second);
/// Returns an estimate of the function's hot part after splitting.
/// This is a very rough estimate, as with C++ exceptions there are
@@ -2181,6 +2210,13 @@ inline raw_ostream &operator<<(raw_ostream &OS,
return OS;
}
inline raw_ostream &operator<<(raw_ostream &OS,
const LBREntry &LBR) {
OS << "0x" << Twine::utohexstr(LBR.From)
<< " -> 0x" << Twine::utohexstr(LBR.To);
return OS;
}
} // namespace bolt

BinaryFunctionProfile.cpp (new file)

@@ -0,0 +1,854 @@
//===--- BinaryFunctionProfile.cpp ----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#include "DataReader.h"
#include "Passes/MCF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"
using namespace llvm;
using namespace bolt;
namespace opts {
extern cl::OptionCategory AggregatorCategory;
extern cl::OptionCategory BoltOptCategory;
extern cl::opt<unsigned> Verbosity;
extern cl::opt<IndirectCallPromotionType> IndirectCallPromotion;
extern cl::opt<JumpTableSupportLevel> JumpTables;
static cl::opt<bool>
CompatMode("prof-compat-mode",
cl::desc("maintain bug-level compatibility with old profile"),
cl::init(true),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltOptCategory));
static cl::opt<MCFCostFunction>
DoMCF("mcf",
cl::desc("solve a min cost flow problem on the CFG to fix edge counts "
"(default=disable)"),
cl::init(MCF_DISABLE),
cl::values(
clEnumValN(MCF_DISABLE, "none",
"disable MCF"),
clEnumValN(MCF_LINEAR, "linear",
"cost function is inversely proportional to edge count"),
clEnumValN(MCF_QUADRATIC, "quadratic",
"cost function is inversely proportional to edge count squared"),
clEnumValN(MCF_LOG, "log",
"cost function is inversely proportional to log of edge count"),
clEnumValN(MCF_BLAMEFTS, "blamefts",
"tune cost to blame fall-through edges for surplus flow"),
clEnumValEnd),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltOptCategory));
static cl::opt<bool>
FixFuncCounts("fix-func-counts",
cl::desc("adjust function counts based on basic blocks execution count"),
cl::init(false),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
namespace bolt {
bool BinaryFunction::recordTrace(
const LBREntry &First,
const LBREntry &Second,
uint64_t Count,
SmallVector<std::pair<uint64_t, uint64_t>, 16> *Branches) {
if (!isSimple())
return false;
assert(CurrentState == State::CFG && "can only record traces in CFG state");
// Offsets of the trace within this function.
const auto From = First.To - getAddress();
const auto To = Second.From - getAddress();
if (From > To)
return false;
auto *FromBB = getBasicBlockContainingOffset(From);
auto *ToBB = getBasicBlockContainingOffset(To);
if (!FromBB || !ToBB)
return false;
// Fill out information for fall-through edges. The From and To could be
// within the same basic block, e.g. when two call instructions are in the
// same block. In this case we skip the processing.
if (FromBB == ToBB) {
if (opts::CompatMode)
return true;
// If the previous block ended with a call, the destination of a return
// would be in the ToBB basic block. And if the ToBB starts with a control
// transfer instruction, we will have a 0-length trace that we have to
// account for as a fall-through edge.
if (To == ToBB->getOffset()) {
// External entry point.
if (ToBB->isEntryPoint() || ToBB->isLandingPad())
return true;
// Check that the origin LBR of a trace starts in another function.
// Otherwise it's an internal branch that was accounted for.
if (containsAddress(First.From))
return true;
auto *PrevBB = BasicBlocksLayout[ToBB->getIndex() - 1];
// This could be a bad trace.
if (!PrevBB->getSuccessor(ToBB->getLabel())) {
DEBUG(dbgs() << "invalid LBR sequence:\n"
<< " " << First << '\n'
<< " " << Second << '\n');
return false;
}
auto &BI = PrevBB->getBranchInfo(*ToBB);
BI.Count += Count;
if (Branches) {
const auto *Instr = PrevBB->getLastNonPseudoInstr();
const auto Offset =
BC.MIA->getAnnotationWithDefault<uint64_t>(*Instr, "Offset");
Branches->push_back(std::make_pair(Offset, ToBB->getOffset()));
}
}
return true;
}
// Process blocks in the original layout order.
auto *BB = BasicBlocksLayout[FromBB->getIndex()];
assert(BB == FromBB && "index mismatch");
while (BB != ToBB) {
auto *NextBB = BasicBlocksLayout[BB->getIndex() + 1];
assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout");
// Check for bad LBRs.
if (!BB->getSuccessor(NextBB->getLabel())) {
DEBUG(dbgs() << "no fall-through for the trace:\n"
<< " " << First << '\n'
<< " " << Second << '\n');
return false;
}
// To keep backwards compatibility we skip recording fall-throughs that
// are not a result of a conditional jump.
if (!opts::CompatMode ||
(BB->succ_size() == 2 &&
BB->getConditionalSuccessor(false) == NextBB)) {
auto &BI = BB->getBranchInfo(*NextBB);
BI.Count += Count;
if (Branches) {
const auto *Instr = BB->getLastNonPseudoInstr();
// Note: real offset for conditional jump instruction shouldn't be 0.
const auto Offset =
BC.MIA->getAnnotationWithDefault<uint64_t>(*Instr, "Offset");
if (Offset) {
Branches->push_back(std::make_pair(Offset, NextBB->getOffset()));
}
}
}
BB = NextBB;
}
return true;
}
bool BinaryFunction::recordBranch(uint64_t From, uint64_t To,
uint64_t Count, uint64_t Mispreds) {
auto *FromBB = getBasicBlockContainingOffset(From);
auto *ToBB = getBasicBlockContainingOffset(To);
if (!FromBB || !ToBB) {
DEBUG(dbgs() << "failed to get block for recorded branch\n");
return false;
}
// Could be bad LBR data. Ignore, or report as a bad profile for backwards
// compatibility.
if (From == To) {
if (!opts::CompatMode)
return true;
auto *Instr = getInstructionAtOffset(0);
if (Instr && BC.MIA->isCall(*Instr))
return true;
return false;
}
if (FromBB->succ_size() == 0) {
// Return from a tail call.
return true;
}
// Very rarely we will see ignored branches. Do a linear check.
for (auto &Branch : IgnoredBranches) {
if (Branch == std::make_pair(static_cast<uint32_t>(From),
static_cast<uint32_t>(To)))
return true;
}
if (To != ToBB->getOffset()) {
// "To" could be referring to nop instructions in between 2 basic blocks.
// While building the CFG we make sure these nops are attributed to the
// previous basic block, thus we check if the destination belongs to the
// gap past the last instruction.
const auto *LastInstr = ToBB->getLastNonPseudoInstr();
if (LastInstr) {
const auto LastInstrOffset =
BC.MIA->getAnnotationWithDefault<uint64_t>(*LastInstr, "Offset");
// With old .fdata we are getting FT branches for "jcc,jmp" sequences.
if (To == LastInstrOffset && BC.MIA->isUnconditionalBranch(*LastInstr)) {
return true;
}
if (To <= LastInstrOffset) {
DEBUG(dbgs() << "branch recorded into the middle of the block" << " in "
<< *this << " : " << From << " -> " << To << '\n');
return false;
}
}
// The real destination is the layout successor of the detected ToBB.
if (ToBB == BasicBlocksLayout.back())
return false;
auto *NextBB = BasicBlocksLayout[ToBB->getIndex() + 1];
assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout");
ToBB = NextBB;
}
// If there's no corresponding instruction for 'From', we have probably
// discarded it as a FT from __builtin_unreachable.
auto *FromInstruction = getInstructionAtOffset(From);
if (!FromInstruction) {
DEBUG(dbgs() << "no instruction for offset " << From << " in "
<< *this << '\n');
return false;
}
if (FromBB == ToBB) {
// Check for a return from a recursive call.
// Otherwise it's a simple loop.
}
if (!FromBB->getSuccessor(ToBB->getLabel())) {
// Check if this is a recursive call or a return from a recursive call.
if (ToBB->isEntryPoint()) {
// Execution count is already accounted for.
return true;
}
DEBUG(dbgs() << "invalid branch in " << *this << '\n'
<< Twine::utohexstr(From) << " -> "
<< Twine::utohexstr(To) << '\n');
return false;
}
auto &BI = FromBB->getBranchInfo(*ToBB);
BI.Count += Count;
// Only update the mispredicted count if the count was real.
if (Count) {
BI.MispredictedCount += Mispreds;
}
return true;
}
bool BinaryFunction::recordEntry(uint64_t To, bool Mispred, uint64_t Count) {
if (To > getSize())
return false;
if (!hasProfile())
ExecutionCount = 0;
if (To == 0)
ExecutionCount += Count;
return true;
}
bool BinaryFunction::recordExit(uint64_t From, bool Mispred, uint64_t Count) {
if (!isSimple())
return false;
assert(From <= getSize() && "wrong From address");
if (!hasProfile())
ExecutionCount = 0;
return true;
}
void BinaryFunction::postProcessProfile() {
if (!hasValidProfile()) {
clearProfile();
return;
}
// Check if MCF post-processing was requested.
if (opts::DoMCF != MCF_DISABLE) {
removeTagsFromProfile();
solveMCF(*this, opts::DoMCF);
return;
}
// If we are using non-LBR sampling, there's nothing left to do.
if (!BranchData)
return;
// Bug compatibility with previous version - double accounting for conditional
// jump into a fall-through block.
if (opts::CompatMode) {
for (auto *BB : BasicBlocks) {
if (BB->succ_size() == 2 &&
BB->getConditionalSuccessor(false) ==
BB->getConditionalSuccessor(true)) {
auto &TakenBI = *BB->branch_info_begin();
auto &FallThroughBI = *BB->branch_info_rbegin();
FallThroughBI.Count = TakenBI.Count;
FallThroughBI.MispredictedCount = 0;
}
}
}
// Pre-sort branch data.
std::stable_sort(BranchData->Data.begin(), BranchData->Data.end());
// If we have at least some branch data for the function indicate that it
// was executed.
if (opts::FixFuncCounts && ExecutionCount == 0) {
ExecutionCount = 1;
}
// Compute preliminary execution count for each basic block
for (auto *BB : BasicBlocks) {
BB->ExecutionCount = 0;
}
for (auto *BB : BasicBlocks) {
auto SuccBIIter = BB->branch_info_begin();
for (auto Succ : BB->successors()) {
if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count);
++SuccBIIter;
}
}
// Set entry BBs to zero; we'll update their execution count next with entry
// data (we maintain a separate data structure for branches to function entry
// points)
for (auto *BB : BasicBlocks) {
if (BB->isEntryPoint())
BB->ExecutionCount = 0;
}
// Update execution counts of landing pad blocks and entry BBs
// There is a slight skew introduced here as branches originating from RETs
// may be accounted for in the execution count of an entry block if the last
// instruction in a predecessor fall-through block is a call. This situation
// should rarely happen because there are few multiple-entry functions.
for (const auto &I : BranchData->EntryData) {
BinaryBasicBlock *BB = getBasicBlockAtOffset(I.To.Offset);
if (BB && (BB->isEntryPoint() || BB->isLandingPad())) {
BB->setExecutionCount(BB->getExecutionCount() + I.Branches);
}
}
inferFallThroughCounts();
// Update profile information for jump tables based on CFG branch data.
for (auto *BB : BasicBlocks) {
const auto *LastInstr = BB->getLastNonPseudoInstr();
if (!LastInstr)
continue;
const auto JTAddress = BC.MIA->getJumpTable(*LastInstr);
if (!JTAddress)
continue;
auto *JT = getJumpTableContainingAddress(JTAddress);
if (!JT)
continue;
uint64_t TotalBranchCount = 0;
for (const auto &BranchInfo : BB->branch_info()) {
TotalBranchCount += BranchInfo.Count;
}
JT->Count += TotalBranchCount;
if (opts::IndirectCallPromotion < ICP_JUMP_TABLES &&
opts::JumpTables < JTS_AGGRESSIVE)
continue;
if (JT->Counts.empty())
JT->Counts.resize(JT->Entries.size());
auto EI = JT->Entries.begin();
auto Delta = (JTAddress - JT->Address) / JT->EntrySize;
EI += Delta;
while (EI != JT->Entries.end()) {
const auto *TargetBB = getBasicBlockForLabel(*EI);
if (TargetBB) {
const auto &BranchInfo = BB->getBranchInfo(*TargetBB);
assert(Delta < JT->Counts.size());
JT->Counts[Delta].Count += BranchInfo.Count;
JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount;
}
++Delta;
++EI;
// A label marks the start of another jump table.
if (JT->Labels.count(Delta * JT->EntrySize))
break;
}
}
}
Optional<SmallVector<std::pair<uint64_t, uint64_t>, 16>>
BinaryFunction::getFallthroughsInTrace(const LBREntry &First,
const LBREntry &Second) {
SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
if (!recordTrace(First, Second, 1, &Res))
return NoneType();
return Res;
}
void BinaryFunction::readProfile() {
if (empty())
return;
if (!BC.DR.hasLBR()) {
readSampleData();
return;
}
// Possibly assign/re-assign branch profile data.
matchProfileData();
if (!BranchData)
return;
uint64_t MismatchedBranches = 0;
for (const auto &BI : BranchData->Data) {
if (BI.From.Name != BI.To.Name) {
continue;
}
if (!recordBranch(BI.From.Offset, BI.To.Offset,
BI.Branches, BI.Mispreds)) {
DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> "
<< BI.To.Offset << '\n');
++MismatchedBranches;
}
}
// Special profile data propagation is required for conditional tail calls.
for (auto BB : BasicBlocks) {
auto *CTCInstr = BB->getLastNonPseudoInstr();
if (!CTCInstr || !BC.MIA->getConditionalTailCall(*CTCInstr))
continue;
auto OffsetOrErr =
BC.MIA->tryGetAnnotationAs<uint64_t>(*CTCInstr, "Offset");
assert(OffsetOrErr && "offset not set for conditional tail call");
auto BranchInfoOrErr = BranchData->getDirectCallBranch(*OffsetOrErr);
if (!BranchInfoOrErr)
continue;
BC.MIA->addAnnotation(BC.Ctx.get(), *CTCInstr, "CTCTakenCount",
BranchInfoOrErr->Branches);
BC.MIA->addAnnotation(BC.Ctx.get(), *CTCInstr, "CTCMispredCount",
BranchInfoOrErr->Mispreds);
}
}
void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const {
// No reason to merge invalid or empty profiles into BF.
if (!hasValidProfile())
return;
// Update function execution count.
if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) {
BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount());
}
// Since we are merging a valid profile, the new profile should be valid too.
// It has either already been valid, or it has been cleaned up.
BF.ProfileMatchRatio = 1.0f;
// Update basic block and edge counts.
auto BBMergeI = BF.begin();
for (BinaryBasicBlock *BB : BasicBlocks) {
BinaryBasicBlock *BBMerge = &*BBMergeI;
assert(getIndex(BB) == BF.getIndex(BBMerge));
// Update basic block count.
if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) {
BBMerge->setExecutionCount(
BBMerge->getKnownExecutionCount() + BB->getExecutionCount());
}
// Update edge count for successors of this basic block.
auto BBMergeSI = BBMerge->succ_begin();
auto BIMergeI = BBMerge->branch_info_begin();
auto BII = BB->branch_info_begin();
for (const auto *BBSucc : BB->successors()) {
(void)BBSucc;
assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI));
// At this point no branch count should be set to COUNT_NO_PROFILE.
assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"unexpected unknown branch profile");
assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
"unexpected unknown branch profile");
BIMergeI->Count += BII->Count;
// When we merge inferred and real fall-through branch data, the merged
// data is considered inferred.
if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED &&
BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) {
BIMergeI->MispredictedCount += BII->MispredictedCount;
} else {
BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
}
++BBMergeSI;
++BII;
++BIMergeI;
}
assert(BBMergeSI == BBMerge->succ_end());
++BBMergeI;
}
assert(BBMergeI == BF.end());
}
void BinaryFunction::readSampleData() {
auto SampleDataOrErr = BC.DR.getFuncSampleData(getNames());
if (!SampleDataOrErr)
return;
// Non-LBR mode territory
// First step is to assign BB execution count based on samples from perf
ProfileMatchRatio = 1.0f;
removeTagsFromProfile();
bool NormalizeByInsnCount =
BC.DR.usesEvent("cycles") || BC.DR.usesEvent("instructions");
bool NormalizeByCalls = BC.DR.usesEvent("branches");
static bool NagUser{true};
if (NagUser) {
outs() << "BOLT-INFO: operating with non-LBR profiling data.\n";
if (NormalizeByInsnCount) {
outs() << "BOLT-INFO: normalizing samples by instruction count.\n";
} else if (NormalizeByCalls) {
outs() << "BOLT-INFO: normalizing samples by branches.\n";
}
NagUser = false;
}
uint64_t LastOffset = getSize();
uint64_t TotalEntryCount{0};
for (auto I = BasicBlockOffsets.rbegin(), E = BasicBlockOffsets.rend();
I != E; ++I) {
uint64_t CurOffset = I->first;
// Always work with samples multiplied by 1000 to avoid losing them if we
// later need to normalize numbers
uint64_t NumSamples =
SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000;
if (NormalizeByInsnCount && I->second->getNumNonPseudos())
NumSamples /= I->second->getNumNonPseudos();
else if (NormalizeByCalls) {
uint32_t NumCalls = I->second->getNumCalls();
NumSamples /= NumCalls + 1;
}
I->second->setExecutionCount(NumSamples);
if (I->second->isEntryPoint())
TotalEntryCount += NumSamples;
LastOffset = CurOffset;
}
ExecutionCount = TotalEntryCount;
estimateEdgeCounts(BC, *this);
if (opts::DoMCF != MCF_DISABLE)
solveMCF(*this, opts::DoMCF);
}
void BinaryFunction::inferFallThroughCounts() {
// Work on a basic block at a time, propagating frequency information
// forwards.
// It is important to walk in the layout order.
for (auto *BB : BasicBlocks) {
const uint64_t BBExecCount = BB->getExecutionCount();
// Propagate this information to successors, filling in fall-through edges
// with frequency information
if (BB->succ_size() == 0)
continue;
// Calculate frequency of outgoing branches from this node according to
// LBR data.
uint64_t ReportedBranches = 0;
for (const auto &SuccBI : BB->branch_info()) {
if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE)
ReportedBranches += SuccBI.Count;
}
// Get taken count of conditional tail call if the block ends with one.
uint64_t CTCTakenCount = 0;
const auto CTCInstr = BB->getLastNonPseudoInstr();
if (CTCInstr && BC.MIA->getConditionalTailCall(*CTCInstr)) {
CTCTakenCount =
BC.MIA->getAnnotationWithDefault<uint64_t>(*CTCInstr, "CTCTakenCount");
}
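// The taken count of a conditional tail call is kept in an annotation
// since there is no successor edge for its taken direction.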
// Calculate frequency of throws from this node according to LBR data
// for branching into associated landing pads. Since it is possible
// for a landing pad to be associated with more than one basic block,
// we may overestimate the frequency of throws for such blocks.
uint64_t ReportedThrows = 0;
for (const auto *LP: BB->landing_pads()) {
ReportedThrows += LP->getExecutionCount();
}
const uint64_t TotalReportedJumps =
ReportedBranches + CTCTakenCount + ReportedThrows;
// Infer the frequency of the fall-through edge, representing not taking the
// branch.
uint64_t Inferred = 0;
if (BBExecCount > TotalReportedJumps)
Inferred = BBExecCount - TotalReportedJumps;
DEBUG(
if (BBExecCount < TotalReportedJumps)
dbgs()
<< "Fall-through inference is slightly inconsistent: "
"exec frequency is less than the outgoing edges frequency ("
<< BBExecCount << " < " << TotalReportedJumps
<< ") for BB at offset 0x"
<< Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';
);
if (BB->succ_size() <= 2) {
// Skip if the last instruction is an unconditional jump.
const auto *LastInstr = BB->getLastNonPseudoInstr();
if (LastInstr &&
(BC.MIA->isUnconditionalBranch(*LastInstr) ||
BC.MIA->isIndirectBranch(*LastInstr)))
continue;
// If there is a fall-through edge, it will be the last successor.
auto &SuccBI = *BB->branch_info_rbegin();
auto &Succ = *BB->succ_rbegin();
if (SuccBI.Count == 0) {
SuccBI.Count = Inferred;
SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
Succ->ExecutionCount += Inferred;
}
}
}
}
bool BinaryFunction::fetchProfileForOtherEntryPoints() {
if (!BranchData)
return false;
// Check if we are missing profiling data for secondary entry points
bool First{true};
bool Updated{false};
for (auto BB : BasicBlocks) {
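// Skip the first basic block: the primary entry point is matched by
// function name.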
if (First) {
First = false;
continue;
}
if (BB->isEntryPoint()) {
uint64_t EntryAddress = BB->getOffset() + getAddress();
// Look for branch data associated with this entry point
std::vector<std::string> Names;
std::multimap<uint64_t, std::string>::iterator I, E;
for (std::tie(I, E) = BC.GlobalAddresses.equal_range(EntryAddress);
I != E; ++I) {
Names.push_back(I->second);
}
if (!Names.empty()) {
if (FuncBranchData *Data = BC.DR.getFuncBranchData(Names)) {
BranchData->appendFrom(*Data, BB->getOffset());
Data->Used = true;
Updated = true;
}
}
}
}
return Updated;
}
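// Match the function to an unused memory profile, using regex lookup
// to cover LTO-style name variants.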
void BinaryFunction::matchProfileMemData() {
const auto AllMemData = BC.DR.getFuncMemDataRegex(getNames());
for (auto *NewMemData : AllMemData) {
// Prevent functions from sharing the same profile.
if (NewMemData->Used)
continue;
if (MemData)
MemData->Used = false;
// Update function profile data with the new set.
MemData = NewMemData;
MemData->Used = true;
break;
}
}
void BinaryFunction::matchProfileData() {
// This functionality is available in LBR mode only.
// TODO: Implement evaluateProfileData() for samples, checking whether
// sample addresses match instruction addresses in the function
if (!BC.DR.hasLBR())
return;
if (BranchData) {
ProfileMatchRatio = evaluateProfileData(*BranchData);
if (ProfileMatchRatio == 1.0f) {
if (fetchProfileForOtherEntryPoints()) {
ProfileMatchRatio = evaluateProfileData(*BranchData);
ExecutionCount = BranchData->ExecutionCount;
}
return;
}
}
// Check if the function name can fluctuate between several compilations,
// e.g. due to minor unrelated changes in the source code of the input
// binary.
const auto HasVolatileName = [this]() {
for (const auto Name : getNames()) {
if (getLTOCommonName(Name))
return true;
}
return false;
}();
if (!HasVolatileName)
return;
// Check for a profile that matches with 100% confidence.
const auto AllBranchData = BC.DR.getFuncBranchDataRegex(getNames());
for (auto *NewBranchData : AllBranchData) {
// Prevent functions from sharing the same profile.
if (NewBranchData->Used)
continue;
if (evaluateProfileData(*NewBranchData) != 1.0f)
continue;
if (BranchData)
BranchData->Used = false;
// Update function profile data with the new set.
BranchData = NewBranchData;
ExecutionCount = NewBranchData->ExecutionCount;
ProfileMatchRatio = 1.0f;
BranchData->Used = true;
break;
}
}
float BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
// Until we define a minimal profile, we consider empty branch data to be
// a valid profile. This can happen for a function without branches when
// we still have EntryData for the execution count.
if (BranchData.Data.empty()) {
return 1.0f;
}
uint64_t NumMatchedBranches = 0;
for (const auto &BI : BranchData.Data) {
bool IsValid = false;
if (BI.From.Name == BI.To.Name) {
// Try to record information with 0 count.
IsValid = recordBranch(BI.From.Offset, BI.To.Offset, 0);
} else {
// The branch has to originate from this function.
// Check for calls, tail calls, rets and indirect branches.
// At profile-matching time we have not yet reached the stage where
// tail calls are identified, so they are still represented by regular
// branch instructions and we need isBranch() here.
auto *Instr = getInstructionAtOffset(BI.From.Offset);
// If it's a prefix, skip it.
if (Instr && BC.MIA->isPrefix(*Instr))
Instr = getInstructionAtOffset(BI.From.Offset + 1);
if (Instr &&
(BC.MIA->isCall(*Instr) ||
BC.MIA->isBranch(*Instr) ||
BC.MIA->isReturn(*Instr))) {
IsValid = true;
}
}
if (IsValid) {
++NumMatchedBranches;
continue;
}
DEBUG(dbgs()
<< "\tinvalid branch in " << *this << " : 0x"
<< Twine::utohexstr(BI.From.Offset) << " -> ";
if (BI.From.Name == BI.To.Name)
dbgs() << "0x" << Twine::utohexstr(BI.To.Offset) << '\n';
else
dbgs() << "<outbounds>\n";
);
}
const auto MatchRatio = (float) NumMatchedBranches / BranchData.Data.size();
if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size()) {
errs() << "BOLT-WARNING: profile branches match only "
<< format("%.1f%%", MatchRatio * 100.0f) << " ("
<< NumMatchedBranches << '/' << BranchData.Data.size()
<< ") for function " << *this << '\n';
}
return MatchRatio;
}
void BinaryFunction::clearProfile() {
// Keep function execution profile the same. Only clear basic block and edge
// counts.
for (auto *BB : BasicBlocks) {
BB->ExecutionCount = 0;
for (auto &BI : BB->branch_info()) {
BI.Count = 0;
BI.MispredictedCount = 0;
}
}
}
} // namespace bolt
} // namespace llvm

View File

@ -345,7 +345,7 @@ void BinaryFunctionPassManager::runAllPasses(
// order they're registered.
// Run this pass first to use stats for the original functions.
Manager.registerPass(llvm::make_unique<PrintSortedBy>(NeverPrint));
Manager.registerPass(llvm::make_unique<PrintProgramStats>(NeverPrint));
Manager.registerPass(llvm::make_unique<StripRepRet>(NeverPrint),
opts::StripRepRet);

View File

@ -63,6 +63,7 @@ add_llvm_tool(llvm-bolt
BinaryBasicBlock.cpp
BinaryContext.cpp
BinaryFunction.cpp
BinaryFunctionProfile.cpp
BinaryPassManager.cpp
CacheMetrics.cpp
DataAggregator.cpp

View File

@ -414,6 +414,14 @@ bool DataAggregator::aggregate(BinaryContext &BC,
outs() << "PERF2BOLT: Failed to parse branch events\n";
}
// Mark all functions with registered events as having a valid profile.
for (auto &BFI : BFs) {
auto &BF = BFI.second;
if (BF.getBranchData()) {
BF.markProfiled();
}
}
auto PI3 = sys::Wait(MemEventsPI, 0, true, &Error);
if (PI3.ReturnCode != 0) {
@ -423,7 +431,8 @@ bool DataAggregator::aggregate(BinaryContext &BC,
deleteTempFiles();
Regex NoData("Samples for '.*' event do not have ADDR attribute set. Cannot print 'addr' field.");
Regex NoData("Samples for '.*' event do not have ADDR attribute set. "
"Cannot print 'addr' field.");
if (!NoData.match(ErrBuf)) {
errs() << "PERF-ERROR: Return code " << PI3.ReturnCode << "\n";
errs() << ErrBuf;
@ -450,7 +459,7 @@ bool DataAggregator::aggregate(BinaryContext &BC,
}
deleteTempFiles();
return true;
}
@ -467,8 +476,8 @@ DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) {
return &FI->second;
}
bool DataAggregator::doIntraBranch(BinaryFunction *Func, uint64_t From,
uint64_t To, bool Mispred) {
bool
DataAggregator::doIntraBranch(BinaryFunction *Func, const LBREntry &Branch) {
FuncBranchData *AggrData = Func->getBranchData();
if (!AggrData) {
AggrData = &FuncsToBranches[Func->getNames()[0]];
@ -476,19 +485,21 @@ bool DataAggregator::doIntraBranch(BinaryFunction *Func, uint64_t From,
Func->setBranchData(AggrData);
}
From -= Func->getAddress();
To -= Func->getAddress();
AggrData->bumpBranchCount(From, To, Mispred);
AggrData->bumpBranchCount(Branch.From - Func->getAddress(),
Branch.To - Func->getAddress(),
Branch.Mispred);
return true;
}
bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
BinaryFunction *ToFunc, uint64_t From,
uint64_t To, bool Mispred) {
BinaryFunction *ToFunc,
const LBREntry &Branch) {
FuncBranchData *FromAggrData{nullptr};
FuncBranchData *ToAggrData{nullptr};
StringRef SrcFunc;
StringRef DstFunc;
auto From = Branch.From;
auto To = Branch.To;
if (FromFunc) {
SrcFunc = FromFunc->getNames()[0];
FromAggrData = FromFunc->getBranchData();
@ -498,6 +509,8 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
FromFunc->setBranchData(FromAggrData);
}
From -= FromFunc->getAddress();
FromFunc->recordExit(From, Branch.Mispred);
}
if (ToFunc) {
DstFunc = ToFunc->getNames()[0];
@ -508,32 +521,39 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
ToFunc->setBranchData(ToAggrData);
}
To -= ToFunc->getAddress();
ToFunc->recordEntry(To, Branch.Mispred);
}
if (FromAggrData)
FromAggrData->bumpCallCount(From, Location(!DstFunc.empty(), DstFunc, To),
Mispred);
Branch.Mispred);
if (ToAggrData)
ToAggrData->bumpEntryCount(Location(!SrcFunc.empty(), SrcFunc, From), To,
Mispred);
Branch.Mispred);
return true;
}
bool DataAggregator::doBranch(uint64_t From, uint64_t To, bool Mispred) {
auto *FromFunc = getBinaryFunctionContainingAddress(From);
auto *ToFunc = getBinaryFunctionContainingAddress(To);
bool DataAggregator::doBranch(const LBREntry &Branch) {
auto *FromFunc = getBinaryFunctionContainingAddress(Branch.From);
auto *ToFunc = getBinaryFunctionContainingAddress(Branch.To);
if (!FromFunc && !ToFunc)
return false;
if (FromFunc == ToFunc)
return doIntraBranch(FromFunc, From, To, Mispred);
if (FromFunc == ToFunc) {
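// Record the branch on the function's CFG in addition to updating the
// aggregated raw branch data below.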
FromFunc->recordBranch(Branch.From - FromFunc->getAddress(),
Branch.To - FromFunc->getAddress(),
1,
Branch.Mispred);
return doIntraBranch(FromFunc, Branch);
}
return doInterBranch(FromFunc, ToFunc, From, To, Mispred);
return doInterBranch(FromFunc, ToFunc, Branch);
}
bool DataAggregator::doTrace(uint64_t From, uint64_t To) {
auto *FromFunc = getBinaryFunctionContainingAddress(From);
auto *ToFunc = getBinaryFunctionContainingAddress(To);
bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second) {
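// A trace covers the code executed between two consecutive LBR entries:
// from the target of the first branch to the source of the second.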
auto *FromFunc = getBinaryFunctionContainingAddress(First.To);
auto *ToFunc = getBinaryFunctionContainingAddress(Second.From);
if (!FromFunc || !ToFunc) {
++NumLongRangeTraces;
return false;
@ -541,26 +561,25 @@ bool DataAggregator::doTrace(uint64_t From, uint64_t To) {
if (FromFunc != ToFunc) {
++NumInvalidTraces;
DEBUG(dbgs() << "Trace starting in " << FromFunc->getPrintName() << " @ "
<< Twine::utohexstr(From - FromFunc->getAddress())
<< Twine::utohexstr(First.To - FromFunc->getAddress())
<< " and ending in " << ToFunc->getPrintName() << " @ "
<< ToFunc->getPrintName() << " @ "
<< Twine::utohexstr(To - ToFunc->getAddress()) << "\n");
<< Twine::utohexstr(Second.From - ToFunc->getAddress())
<< '\n');
return false;
}
if (FromFunc) {
From -= FromFunc->getAddress();
To -= ToFunc->getAddress();
}
auto FTs = FromFunc->getFallthroughsInTrace(From, To);
auto FTs = FromFunc->getFallthroughsInTrace(First, Second);
if (!FTs) {
++NumInvalidTraces;
return false;
}
for (const auto &Pair : *FTs) {
doIntraBranch(FromFunc, Pair.first + FromFunc->getAddress(),
Pair.second + FromFunc->getAddress(), false);
doIntraBranch(FromFunc,
LBREntry{Pair.first + FromFunc->getAddress(),
Pair.second + FromFunc->getAddress(),
false});
}
return true;
@ -710,7 +729,8 @@ bool DataAggregator::hasData() {
std::error_code DataAggregator::parseBranchEvents() {
outs() << "PERF2BOLT: Aggregating branch events...\n";
NamedRegionTimer T("Branch samples parsing", TimerGroupName, opts::TimeAggregator);
NamedRegionTimer T("Branch samples parsing", TimerGroupName,
opts::TimeAggregator);
uint64_t NumEntries{0};
uint64_t NumSamples{0};
uint64_t NumTraces{0};
@ -727,14 +747,16 @@ std::error_code DataAggregator::parseBranchEvents() {
NumEntries += Sample.LBR.size();
// Parser semantic actions
uint64_t Last{0};
// LBRs are stored in reverse execution order. NextLBR refers to the next
// executed branch record.
const LBREntry *NextLBR{nullptr};
for (const auto &LBR : Sample.LBR) {
if (Last) {
doTrace(LBR.To, Last);
if (NextLBR) {
doTrace(LBR, *NextLBR);
++NumTraces;
}
doBranch(LBR.From, LBR.To, LBR.Mispred);
Last = LBR.From;
doBranch(LBR);
NextLBR = &LBR;
}
}
outs() << "PERF2BOLT: Read " << NumSamples << " samples and "

View File

@ -28,12 +28,6 @@ namespace bolt {
class BinaryFunction;
class BinaryContext;
struct LBREntry {
uint64_t From;
uint64_t To;
bool Mispred;
};
struct PerfBranchSample {
SmallVector<LBREntry, 16> LBR;
};
@ -125,24 +119,19 @@ class DataAggregator : public DataReader {
BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address);
/// Semantic actions - parser hooks to interpret parsed perf samples
/// Register an intraprocedural branch in \p Func with offsets \p From and
/// \p To (relative to \p Func start address).
bool doIntraBranch(BinaryFunction *Func, uint64_t From, uint64_t To,
bool Mispred);
/// Register an intraprocedural branch \p Branch.
bool doIntraBranch(BinaryFunction *Func, const LBREntry &Branch);
/// Register an interprocedural branch from \p FromFunc to \p ToFunc with
/// offsets \p From and \p To, respectively.
bool doInterBranch(BinaryFunction *FromFunc, BinaryFunction *ToFunc,
uint64_t From, uint64_t To, bool Mispred);
const LBREntry &Branch);
/// Register a branch with raw addresses \p From and \p To extracted from the
/// LBR
bool doBranch(uint64_t From, uint64_t To, bool Mispred);
/// Register a \p Branch.
bool doBranch(const LBREntry &Branch);
/// Register a trace starting in raw address \p From and ending in \p To
/// This will add all intermediate conditional branches in this trace as not
/// taken.
bool doTrace(uint64_t From, uint64_t To);
/// Register a trace between two LBR entries supplied in execution order.
bool doTrace(const LBREntry &First, const LBREntry &Second);
/// Parser helpers
/// Return false if we exhausted our parser buffer and finished parsing

View File

@ -31,6 +31,12 @@
namespace llvm {
namespace bolt {
struct LBREntry {
uint64_t From;
uint64_t To;
bool Mispred;
};
/// LTO-generated function names take a form:
///
/// <function_name>.lto_priv.<decimal_number>/...

View File

@ -48,6 +48,7 @@ const char* dynoStatsOptDesc(const bolt::DynoStats::Category C) {
namespace opts {
extern cl::OptionCategory BoltCategory;
extern cl::OptionCategory BoltOptCategory;
extern cl::opt<unsigned> Verbosity;
@ -88,6 +89,33 @@ MinBranchClusters("min-branch-clusters",
cl::Hidden,
cl::cat(BoltOptCategory));
enum PeepholeOpts : char {
PEEP_NONE = 0x0,
PEEP_SHORTEN = 0x1,
PEEP_DOUBLE_JUMPS = 0x2,
PEEP_TAILCALL_TRAPS = 0x4,
PEEP_USELESS_BRANCHES = 0x8,
PEEP_ALL = 0xf
};
static cl::list<PeepholeOpts>
Peepholes("peepholes",
cl::CommaSeparated,
cl::desc("enable peephole optimizations"),
cl::value_desc("opt1,opt2,opt3,..."),
cl::values(
clEnumValN(PEEP_NONE, "none", "disable peepholes"),
clEnumValN(PEEP_SHORTEN, "shorten", "perform instruction shortening"),
clEnumValN(PEEP_DOUBLE_JUMPS, "double-jumps",
"remove double jumps when able"),
clEnumValN(PEEP_TAILCALL_TRAPS, "tailcall-traps", "insert tail call traps"),
clEnumValN(PEEP_USELESS_BRANCHES, "useless-branches",
"remove useless conditional branches"),
clEnumValN(PEEP_ALL, "all", "enable all peephole optimizations"),
clEnumValEnd),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
static cl::opt<unsigned>
PrintFuncStat("print-function-statistics",
cl::desc("print statistics about basic block ordering"),
@ -140,6 +168,14 @@ ReorderBlocks("reorder-blocks",
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
static cl::opt<bool>
ReportStaleFuncs("report-stale",
cl::desc("print the list of functions with stale profile"),
cl::init(false),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltOptCategory));
enum SctcModes : char {
SctcAlways,
SctcPreserveDirection,
@ -178,32 +214,14 @@ TSPThreshold("tsp-threshold",
cl::Hidden,
cl::cat(BoltOptCategory));
enum PeepholeOpts : char {
PEEP_NONE = 0x0,
PEEP_SHORTEN = 0x1,
PEEP_DOUBLE_JUMPS = 0x2,
PEEP_TAILCALL_TRAPS = 0x4,
PEEP_USELESS_BRANCHES = 0x8,
PEEP_ALL = 0xf
};
static cl::list<PeepholeOpts>
Peepholes("peepholes",
cl::CommaSeparated,
cl::desc("enable peephole optimizations"),
cl::value_desc("opt1,opt2,opt3,..."),
cl::values(
clEnumValN(PEEP_NONE, "none", "disable peepholes"),
clEnumValN(PEEP_SHORTEN, "shorten", "perform instruction shortening"),
clEnumValN(PEEP_DOUBLE_JUMPS, "double-jumps",
"remove double jumps when able"),
clEnumValN(PEEP_TAILCALL_TRAPS, "tailcall-traps", "insert tail call traps"),
clEnumValN(PEEP_USELESS_BRANCHES, "useless-branches",
"remove useless conditional branches"),
clEnumValN(PEEP_ALL, "all", "enable all peephole optimizations"),
clEnumValEnd),
static cl::opt<unsigned>
TopCalledLimit("top-called-limit",
cl::desc("maximum number of functions to print in top called "
"functions section"),
cl::init(100),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
cl::Hidden,
cl::cat(BoltCategory));
} // namespace opts
@ -861,6 +879,7 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryContext &BC,
MIA->setConditionalTailCall(*CondBranch);
// Add info about the conditional tail call frequency, otherwise this
// info will be lost when we delete the associated BranchInfo entry.
BC.MIA->removeAnnotation(*CondBranch, "CTCTakenCount");
BC.MIA->addAnnotation(BC.Ctx.get(), *CondBranch, "CTCTakenCount",
CTCTakenFreq);
@ -1315,11 +1334,93 @@ void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC,
}
}
void PrintSortedBy::runOnFunctions(
BinaryContext &,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &
) {
void
PrintProgramStats::runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &) {
uint64_t NumSimpleFunctions{0};
uint64_t NumStaleProfileFunctions{0};
std::vector<BinaryFunction *> ProfiledFunctions;
const char *StaleFuncsHeader = "BOLT-INFO: Functions with stale profile:\n";
for (auto &BFI : BFs) {
auto &Function = BFI.second;
if (!Function.isSimple())
continue;
++NumSimpleFunctions;
if (Function.getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE)
continue;
if (Function.hasValidProfile())
ProfiledFunctions.push_back(&Function);
else {
if (opts::ReportStaleFuncs) {
outs() << StaleFuncsHeader;
StaleFuncsHeader = "";
outs() << " " << Function << '\n';
}
++NumStaleProfileFunctions;
}
}
BC.NumProfiledFuncs = ProfiledFunctions.size();
const auto NumAllProfiledFunctions =
ProfiledFunctions.size() + NumStaleProfileFunctions;
outs() << "BOLT-INFO: "
<< NumAllProfiledFunctions
<< " functions out of " << NumSimpleFunctions << " simple functions ("
<< format("%.1f", NumAllProfiledFunctions /
(float) NumSimpleFunctions * 100.0f)
<< "%) have non-empty execution profile.\n";
if (NumStaleProfileFunctions) {
outs() << "BOLT-INFO: " << NumStaleProfileFunctions
<< format(" (%.1f%% of all profiled)",
NumStaleProfileFunctions /
(float) NumAllProfiledFunctions * 100.0f)
<< " function" << (NumStaleProfileFunctions == 1 ? "" : "s")
<< " have invalid (possibly stale) profile.\n";
}
// A profile is marked as 'Used' if it either matches a function name
// exactly or if it is a 100% match for any function with a matching
// common LTO name.
auto getUnusedObjects = [&]() -> Optional<std::vector<StringRef>> {
std::vector<StringRef> UnusedObjects;
for (const auto &Func : BC.DR.getAllFuncsData()) {
if (!Func.getValue().Used) {
UnusedObjects.emplace_back(Func.getKey());
}
}
if (UnusedObjects.empty())
return NoneType();
return UnusedObjects;
};
if (const auto UnusedObjects = getUnusedObjects()) {
outs() << "BOLT-INFO: profile for " << UnusedObjects->size()
<< " objects was ignored\n";
if (opts::Verbosity >= 1) {
for (auto Name : *UnusedObjects) {
outs() << " " << Name << '\n';
}
}
}
if (ProfiledFunctions.size() > 10) {
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: top called functions are:\n";
std::sort(ProfiledFunctions.begin(), ProfiledFunctions.end(),
[](BinaryFunction *A, BinaryFunction *B) {
return B->getExecutionCount() < A->getExecutionCount();
}
);
auto SFI = ProfiledFunctions.begin();
auto SFIend = ProfiledFunctions.end();
for (auto i = 0u; i < opts::TopCalledLimit && SFI != SFIend; ++SFI, ++i) {
outs() << " " << **SFI << " : "
<< (*SFI)->getExecutionCount() << '\n';
}
}
}
if (!opts::PrintSortedBy.empty() &&
std::find(opts::PrintSortedBy.begin(),
opts::PrintSortedBy.end(),

View File

@ -382,17 +382,15 @@ public:
std::set<uint64_t> &LargeFunctions) override;
};
///
/// Prints a list of the top 100 functions sorted by a set of
/// dyno stats categories.
///
class PrintSortedBy : public BinaryFunctionPass {
class PrintProgramStats : public BinaryFunctionPass {
public:
explicit PrintSortedBy(const cl::opt<bool> &PrintPass)
explicit PrintProgramStats(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }
const char *getName() const override {
return "print-sorted-by";
return "print-stats";
}
bool shouldPrint(const BinaryFunction &) const override {
return false;

View File

@ -1295,7 +1295,8 @@ void IndirectCallPromotion::runOnFunctions(
if (BC.MIA->isCall(Inst) && BC.MIA->getTargetSymbol(Inst, 0))
continue;
assert(BC.MIA->isCall(Inst) || BC.MIA->isIndirectBranch(Inst));
assert((BC.MIA->isCall(Inst) || BC.MIA->isIndirectBranch(Inst))
&& "expected a call or an indirect jump instruction");
if (IsJumpTable)
++TotalJumpTableCallsites;

View File

@ -27,29 +27,29 @@ extern cl::opt<uint32_t> RandomSeed;
extern bool shouldProcess(const bolt::BinaryFunction &Function);
extern size_t padFunction(const bolt::BinaryFunction &Function);
cl::opt<bolt::BinaryFunction::ReorderType>
cl::opt<bolt::ReorderFunctions::ReorderType>
ReorderFunctions("reorder-functions",
cl::desc("reorder and cluster functions (works only with relocations)"),
cl::init(bolt::BinaryFunction::RT_NONE),
cl::values(clEnumValN(bolt::BinaryFunction::RT_NONE,
cl::init(bolt::ReorderFunctions::RT_NONE),
cl::values(clEnumValN(bolt::ReorderFunctions::RT_NONE,
"none",
"do not reorder functions"),
clEnumValN(bolt::BinaryFunction::RT_EXEC_COUNT,
clEnumValN(bolt::ReorderFunctions::RT_EXEC_COUNT,
"exec-count",
"order by execution count"),
clEnumValN(bolt::BinaryFunction::RT_HFSORT,
clEnumValN(bolt::ReorderFunctions::RT_HFSORT,
"hfsort",
"use hfsort algorithm"),
clEnumValN(bolt::BinaryFunction::RT_HFSORT_PLUS,
clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS,
"hfsort+",
"use hfsort+ algorithm"),
clEnumValN(bolt::BinaryFunction::RT_PETTIS_HANSEN,
clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
"pettis-hansen",
"use Pettis-Hansen algorithm"),
clEnumValN(bolt::BinaryFunction::RT_RANDOM,
clEnumValN(bolt::ReorderFunctions::RT_RANDOM,
"random",
"reorder functions randomly"),
clEnumValN(bolt::BinaryFunction::RT_USER,
clEnumValN(bolt::ReorderFunctions::RT_USER,
"user",
"use function order specified by -function-order"),
clEnumValEnd),
@ -142,7 +142,7 @@ void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
}
}
if (opts::ReorderFunctions == BinaryFunction::RT_NONE)
if (opts::ReorderFunctions == RT_NONE)
return;
if (opts::Verbosity == 0) {
@ -280,15 +280,15 @@ std::vector<std::string> readFunctionOrderFile() {
void ReorderFunctions::runOnFunctions(BinaryContext &BC,
std::map<uint64_t, BinaryFunction> &BFs,
std::set<uint64_t> &LargeFunctions) {
if (!BC.HasRelocations && opts::ReorderFunctions != BinaryFunction::RT_NONE) {
if (!BC.HasRelocations && opts::ReorderFunctions != RT_NONE) {
errs() << "BOLT-ERROR: Function reordering only works when "
<< "relocs are enabled.\n";
exit(1);
}
if (opts::ReorderFunctions != BinaryFunction::RT_NONE &&
opts::ReorderFunctions != BinaryFunction::RT_EXEC_COUNT &&
opts::ReorderFunctions != BinaryFunction::RT_USER) {
if (opts::ReorderFunctions != RT_NONE &&
opts::ReorderFunctions != RT_EXEC_COUNT &&
opts::ReorderFunctions != RT_USER) {
Cg = buildCallGraph(BC,
BFs,
[this](const BinaryFunction &BF) {
@ -306,9 +306,9 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC,
std::vector<Cluster> Clusters;
switch(opts::ReorderFunctions) {
case BinaryFunction::RT_NONE:
case RT_NONE:
break;
case BinaryFunction::RT_EXEC_COUNT:
case RT_EXEC_COUNT:
{
std::vector<BinaryFunction *> SortedFunctions(BFs.size());
uint32_t Index = 0;
@ -340,20 +340,20 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC,
}
}
break;
case BinaryFunction::RT_HFSORT:
case RT_HFSORT:
Clusters = clusterize(Cg);
break;
case BinaryFunction::RT_HFSORT_PLUS:
case RT_HFSORT_PLUS:
Clusters = hfsortPlus(Cg, opts::UseGainCache);
break;
case BinaryFunction::RT_PETTIS_HANSEN:
case RT_PETTIS_HANSEN:
Clusters = pettisAndHansen(Cg);
break;
case BinaryFunction::RT_RANDOM:
case RT_RANDOM:
std::srand(opts::RandomSeed);
Clusters = randomClusters(Cg);
break;
case BinaryFunction::RT_USER:
case RT_USER:
{
uint32_t Index = 0;
for (const auto &Function : readFunctionOrderFile()) {
@ -394,7 +394,8 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC,
if (!BF->hasValidIndex()) {
BF->setIndex(Index++);
} else if (opts::Verbosity > 0) {
errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function << ".\n";
errs() << "BOLT-WARNING: Duplicate reorder entry for " << Function
<< ".\n";
}
}
}

View File

@ -24,7 +24,17 @@ class ReorderFunctions : public BinaryFunctionPass {
void reorder(std::vector<Cluster> &&Clusters,
std::map<uint64_t, BinaryFunction> &BFs);
public:
public:
enum ReorderType : char {
RT_NONE = 0,
RT_EXEC_COUNT,
RT_HFSORT,
RT_HFSORT_PLUS,
RT_PETTIS_HANSEN,
RT_RANDOM,
RT_USER
};
explicit ReorderFunctions(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) { }

View File

@ -217,14 +217,6 @@ RelocationMode("relocs",
cl::ZeroOrMore,
cl::cat(BoltCategory));
static cl::opt<bool>
ReportStaleFuncs("report-stale",
cl::desc("print a list of functions with a stale profile"),
cl::init(false),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltCategory));
static cl::list<std::string>
SkipFunctionNames("skip-funcs",
cl::CommaSeparated,
@ -255,15 +247,6 @@ SplitFunctions("split-functions",
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
static cl::opt<unsigned>
TopCalledLimit("top-called-limit",
cl::desc("maximum number of functions to print in top called "
"functions section"),
cl::init(100),
cl::ZeroOrMore,
cl::Hidden,
cl::cat(BoltCategory));
cl::opt<bool>
TrapOldCode("trap-old-code",
cl::desc("insert traps in old function bodies (relocation mode)"),
@ -572,7 +555,8 @@ createBinaryContext(ELFObjectFileBase *File, DataReader &DR,
std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
if (!MII) {
errs() << "BOLT-ERROR: no instruction info for target " << TripleName << "\n";
errs() << "BOLT-ERROR: no instruction info for target " << TripleName
<< "\n";
return nullptr;
}
@ -666,19 +650,6 @@ void RewriteInstance::reset() {
FailedAddresses.clear();
RangesSectionsWriter.reset();
LocationListWriter.reset();
TotalScore = 0;
}
void RewriteInstance::aggregateData() {
NamedRegionTimer T("aggregate data", TimerGroupName, opts::TimeRewrite);
DA.aggregate(*BC.get(), BinaryFunctions);
if (!opts::AggregateOnly)
return;
if (std::error_code EC = DA.writeAggregatedFile()) {
check_error(EC, "cannot create output data file");
}
}
void RewriteInstance::discoverStorage() {
@ -901,13 +872,11 @@ void RewriteInstance::run() {
readSpecialSections();
discoverFileObjects();
readDebugInfo();
readProfileData();
disassembleFunctions();
if (DA.started()) {
aggregateData();
if (opts::AggregateOnly)
return;
}
readProfileData();
if (opts::AggregateOnly)
return;
postProcessFunctions();
for (uint64_t Address : NonSimpleFunctions) {
auto FI = BinaryFunctions.find(Address);
assert(FI != BinaryFunctions.end() && "bad non-simple function address");
@ -1930,30 +1899,44 @@ void RewriteInstance::readDebugInfo() {
}
void RewriteInstance::readProfileData() {
NamedRegionTimer T("read profile data", TimerGroupName, opts::TimeRewrite);
if (BC->DR.getAllFuncsData().empty())
if (DA.started()) {
NamedRegionTimer T("aggregate data", TimerGroupName, opts::TimeRewrite);
DA.aggregate(*BC.get(), BinaryFunctions);
if (opts::AggregateOnly) {
if (std::error_code EC = DA.writeAggregatedFile()) {
check_error(EC, "cannot create output data file");
}
}
return;
}
NamedRegionTimer T("read profile data", TimerGroupName, opts::TimeRewrite);
// Preliminary match profile data to functions.
if (!BC->DR.getAllFuncsData().empty()) {
for (auto &BFI : BinaryFunctions) {
auto &Function = BFI.second;
if (auto *MemData = BC->DR.getFuncMemData(Function.getNames())) {
Function.MemData = MemData;
MemData->Used = true;
}
if (auto *FuncData = BC->DR.getFuncBranchData(Function.getNames())) {
Function.BranchData = FuncData;
Function.ExecutionCount = FuncData->ExecutionCount;
FuncData->Used = true;
}
}
}
for (auto &BFI : BinaryFunctions) {
auto &Function = BFI.second;
if (auto *MemData = BC->DR.getFuncMemData(Function.getNames())) {
Function.MemData = MemData;
MemData->Used = true;
}
if (auto *FuncData = BC->DR.getFuncBranchData(Function.getNames())) {
Function.BranchData = FuncData;
Function.ExecutionCount = FuncData->ExecutionCount;
FuncData->Used = true;
}
Function.readProfile();
}
}
void RewriteInstance::disassembleFunctions() {
NamedRegionTimer T("disassemble functions", TimerGroupName,
opts::TimeRewrite);
// Disassemble every function and build its control flow graph.
TotalScore = 0;
BC->SumExecutionCount = 0;
for (auto &BFI : BinaryFunctions) {
BinaryFunction &Function = BFI.second;
@ -1965,7 +1948,6 @@ void RewriteInstance::disassembleFunctions() {
}
auto FunctionData = BC->getFunctionData(Function);
if (!FunctionData) {
// When could it happen?
errs() << "BOLT-ERROR: corresponding section is non-executable or "
@ -1980,7 +1962,7 @@ void RewriteInstance::disassembleFunctions() {
}
// Offset of the function in the file.
auto *FileBegin =
const auto *FileBegin =
reinterpret_cast<const uint8_t*>(InputFile->getData().data());
Function.setFileOffset(FunctionData->begin() - FileBegin);
@ -2049,9 +2031,6 @@ void RewriteInstance::disassembleFunctions() {
}
BC->InterproceduralReferences.clear();
if (opts::AggregateOnly)
continue;
// Fill in CFI information for this function
if (Function.isSimple()) {
if (!CFIRdWrt->fillCFIInfoFor(Function)) {
@ -2071,6 +2050,23 @@ void RewriteInstance::disassembleFunctions() {
if (!Function.buildCFG())
continue;
if (opts::PrintAll)
Function.print(outs(), "while building cfg", true);
} // Iterate over all functions
}
void RewriteInstance::postProcessFunctions() {
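// Finalize CFGs now that the profile has been read, and accumulate
// whole-binary score and execution-count statistics.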
BC->TotalScore = 0;
BC->SumExecutionCount = 0;
for (auto &BFI : BinaryFunctions) {
BinaryFunction &Function = BFI.second;
if (Function.empty())
continue;
Function.postProcessCFG();
if (opts::PrintAll || opts::PrintCFG)
Function.print(outs(), "after building cfg", true);
@ -2082,95 +2078,8 @@ void RewriteInstance::disassembleFunctions() {
Function.printLoopInfo(outs());
}
TotalScore += Function.getFunctionScore();
BC->TotalScore += Function.getFunctionScore();
BC->SumExecutionCount += Function.getKnownExecutionCount();
} // Iterate over all functions
if (opts::AggregateOnly)
return;
const char *StaleFuncsHeader = "BOLT-INFO: Functions with stale profile:\n";
uint64_t NumSimpleFunctions{0};
uint64_t NumStaleProfileFunctions{0};
std::vector<BinaryFunction *> ProfiledFunctions;
for (auto &BFI : BinaryFunctions) {
auto &Function = BFI.second;
if (!Function.isSimple())
continue;
++NumSimpleFunctions;
if (Function.getExecutionCount() == BinaryFunction::COUNT_NO_PROFILE)
continue;
if (Function.hasValidProfile()) {
ProfiledFunctions.push_back(&Function);
} else {
if (opts::ReportStaleFuncs) {
outs() << StaleFuncsHeader
<< " " << Function << '\n';
StaleFuncsHeader = "";
}
++NumStaleProfileFunctions;
}
}
BC->NumProfiledFuncs = ProfiledFunctions.size();
const auto NumAllProfiledFunctions =
ProfiledFunctions.size() + NumStaleProfileFunctions;
outs() << "BOLT-INFO: "
<< NumAllProfiledFunctions
<< " functions out of " << NumSimpleFunctions << " simple functions ("
<< format("%.1f", NumAllProfiledFunctions /
(float) NumSimpleFunctions * 100.0f)
<< "%) have non-empty execution profile.\n";
if (NumStaleProfileFunctions) {
outs() << "BOLT-INFO: " << NumStaleProfileFunctions
<< format(" (%.1f%% of all profiled)",
NumStaleProfileFunctions /
(float) NumAllProfiledFunctions * 100.0f)
<< " function" << (NumStaleProfileFunctions == 1 ? "" : "s")
<< " have invalid (possibly stale) profile.\n";
}
// Profile is marked as 'Used' if it either matches a function name
// exactly or if it 100% matches any of functions with matching common
// LTO names.
auto getUnusedObjects = [this]() -> Optional<std::vector<StringRef>> {
std::vector<StringRef> UnusedObjects;
for (const auto &Func : BC->DR.getAllFuncsData()) {
if (!Func.getValue().Used) {
UnusedObjects.emplace_back(Func.getKey());
}
}
if (UnusedObjects.empty())
return NoneType();
return UnusedObjects;
};
if (const auto UnusedObjects = getUnusedObjects()) {
outs() << "BOLT-INFO: profile for " << UnusedObjects->size()
<< " objects was ignored\n";
if (opts::Verbosity >= 1) {
for (auto Name : *UnusedObjects) {
outs() << " " << Name << '\n';
}
}
}
if (ProfiledFunctions.size() > 10) {
if (opts::Verbosity >= 1) {
outs() << "BOLT-INFO: top called functions are:\n";
std::sort(ProfiledFunctions.begin(), ProfiledFunctions.end(),
[](BinaryFunction *A, BinaryFunction *B) {
return B->getExecutionCount() < A->getExecutionCount();
}
);
auto SFI = ProfiledFunctions.begin();
auto SFIend = ProfiledFunctions.end();
for (auto i = 0u; i < opts::TopCalledLimit && SFI != SFIend; ++SFI, ++i) {
outs() << " " << **SFI << " : "
<< (*SFI)->getExecutionCount() << '\n';
}
}
}
}
@ -3861,8 +3770,8 @@ void RewriteInstance::rewriteFile() {
outs() << "BOLT: " << CountOverwrittenFunctions
<< " out of " << BinaryFunctions.size()
<< " functions were overwritten.\n";
if (TotalScore != 0) {
double Coverage = OverwrittenScore / (double)TotalScore * 100.0;
if (BC->TotalScore != 0) {
double Coverage = OverwrittenScore / (double) BC->TotalScore * 100.0;
outs() << format("BOLT: Rewritten functions cover %.2lf", Coverage)
<< "% of the execution count of simple functions of "
"this binary.\n";

View File

@ -186,6 +186,8 @@ public:
/// optimization.
void disassembleFunctions();
void postProcessFunctions();
/// Run optimizations that operate at the binary, or post-linker, level.
void runOptimizationPasses();
@ -277,9 +279,6 @@ private:
void emitFunction(MCStreamer &Streamer, BinaryFunction &Function,
bool EmitColdPart);
/// Perform a perf.data aggregation job instead of a binary rewriting one
void aggregateData();
/// Detect addresses and offsets available in the binary for allocating
/// new sections.
void discoverStorage();
@ -523,9 +522,6 @@ private:
/// last emission, so that we may either decide to split or not optimize them.
std::set<uint64_t> LargeFunctions;
/// Total hotness score according to profiling data for this binary.
uint64_t TotalScore{0};
/// Section header string table.
StringTableBuilder SHStrTab;

View File

@ -178,9 +178,8 @@ int main(int argc, char **argv) {
if (!opts::PerfData.empty()) {
if (!opts::AggregateOnly) {
errs() << ToolName
<< ": reading perf data directly is unsupported, please use "
"-aggregate-only or perf2bolt\n";
exit(1);
<< ": WARNING: reading perf data directly is unsupported, please use "
"-aggregate-only or perf2bolt.\n!!! Proceed on your own risk. !!!\n";
}
DA->start(opts::PerfData);
} else if (!opts::InputDataFilename.empty()) {