mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-09 01:29:52 +00:00
3652483c8e
Summary: Fix according to Coding Standards doc, section Don't Use Braces on Simple Single-Statement Bodies of if/else/loop Statements. This set of changes applies to lib Core only. (cherry picked from FBD33240028)
331 lines
11 KiB
C++
331 lines
11 KiB
C++
//===- bolt/Core/DynoStats.cpp - Dynamic execution stats ------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements the DynoStats class.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "bolt/Core/DynoStats.h"
|
|
#include "bolt/Core/BinaryBasicBlock.h"
|
|
#include "bolt/Core/BinaryFunction.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include <algorithm>
|
|
#include <string>
|
|
|
|
#undef DEBUG_TYPE
|
|
#define DEBUG_TYPE "bolt"
|
|
|
|
using namespace llvm;
|
|
using namespace bolt;
|
|
|
|
namespace opts {
|
|
|
|
extern cl::OptionCategory BoltCategory;
|
|
|
|
static cl::opt<uint32_t>
|
|
DynoStatsScale("dyno-stats-scale",
|
|
cl::desc("scale to be applied while reporting dyno stats"),
|
|
cl::Optional,
|
|
cl::init(1),
|
|
cl::Hidden,
|
|
cl::cat(BoltCategory));
|
|
|
|
static cl::opt<uint32_t>
|
|
PrintDynoOpcodeStat("print-dyno-opcode-stats",
|
|
cl::desc("print per instruction opcode dyno stats and the function"
|
|
"names:BB offsets of the nth highest execution counts"),
|
|
cl::init(0),
|
|
cl::Hidden,
|
|
cl::cat(BoltCategory));
|
|
|
|
} // namespace opts
|
|
|
|
namespace llvm {
|
|
namespace bolt {
|
|
|
|
constexpr const char *DynoStats::Desc[];
|
|
|
|
bool DynoStats::operator<(const DynoStats &Other) const {
|
|
return std::lexicographical_compare(
|
|
&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
|
|
&Other.Stats[FIRST_DYNO_STAT], &Other.Stats[LAST_DYNO_STAT]);
|
|
}
|
|
|
|
bool DynoStats::operator==(const DynoStats &Other) const {
|
|
return std::equal(&Stats[FIRST_DYNO_STAT], &Stats[LAST_DYNO_STAT],
|
|
&Other.Stats[FIRST_DYNO_STAT]);
|
|
}
|
|
|
|
bool DynoStats::lessThan(const DynoStats &Other,
|
|
ArrayRef<Category> Keys) const {
|
|
return std::lexicographical_compare(
|
|
Keys.begin(), Keys.end(), Keys.begin(), Keys.end(),
|
|
[this, &Other](const Category A, const Category) {
|
|
return Stats[A] < Other.Stats[A];
|
|
});
|
|
}
|
|
|
|
void DynoStats::print(raw_ostream &OS, const DynoStats *Other,
|
|
MCInstPrinter *Printer) const {
|
|
auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat,
|
|
uint64_t OtherStat) {
|
|
OS << format("%'20lld : ", Stat * opts::DynoStatsScale) << Name;
|
|
if (Other) {
|
|
if (Stat != OtherStat) {
|
|
OtherStat = std::max(OtherStat, uint64_t(1)); // to prevent divide by 0
|
|
OS << format(" (%+.1f%%)", ((float)Stat - (float)OtherStat) * 100.0 /
|
|
(float)(OtherStat));
|
|
} else {
|
|
OS << " (=)";
|
|
}
|
|
}
|
|
OS << '\n';
|
|
};
|
|
|
|
for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
|
|
Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
|
|
|
|
if (!PrintAArch64Stats && Stat == DynoStats::VENEER_CALLS_AARCH64)
|
|
continue;
|
|
|
|
printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0);
|
|
}
|
|
if (opts::PrintDynoOpcodeStat && Printer) {
|
|
outs() << "\nProgram-wide opcode histogram:\n";
|
|
OS << " Opcode, Execution Count, Max Exec Count, "
|
|
"Function Name:Offset ...\n";
|
|
std::vector<std::pair<uint64_t, unsigned>> SortedHistogram;
|
|
for (const OpcodeStatTy &Stat : OpcodeHistogram)
|
|
SortedHistogram.emplace_back(Stat.second.first, Stat.first);
|
|
|
|
// Sort using lexicographic ordering
|
|
std::sort(SortedHistogram.begin(), SortedHistogram.end());
|
|
|
|
// Dump in ascending order: Start with Opcode with Highest execution
|
|
// count.
|
|
for (auto Stat = SortedHistogram.rbegin(); Stat != SortedHistogram.rend();
|
|
++Stat) {
|
|
OS << format("%20s,%'18lld", Printer->getOpcodeName(Stat->second).data(),
|
|
Stat->first * opts::DynoStatsScale);
|
|
|
|
MaxOpcodeHistogramTy MaxMultiMap =
|
|
OpcodeHistogram.at(Stat->second).second;
|
|
// Start with function name:BB offset with highest execution count.
|
|
for (auto Max = MaxMultiMap.rbegin(); Max != MaxMultiMap.rend(); ++Max) {
|
|
OS << format(", %'18lld, ", Max->first * opts::DynoStatsScale)
|
|
<< Max->second.first.str() << ':' << Max->second.second;
|
|
}
|
|
OS << '\n';
|
|
}
|
|
}
|
|
}
|
|
|
|
void DynoStats::operator+=(const DynoStats &Other) {
|
|
for (auto Stat = DynoStats::FIRST_DYNO_STAT + 1;
|
|
Stat < DynoStats::LAST_DYNO_STAT; ++Stat) {
|
|
Stats[Stat] += Other[Stat];
|
|
}
|
|
for (const OpcodeStatTy &Stat : Other.OpcodeHistogram) {
|
|
auto I = OpcodeHistogram.find(Stat.first);
|
|
if (I == OpcodeHistogram.end()) {
|
|
OpcodeHistogram.emplace(Stat);
|
|
} else {
|
|
// Merge Other Historgrams, log only the opts::PrintDynoOpcodeStat'th
|
|
// maximum counts.
|
|
I->second.first += Stat.second.first;
|
|
auto &MMap = I->second.second;
|
|
auto &OtherMMap = Stat.second.second;
|
|
auto Size = MMap.size();
|
|
assert(Size <= opts::PrintDynoOpcodeStat);
|
|
for (auto Iter = OtherMMap.rbegin(); Iter != OtherMMap.rend(); ++Iter) {
|
|
if (Size++ >= opts::PrintDynoOpcodeStat) {
|
|
auto First = MMap.begin();
|
|
if (Iter->first <= First->first)
|
|
break;
|
|
MMap.erase(First);
|
|
}
|
|
MMap.emplace(*Iter);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
DynoStats getDynoStats(const BinaryFunction &BF) {
|
|
auto &BC = BF.getBinaryContext();
|
|
|
|
DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64());
|
|
|
|
// Return empty-stats about the function we don't completely understand.
|
|
if (!BF.isSimple() || !BF.hasValidProfile() || !BF.hasCanonicalCFG())
|
|
return Stats;
|
|
|
|
// Update enumeration of basic blocks for correct detection of branch'
|
|
// direction.
|
|
BF.updateLayoutIndices();
|
|
|
|
for (BinaryBasicBlock *const &BB : BF.layout()) {
|
|
// The basic block execution count equals to the sum of incoming branch
|
|
// frequencies. This may deviate from the sum of outgoing branches of the
|
|
// basic block especially since the block may contain a function that
|
|
// does not return or a function that throws an exception.
|
|
const uint64_t BBExecutionCount = BB->getKnownExecutionCount();
|
|
|
|
// Ignore empty blocks and blocks that were not executed.
|
|
if (BB->getNumNonPseudos() == 0 || BBExecutionCount == 0)
|
|
continue;
|
|
|
|
// Count AArch64 linker-inserted veneers
|
|
if (BF.isAArch64Veneer())
|
|
Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount();
|
|
|
|
// Count various instruction types by iterating through all instructions.
|
|
// When -print-dyno-opcode-stats is on, count per each opcode and record
|
|
// maximum execution counts.
|
|
for (const MCInst &Instr : *BB) {
|
|
if (opts::PrintDynoOpcodeStat) {
|
|
unsigned Opcode = Instr.getOpcode();
|
|
auto I = Stats.OpcodeHistogram.find(Opcode);
|
|
if (I == Stats.OpcodeHistogram.end()) {
|
|
DynoStats::MaxOpcodeHistogramTy MMap;
|
|
MMap.emplace(BBExecutionCount,
|
|
std::make_pair(BF.getOneName(), BB->getOffset()));
|
|
Stats.OpcodeHistogram.emplace(Opcode,
|
|
std::make_pair(BBExecutionCount, MMap));
|
|
} else {
|
|
I->second.first += BBExecutionCount;
|
|
bool Insert = true;
|
|
if (I->second.second.size() == opts::PrintDynoOpcodeStat) {
|
|
auto First = I->second.second.begin();
|
|
if (First->first < BBExecutionCount)
|
|
I->second.second.erase(First);
|
|
else
|
|
Insert = false;
|
|
}
|
|
if (Insert) {
|
|
I->second.second.emplace(
|
|
BBExecutionCount,
|
|
std::make_pair(BF.getOneName(), BB->getOffset()));
|
|
}
|
|
}
|
|
}
|
|
|
|
if (BC.MIB->isStore(Instr)) {
|
|
Stats[DynoStats::STORES] += BBExecutionCount;
|
|
}
|
|
if (BC.MIB->isLoad(Instr)) {
|
|
Stats[DynoStats::LOADS] += BBExecutionCount;
|
|
}
|
|
if (!BC.MIB->isCall(Instr))
|
|
continue;
|
|
|
|
uint64_t CallFreq = BBExecutionCount;
|
|
if (BC.MIB->getConditionalTailCall(Instr)) {
|
|
CallFreq =
|
|
BC.MIB->getAnnotationWithDefault<uint64_t>(Instr, "CTCTakenCount");
|
|
}
|
|
Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
|
|
if (BC.MIB->isIndirectCall(Instr)) {
|
|
Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
|
|
} else if (const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr)) {
|
|
const BinaryFunction *BF = BC.getFunctionForSymbol(CallSymbol);
|
|
if (BF && BF->isPLTFunction()) {
|
|
Stats[DynoStats::PLT_CALLS] += CallFreq;
|
|
|
|
// We don't process PLT functions and hence have to adjust relevant
|
|
// dynostats here for:
|
|
//
|
|
// jmp *GOT_ENTRY(%rip)
|
|
//
|
|
// NOTE: this is arch-specific.
|
|
Stats[DynoStats::FUNCTION_CALLS] += CallFreq;
|
|
Stats[DynoStats::INDIRECT_CALLS] += CallFreq;
|
|
Stats[DynoStats::LOADS] += CallFreq;
|
|
Stats[DynoStats::INSTRUCTIONS] += CallFreq;
|
|
}
|
|
}
|
|
}
|
|
|
|
Stats[DynoStats::INSTRUCTIONS] += BB->getNumNonPseudos() * BBExecutionCount;
|
|
|
|
// Jump tables.
|
|
const MCInst *LastInstr = BB->getLastNonPseudoInstr();
|
|
if (BC.MIB->getJumpTable(*LastInstr)) {
|
|
Stats[DynoStats::JUMP_TABLE_BRANCHES] += BBExecutionCount;
|
|
LLVM_DEBUG(
|
|
static uint64_t MostFrequentJT;
|
|
if (BBExecutionCount > MostFrequentJT) {
|
|
MostFrequentJT = BBExecutionCount;
|
|
dbgs() << "BOLT-INFO: most frequently executed jump table is in "
|
|
<< "function " << BF << " in basic block " << BB->getName()
|
|
<< " executed totally " << BBExecutionCount << " times.\n";
|
|
}
|
|
);
|
|
continue;
|
|
}
|
|
|
|
if (BC.MIB->isIndirectBranch(*LastInstr) && !BC.MIB->isCall(*LastInstr)) {
|
|
Stats[DynoStats::UNKNOWN_INDIRECT_BRANCHES] += BBExecutionCount;
|
|
continue;
|
|
}
|
|
|
|
// Update stats for branches.
|
|
const MCSymbol *TBB = nullptr;
|
|
const MCSymbol *FBB = nullptr;
|
|
MCInst *CondBranch = nullptr;
|
|
MCInst *UncondBranch = nullptr;
|
|
if (!BB->analyzeBranch(TBB, FBB, CondBranch, UncondBranch))
|
|
continue;
|
|
|
|
if (!CondBranch && !UncondBranch)
|
|
continue;
|
|
|
|
// Simple unconditional branch.
|
|
if (!CondBranch) {
|
|
Stats[DynoStats::UNCOND_BRANCHES] += BBExecutionCount;
|
|
continue;
|
|
}
|
|
|
|
// CTCs: instruction annotations could be stripped, hence check the number
|
|
// of successors to identify conditional tail calls.
|
|
if (BB->succ_size() == 1) {
|
|
if (BB->branch_info_begin() != BB->branch_info_end())
|
|
Stats[DynoStats::UNCOND_BRANCHES] += BB->branch_info_begin()->Count;
|
|
continue;
|
|
}
|
|
|
|
// Conditional branch that could be followed by an unconditional branch.
|
|
uint64_t TakenCount = BB->getTakenBranchInfo().Count;
|
|
if (TakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
|
|
TakenCount = 0;
|
|
|
|
uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
|
|
if (NonTakenCount == BinaryBasicBlock::COUNT_NO_PROFILE)
|
|
NonTakenCount = 0;
|
|
|
|
if (BF.isForwardBranch(BB, BB->getConditionalSuccessor(true))) {
|
|
Stats[DynoStats::FORWARD_COND_BRANCHES] += BBExecutionCount;
|
|
Stats[DynoStats::FORWARD_COND_BRANCHES_TAKEN] += TakenCount;
|
|
} else {
|
|
Stats[DynoStats::BACKWARD_COND_BRANCHES] += BBExecutionCount;
|
|
Stats[DynoStats::BACKWARD_COND_BRANCHES_TAKEN] += TakenCount;
|
|
}
|
|
|
|
if (UncondBranch) {
|
|
Stats[DynoStats::UNCOND_BRANCHES] += NonTakenCount;
|
|
}
|
|
}
|
|
|
|
return Stats;
|
|
}
|
|
|
|
} // namespace bolt
|
|
} // namespace llvm
|