mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-14 19:49:36 +00:00
[BOLT] Dump dynamic execution per instruction opcode
Summary: We extended DynoStats to dump the histogram per instruction opcode. By default the dump is turned off. Use '-print-dyno-opcode-stats' to enable the dump. BOLT also dumps for each instruction opcode the maximum execution count and corresponding function name and basic block offsets where the instruction occurs. Below is a sample of the dump: Opcode, Execution Count, Max Exec Count, Function Name:Offset SHR8rCL, 232, 232, _ZNK5folly14AsyncSSLSocket4goodEv:53 VPADDDYrr, 13956, 388, chacha20_encrypt_bytes.part.0/3:736 PMOVSXBWrr, 4, 2, ares_expand_name/1:264 VMOVAPSmr, 1082, 43, chacha20_encrypt_bytes.part.0/3:2864 VPSHUFBrr, 9540, 1667, chacha20_encrypt_bytes.part.0/3:4416 VPUNPCKLDQYrr, 1102, 188, jsimd_ycc_rgb_convert_avx2/1:125 VPBROADCASTQYrm, 39, 39, chacha20_encrypt_bytes.part.0/3:400 PMOVSXWDrr, 8, 2, ares_expand_name/1:264 VPORrr, 817, 129, jsimd_idct_islow_avx2/1:41 PSLLDri, 8690752, 65644, blockmix_salsa8_xor/1:1424 (cherry picked from FBD28859624)
This commit is contained in:
parent
c9f5f47b51
commit
60b15062e1
@ -37,6 +37,14 @@ DynoStatsScale("dyno-stats-scale",
|
||||
cl::Hidden,
|
||||
cl::cat(BoltCategory));
|
||||
|
||||
static cl::opt<uint32_t>
|
||||
PrintDynoOpcodeStat("print-dyno-opcode-stats",
|
||||
cl::desc("print per instruction opcode dyno stats and the function"
|
||||
"names:BB offsets of the nth highest execution counts"),
|
||||
cl::init(0),
|
||||
cl::Hidden,
|
||||
cl::cat(BoltCategory));
|
||||
|
||||
} // namespace opts
|
||||
|
||||
namespace llvm {
|
||||
@ -69,7 +77,8 @@ bool DynoStats::lessThan(const DynoStats &Other,
|
||||
);
|
||||
}
|
||||
|
||||
void DynoStats::print(raw_ostream &OS, const DynoStats *Other) const {
|
||||
void DynoStats::print(raw_ostream &OS, const DynoStats *Other,
|
||||
MCInstPrinter *Printer) const {
|
||||
auto printStatWithDelta = [&](const std::string &Name, uint64_t Stat,
|
||||
uint64_t OtherStat) {
|
||||
OS << format("%'20lld : ", Stat * opts::DynoStatsScale) << Name;
|
||||
@ -95,6 +104,35 @@ void DynoStats::print(raw_ostream &OS, const DynoStats *Other) const {
|
||||
|
||||
printStatWithDelta(Desc[Stat], Stats[Stat], Other ? (*Other)[Stat] : 0);
|
||||
}
|
||||
if (opts::PrintDynoOpcodeStat && Printer) {
|
||||
outs() << "\nProgram-wide opcode histogram:\n";
|
||||
OS << " Opcode, Execution Count, Max Exec Count, "
|
||||
"Function Name:Offset ...\n";
|
||||
std::vector<std::pair<uint64_t, unsigned>> SortedHistogram;
|
||||
for (const OpcodeStatTy &Stat : OpcodeHistogram)
|
||||
SortedHistogram.emplace_back(Stat.second.first, Stat.first);
|
||||
|
||||
// Sort using lexicographic ordering
|
||||
std::sort(SortedHistogram.begin(), SortedHistogram.end());
|
||||
|
||||
// Dump in ascending order: Start with Opcode with Highest execution
|
||||
// count.
|
||||
for (auto Stat = SortedHistogram.rbegin(); Stat != SortedHistogram.rend();
|
||||
++Stat) {
|
||||
OS << format("%20s,%'18lld",
|
||||
Printer->getOpcodeName(Stat->second).data(),
|
||||
Stat->first * opts::DynoStatsScale);
|
||||
|
||||
MaxOpcodeHistogramTy MaxMultiMap =
|
||||
OpcodeHistogram.at(Stat->second).second;
|
||||
// Start with function name:BB offset with highest execution count.
|
||||
for (auto Max = MaxMultiMap.rbegin(); Max != MaxMultiMap.rend(); ++Max) {
|
||||
OS << format(", %'18lld, ", Max->first * opts::DynoStatsScale)
|
||||
<< Max->second.first.str() << ':' << Max->second.second;
|
||||
}
|
||||
OS << '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DynoStats::operator+=(const DynoStats &Other) {
|
||||
@ -103,6 +141,29 @@ void DynoStats::operator+=(const DynoStats &Other) {
|
||||
++Stat) {
|
||||
Stats[Stat] += Other[Stat];
|
||||
}
|
||||
for (const OpcodeStatTy &Stat : Other.OpcodeHistogram) {
|
||||
auto I = OpcodeHistogram.find(Stat.first);
|
||||
if (I == OpcodeHistogram.end()) {
|
||||
OpcodeHistogram.emplace(Stat);
|
||||
} else {
|
||||
// Merge Other Historgrams, log only the opts::PrintDynoOpcodeStat'th
|
||||
// maximum counts.
|
||||
I->second.first += Stat.second.first;
|
||||
auto &MMap = I->second.second;
|
||||
auto &OtherMMap = Stat.second.second;
|
||||
auto Size = MMap.size();
|
||||
assert(Size <= opts::PrintDynoOpcodeStat);
|
||||
for (auto Iter = OtherMMap.rbegin(); Iter != OtherMMap.rend(); ++Iter) {
|
||||
if (Size++ >= opts::PrintDynoOpcodeStat) {
|
||||
auto First = MMap.begin();
|
||||
if (Iter->first <= First->first)
|
||||
break;
|
||||
MMap.erase(First);
|
||||
}
|
||||
MMap.emplace(*Iter);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DynoStats getDynoStats(const BinaryFunction &BF) {
|
||||
@ -133,15 +194,43 @@ DynoStats getDynoStats(const BinaryFunction &BF) {
|
||||
if(BF.isAArch64Veneer())
|
||||
Stats[DynoStats::VENEER_CALLS_AARCH64] += BF.getKnownExecutionCount();
|
||||
|
||||
// Count the number of calls by iterating through all instructions.
|
||||
// Count various instruction types by iterating through all instructions.
|
||||
// When -print-dyno-opcode-stats is on, count per each opcode and record
|
||||
// maximum execution counts.
|
||||
for (const MCInst &Instr : *BB) {
|
||||
if (opts::PrintDynoOpcodeStat) {
|
||||
unsigned Opcode = Instr.getOpcode();
|
||||
auto I = Stats.OpcodeHistogram.find(Opcode);
|
||||
if (I == Stats.OpcodeHistogram.end()) {
|
||||
DynoStats::MaxOpcodeHistogramTy MMap;
|
||||
MMap.emplace(BBExecutionCount,
|
||||
std::make_pair(BF.getOneName(), BB->getOffset()));
|
||||
Stats.OpcodeHistogram.emplace(Opcode,
|
||||
std::make_pair(BBExecutionCount, MMap));
|
||||
} else {
|
||||
I->second.first += BBExecutionCount;
|
||||
bool Insert = true;
|
||||
if (I->second.second.size() == opts::PrintDynoOpcodeStat) {
|
||||
auto First = I->second.second.begin();
|
||||
if (First->first < BBExecutionCount)
|
||||
I->second.second.erase(First);
|
||||
else
|
||||
Insert = false;
|
||||
}
|
||||
if (Insert) {
|
||||
I->second.second.emplace(
|
||||
BBExecutionCount,
|
||||
std::make_pair(BF.getOneName(), BB->getOffset()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (BC.MIB->isStore(Instr)) {
|
||||
Stats[DynoStats::STORES] += BBExecutionCount;
|
||||
}
|
||||
if (BC.MIB->isLoad(Instr)) {
|
||||
Stats[DynoStats::LOADS] += BBExecutionCount;
|
||||
}
|
||||
|
||||
if (!BC.MIB->isCall(Instr))
|
||||
continue;
|
||||
|
||||
|
@ -12,7 +12,10 @@
|
||||
#define LLVM_TOOLS_LLVM_BOLT_DYNO_STATS_H
|
||||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/MC/MCInstPrinter.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
@ -102,7 +105,8 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
void print(raw_ostream &OS, const DynoStats *Other = nullptr) const;
|
||||
void print(raw_ostream &OS, const DynoStats *Other = nullptr,
|
||||
MCInstPrinter *Printer = nullptr) const;
|
||||
|
||||
void operator+=(const DynoStats &Other);
|
||||
bool operator<(const DynoStats &Other) const;
|
||||
@ -113,6 +117,19 @@ public:
|
||||
static const char* Description(const Category C) {
|
||||
return Desc[C];
|
||||
}
|
||||
|
||||
/// Maps instruction opcodes to:
|
||||
/// 1. Accumulated executed instruction counts.
|
||||
/// 2. a multimap that records highest execution counts, function names,
|
||||
/// and BB offsets where intructions of these opcodes occur.
|
||||
using MaxOpcodeHistogramTy =
|
||||
std::multimap<uint64_t, std::pair<StringRef, uint32_t>>;
|
||||
using OpcodeStatTy =
|
||||
std::pair<unsigned, std::pair<uint64_t, MaxOpcodeHistogramTy>>;
|
||||
using OpcodeHistogramTy =
|
||||
std::unordered_map<unsigned, std::pair<uint64_t, MaxOpcodeHistogramTy>>;
|
||||
|
||||
OpcodeHistogramTy OpcodeHistogram;
|
||||
};
|
||||
|
||||
inline raw_ostream &operator<<(raw_ostream &OS, const DynoStats &Stats) {
|
||||
|
@ -83,7 +83,7 @@ public:
|
||||
<< PrevDynoStats;
|
||||
if (Changed) {
|
||||
outs() << '\n';
|
||||
NewDynoStats.print(outs(), &PrevDynoStats);
|
||||
NewDynoStats.print(outs(), &PrevDynoStats, BC.InstPrinter.get());
|
||||
}
|
||||
outs() << '\n';
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user