[BOLT] Run EliminateUnreachableBlocks in parallel (#71299)

On my laptop, the wall time for this pass decreased from ~80 sec to 5
sec when processing clang.
This commit is contained in:
Vladislav Khmelevsky 2023-11-10 00:46:04 +04:00 committed by GitHub
parent 73519ba27a
commit c6c04a83a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 39 additions and 29 deletions

View File

@ -1445,7 +1445,8 @@ public:
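/// Rebuilds the BB layout, ignoring dead BBs. Returns the number of removed
/// BBs and the number of bytes of code removed. \p Emitter, when supplied,
/// is forwarded to the code size computation for the removed blocks.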
std::pair<unsigned, uint64_t> eraseInvalidBBs();
std::pair<unsigned, uint64_t>
eraseInvalidBBs(const MCCodeEmitter *Emitter = nullptr);
/// Get the relative order between two basic blocks in the original
/// layout. The result is > 0 if B occurs before A and < 0 if B

View File

@ -322,7 +322,8 @@ void BinaryFunction::markUnreachableBlocks() {
// Any unnecessary fallthrough jumps revealed after calling eraseInvalidBBs
// will be cleaned up by fixBranches().
std::pair<unsigned, uint64_t> BinaryFunction::eraseInvalidBBs() {
std::pair<unsigned, uint64_t>
BinaryFunction::eraseInvalidBBs(const MCCodeEmitter *Emitter) {
DenseSet<const BinaryBasicBlock *> InvalidBBs;
unsigned Count = 0;
uint64_t Bytes = 0;
@ -331,7 +332,7 @@ std::pair<unsigned, uint64_t> BinaryFunction::eraseInvalidBBs() {
assert(!isEntryPoint(*BB) && "all entry blocks must be valid");
InvalidBBs.insert(BB);
++Count;
Bytes += BC.computeCodeSize(BB->begin(), BB->end());
Bytes += BC.computeCodeSize(BB->begin(), BB->end(), Emitter);
}
}

View File

@ -317,38 +317,46 @@ void NormalizeCFG::runOnFunctions(BinaryContext &BC) {
}
void EliminateUnreachableBlocks::runOnFunction(BinaryFunction &Function) {
if (!Function.getLayout().block_empty()) {
unsigned Count;
uint64_t Bytes;
Function.markUnreachableBlocks();
LLVM_DEBUG({
for (BinaryBasicBlock &BB : Function) {
if (!BB.isValid()) {
dbgs() << "BOLT-INFO: UCE found unreachable block " << BB.getName()
<< " in function " << Function << "\n";
Function.dump();
}
BinaryContext &BC = Function.getBinaryContext();
unsigned Count;
uint64_t Bytes;
Function.markUnreachableBlocks();
LLVM_DEBUG({
for (BinaryBasicBlock &BB : Function) {
if (!BB.isValid()) {
dbgs() << "BOLT-INFO: UCE found unreachable block " << BB.getName()
<< " in function " << Function << "\n";
Function.dump();
}
});
std::tie(Count, Bytes) = Function.eraseInvalidBBs();
DeletedBlocks += Count;
DeletedBytes += Bytes;
if (Count) {
Modified.insert(&Function);
if (opts::Verbosity > 0)
outs() << "BOLT-INFO: removed " << Count
<< " dead basic block(s) accounting for " << Bytes
<< " bytes in function " << Function << '\n';
}
});
BinaryContext::IndependentCodeEmitter Emitter =
BC.createIndependentMCCodeEmitter();
std::tie(Count, Bytes) = Function.eraseInvalidBBs(Emitter.MCE.get());
DeletedBlocks += Count;
DeletedBytes += Bytes;
if (Count) {
auto L = BC.scopeLock();
Modified.insert(&Function);
if (opts::Verbosity > 0)
outs() << "BOLT-INFO: removed " << Count
<< " dead basic block(s) accounting for " << Bytes
<< " bytes in function " << Function << '\n';
}
}
void EliminateUnreachableBlocks::runOnFunctions(BinaryContext &BC) {
for (auto &It : BC.getBinaryFunctions()) {
BinaryFunction &Function = It.second;
if (shouldOptimize(Function))
runOnFunction(Function);
}
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
runOnFunction(BF);
};
ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) {
return !shouldOptimize(BF) || BF.getLayout().block_empty();
};
ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_CONSTANT, WorkFun,
SkipPredicate, "elimininate-unreachable");
if (DeletedBlocks)
outs() << "BOLT-INFO: UCE removed " << DeletedBlocks << " blocks and "