mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-10 01:55:08 +00:00
[Coroutines] Optimized coroutine elision based on reachability
Differential Revision: https://reviews.llvm.org/D75440
This commit is contained in:
parent
7a6878a72e
commit
b10deb9487
@ -30,7 +30,7 @@ struct Lowerer : coro::LowererBase {
|
||||
SmallVector<CoroSubFnInst *, 4> ResumeAddr;
|
||||
DenseMap<CoroBeginInst *, SmallVector<CoroSubFnInst *, 4>> DestroyAddr;
|
||||
SmallVector<CoroFreeInst *, 1> CoroFrees;
|
||||
CoroSuspendInst *CoroFinalSuspend;
|
||||
SmallPtrSet<const SwitchInst *, 4> CoroSuspendSwitches;
|
||||
|
||||
Lowerer(Module &M) : LowererBase(M) {}
|
||||
|
||||
@ -38,6 +38,8 @@ struct Lowerer : coro::LowererBase {
|
||||
bool shouldElide(Function *F, DominatorTree &DT) const;
|
||||
void collectPostSplitCoroIds(Function *F);
|
||||
bool processCoroId(CoroIdInst *, AAResults &AA, DominatorTree &DT);
|
||||
bool hasEscapePath(const CoroBeginInst *,
|
||||
const SmallPtrSetImpl<BasicBlock *> &) const;
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
@ -142,6 +144,52 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) {
|
||||
removeTailCallAttribute(Frame, AA);
|
||||
}
|
||||
|
||||
bool Lowerer::hasEscapePath(const CoroBeginInst *CB,
|
||||
const SmallPtrSetImpl<BasicBlock *> &TIs) const {
|
||||
const auto &It = DestroyAddr.find(CB);
|
||||
assert(It != DestroyAddr.end());
|
||||
|
||||
// Limit the number of blocks we visit.
|
||||
unsigned Limit = 32 * (1 + It->second.size());
|
||||
|
||||
SmallVector<const BasicBlock *, 32> Worklist;
|
||||
Worklist.push_back(CB->getParent());
|
||||
|
||||
SmallPtrSet<const BasicBlock *, 32> Visited;
|
||||
// Consider basicblock of coro.destroy as visited one, so that we
|
||||
// skip the path pass through coro.destroy.
|
||||
for (auto *DA : It->second)
|
||||
Visited.insert(DA->getParent());
|
||||
|
||||
do {
|
||||
const auto *BB = Worklist.pop_back_val();
|
||||
if (!Visited.insert(BB).second)
|
||||
continue;
|
||||
if (TIs.count(BB))
|
||||
return true;
|
||||
|
||||
// Conservatively say that there is potentially a path.
|
||||
if (!--Limit)
|
||||
return true;
|
||||
|
||||
auto TI = BB->getTerminator();
|
||||
// Although the default dest of coro.suspend switches is suspend pointer
|
||||
// which means a escape path to normal terminator, it is reasonable to skip
|
||||
// it since coroutine frame doesn't change outside the coroutine body.
|
||||
if (isa<SwitchInst>(TI) &&
|
||||
CoroSuspendSwitches.count(cast<SwitchInst>(TI))) {
|
||||
Worklist.push_back(cast<SwitchInst>(TI)->getSuccessor(1));
|
||||
Worklist.push_back(cast<SwitchInst>(TI)->getSuccessor(2));
|
||||
} else
|
||||
Worklist.append(succ_begin(BB), succ_end(BB));
|
||||
|
||||
} while (!Worklist.empty());
|
||||
|
||||
// We have exhausted all possible paths and are certain that coro.begin can
|
||||
// not reach to any of terminators.
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
|
||||
// If no CoroAllocs, we cannot suppress allocation, so elision is not
|
||||
// possible.
|
||||
@ -154,61 +202,34 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
|
||||
// If the value escaped, then coro.destroy would have been referencing a
|
||||
// memory location storing that value and not the virtual register.
|
||||
|
||||
SmallPtrSet<Instruction *, 8> Terminators;
|
||||
bool HasMultiPred = false;
|
||||
SmallPtrSet<BasicBlock *, 8> Terminators;
|
||||
// First gather all of the non-exceptional terminators for the function.
|
||||
// Consider the final coro.suspend as the real terminator when the current
|
||||
// function is a coroutine.
|
||||
if (CoroFinalSuspend) {
|
||||
// If block of final coro.suspend has more than one predecessor,
|
||||
// then there is one resume path and the others are exceptional paths,
|
||||
// consider these predecessors as terminators.
|
||||
BasicBlock *FinalBB = CoroFinalSuspend->getParent();
|
||||
if (FinalBB->hasNPredecessorsOrMore(2)) {
|
||||
HasMultiPred = true;
|
||||
for (auto *B : predecessors(FinalBB))
|
||||
Terminators.insert(B->getTerminator());
|
||||
} else
|
||||
Terminators.insert(CoroFinalSuspend);
|
||||
} else {
|
||||
for (BasicBlock &B : *F) {
|
||||
auto *TI = B.getTerminator();
|
||||
if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() &&
|
||||
!isa<UnreachableInst>(TI))
|
||||
Terminators.insert(TI);
|
||||
Terminators.insert(&B);
|
||||
}
|
||||
}
|
||||
|
||||
// Filter out the coro.destroy that lie along exceptional paths.
|
||||
SmallPtrSet<CoroSubFnInst *, 4> DAs;
|
||||
SmallPtrSet<Instruction *, 2> TIs;
|
||||
SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
|
||||
for (auto &It : DestroyAddr) {
|
||||
for (CoroSubFnInst *DA : It.second) {
|
||||
for (Instruction *TI : Terminators) {
|
||||
if (DT.dominates(DA, TI)) {
|
||||
if (HasMultiPred)
|
||||
TIs.insert(TI);
|
||||
else
|
||||
DAs.insert(DA);
|
||||
for (Instruction *DA : It.second) {
|
||||
for (BasicBlock *TI : Terminators) {
|
||||
if (DT.dominates(DA, TI->getTerminator())) {
|
||||
ReferencedCoroBegins.insert(It.first);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// If all the predecessors dominate coro.destroys that reference same
|
||||
// coro.begin, record the coro.begin
|
||||
if (TIs.size() == Terminators.size()) {
|
||||
ReferencedCoroBegins.insert(It.first);
|
||||
TIs.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// Find all the coro.begin referenced by coro.destroy along happy paths.
|
||||
for (CoroSubFnInst *DA : DAs) {
|
||||
if (auto *CB = dyn_cast<CoroBeginInst>(DA->getFrame()))
|
||||
ReferencedCoroBegins.insert(CB);
|
||||
else
|
||||
return false;
|
||||
// Whether there is any paths from coro.begin to Terminators which not pass
|
||||
// through any of the coro.destroys.
|
||||
if (!ReferencedCoroBegins.count(It.first) &&
|
||||
!hasEscapePath(It.first, Terminators))
|
||||
ReferencedCoroBegins.insert(It.first);
|
||||
}
|
||||
|
||||
// If size of the set is the same as total number of coro.begin, that means we
|
||||
@ -219,7 +240,7 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
|
||||
|
||||
void Lowerer::collectPostSplitCoroIds(Function *F) {
|
||||
CoroIds.clear();
|
||||
CoroFinalSuspend = nullptr;
|
||||
CoroSuspendSwitches.clear();
|
||||
for (auto &I : instructions(F)) {
|
||||
if (auto *CII = dyn_cast<CoroIdInst>(&I))
|
||||
if (CII->getInfo().isPostSplit())
|
||||
@ -227,12 +248,16 @@ void Lowerer::collectPostSplitCoroIds(Function *F) {
|
||||
if (CII->getCoroutine() != CII->getFunction())
|
||||
CoroIds.push_back(CII);
|
||||
|
||||
// Consider case like:
|
||||
// %0 = call i8 @llvm.coro.suspend(...)
|
||||
// switch i8 %0, label %suspend [i8 0, label %resume
|
||||
// i8 1, label %cleanup]
|
||||
// and collect the SwitchInsts which are used by escape analysis later.
|
||||
if (auto *CSI = dyn_cast<CoroSuspendInst>(&I))
|
||||
if (CSI->isFinal()) {
|
||||
if (!CoroFinalSuspend)
|
||||
CoroFinalSuspend = CSI;
|
||||
else
|
||||
report_fatal_error("Only one suspend point can be marked as final");
|
||||
if (CSI->hasOneUse() && isa<SwitchInst>(CSI->use_begin()->getUser())) {
|
||||
SwitchInst *SWI = cast<SwitchInst>(CSI->use_begin()->getUser());
|
||||
if (SWI->getNumCases() == 2)
|
||||
CoroSuspendSwitches.insert(SWI);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -196,6 +196,58 @@ coro.ret:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @callResume_with_coro_suspend_3(
|
||||
define void @callResume_with_coro_suspend_3(i8 %cond) {
|
||||
entry:
|
||||
; CHECK: alloca %f.frame
|
||||
switch i8 %cond, label %coro.ret [
|
||||
i8 0, label %init.suspend
|
||||
i8 1, label %coro.ret
|
||||
]
|
||||
|
||||
init.suspend:
|
||||
; CHECK-NOT: llvm.coro.begin
|
||||
; CHECK-NOT: CustomAlloc
|
||||
; CHECK: call void @may_throw()
|
||||
%hdl = call i8* @f()
|
||||
; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8* %vFrame)
|
||||
%0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
|
||||
%1 = bitcast i8* %0 to void (i8*)*
|
||||
call fastcc void %1(i8* %hdl)
|
||||
%2 = call token @llvm.coro.save(i8* %hdl)
|
||||
%3 = call i8 @llvm.coro.suspend(token %2, i1 false)
|
||||
switch i8 %3, label %coro.ret [
|
||||
i8 0, label %final.suspend
|
||||
i8 1, label %cleanups
|
||||
]
|
||||
|
||||
; CHECK-LABEL: final.suspend:
|
||||
final.suspend:
|
||||
; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.cleanup to void (i8*)*)(i8* %vFrame)
|
||||
%4 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
|
||||
%5 = bitcast i8* %4 to void (i8*)*
|
||||
call fastcc void %5(i8* %hdl)
|
||||
%6 = call token @llvm.coro.save(i8* %hdl)
|
||||
%7 = call i8 @llvm.coro.suspend(token %6, i1 true)
|
||||
switch i8 %7, label %coro.ret [
|
||||
i8 0, label %coro.ret
|
||||
i8 1, label %cleanups
|
||||
]
|
||||
|
||||
; CHECK-LABEL: cleanups:
|
||||
cleanups:
|
||||
; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.cleanup to void (i8*)*)(i8* %vFrame)
|
||||
%8 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
|
||||
%9 = bitcast i8* %8 to void (i8*)*
|
||||
call fastcc void %9(i8* %hdl)
|
||||
br label %coro.ret
|
||||
|
||||
; CHECK-LABEL: coro.ret:
|
||||
coro.ret:
|
||||
; CHECK-NEXT: ret void
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
|
||||
; CHECK-LABEL: @callResume_PR34897_no_elision(
|
||||
@ -231,6 +283,41 @@ return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @callResume_PR34897_elision(
|
||||
define void @callResume_PR34897_elision(i1 %cond) {
|
||||
; CHECK-LABEL: entry:
|
||||
entry:
|
||||
; CHECK: alloca %f.frame
|
||||
; CHECK: tail call void @bar(
|
||||
tail call void @bar(i8* null)
|
||||
br i1 %cond, label %if.then, label %if.else
|
||||
|
||||
if.then:
|
||||
; CHECK-NOT: CustomAlloc
|
||||
; CHECK: call void @may_throw()
|
||||
%hdl = call i8* @f()
|
||||
; CHECK: call void @bar(
|
||||
tail call void @bar(i8* %hdl)
|
||||
; CHECK: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8*
|
||||
%0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
|
||||
%1 = bitcast i8* %0 to void (i8*)*
|
||||
call fastcc void %1(i8* %hdl)
|
||||
; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.cleanup to void (i8*)*)(i8*
|
||||
%2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
|
||||
%3 = bitcast i8* %2 to void (i8*)*
|
||||
call fastcc void %3(i8* %hdl)
|
||||
br label %return
|
||||
|
||||
if.else:
|
||||
br label %return
|
||||
|
||||
; CHECK-LABEL: return:
|
||||
return:
|
||||
; CHECK: ret void
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; a coroutine start function (cannot elide heap alloc, due to second argument to
|
||||
; coro.begin not pointint to coro.alloc)
|
||||
define i8* @f_no_elision() personality i8* null {
|
||||
|
Loading…
Reference in New Issue
Block a user