[Coroutines] Optimized coroutine elision based on reachability
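Heap allocation elision previously keyed entirely on dominance: for each coro.begin, some coro.destroy referencing it had to dominate either a non-exceptional terminator of the function or the final suspend point. This change replaces the final-suspend bookkeeping with a reachability fallback: when no dominating coro.destroy is found, elision is still performed if no terminator can be reached from coro.begin without passing through a coro.destroy. Two-case switches fed directly by coro.suspend are collected so that the walk can skip their default (suspend) edge rather than treating suspension as an escape.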

Differential Revision: https://reviews.llvm.org/D75440
Jun Ma 2020-03-02 17:35:34 +08:00
parent 7a6878a72e
commit b10deb9487
2 changed files with 157 additions and 45 deletions

llvm/lib/Transforms/Coroutines/CoroElide.cpp

@@ -30,7 +30,7 @@ struct Lowerer : coro::LowererBase {
   SmallVector<CoroSubFnInst *, 4> ResumeAddr;
   DenseMap<CoroBeginInst *, SmallVector<CoroSubFnInst *, 4>> DestroyAddr;
   SmallVector<CoroFreeInst *, 1> CoroFrees;
-  CoroSuspendInst *CoroFinalSuspend;
+  SmallPtrSet<const SwitchInst *, 4> CoroSuspendSwitches;
 
   Lowerer(Module &M) : LowererBase(M) {}
@@ -38,6 +38,8 @@ struct Lowerer : coro::LowererBase {
   bool shouldElide(Function *F, DominatorTree &DT) const;
   void collectPostSplitCoroIds(Function *F);
   bool processCoroId(CoroIdInst *, AAResults &AA, DominatorTree &DT);
+  bool hasEscapePath(const CoroBeginInst *,
+                     const SmallPtrSetImpl<BasicBlock *> &) const;
 };
 } // end anonymous namespace
@@ -142,6 +144,52 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) {
   removeTailCallAttribute(Frame, AA);
 }
 
+bool Lowerer::hasEscapePath(const CoroBeginInst *CB,
+                            const SmallPtrSetImpl<BasicBlock *> &TIs) const {
+  const auto &It = DestroyAddr.find(CB);
+  assert(It != DestroyAddr.end());
+
+  // Limit the number of blocks we visit.
+  unsigned Limit = 32 * (1 + It->second.size());
+
+  SmallVector<const BasicBlock *, 32> Worklist;
+  Worklist.push_back(CB->getParent());
+
+  SmallPtrSet<const BasicBlock *, 32> Visited;
+  // Treat the basic block of each coro.destroy as visited, so that the walk
+  // skips any path that passes through a coro.destroy.
+  for (auto *DA : It->second)
+    Visited.insert(DA->getParent());
+
+  do {
+    const auto *BB = Worklist.pop_back_val();
+    if (!Visited.insert(BB).second)
+      continue;
+    if (TIs.count(BB))
+      return true;
+
+    // Conservatively say that there is potentially a path.
+    if (!--Limit)
+      return true;
+
+    auto TI = BB->getTerminator();
+    // Although the default destination of a coro.suspend switch is the
+    // suspend path, which ends at a normal terminator and so looks like an
+    // escape path, it is safe to skip it: the coroutine frame does not
+    // change outside the coroutine body.
+    if (isa<SwitchInst>(TI) &&
+        CoroSuspendSwitches.count(cast<SwitchInst>(TI))) {
+      Worklist.push_back(cast<SwitchInst>(TI)->getSuccessor(1));
+      Worklist.push_back(cast<SwitchInst>(TI)->getSuccessor(2));
+    } else
+      Worklist.append(succ_begin(BB), succ_end(BB));
+  } while (!Worklist.empty());
+
+  // We have exhausted all possible paths and are certain that coro.begin
+  // cannot reach any of the terminators.
+  return false;
+}
+
 bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
   // If no CoroAllocs, we cannot suppress allocation, so elision is not
   // possible.
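Aside: stripped of the LLVM ADTs and of the coro.suspend switch special case, the helper above is a bounded depth-first walk of the CFG. Below is a minimal standalone sketch of the same idea, illustrative only: Block, Targets, and Blocked are made-up names, and a flat default visit limit stands in for the patch's 32 * (1 + number of destroys).

#include <unordered_set>
#include <vector>

struct Block {
  std::vector<const Block *> Succs; // CFG successor edges
};

// Is any block in Targets reachable from Start without passing through a
// block in Blocked (the coro.destroy blocks)? Gives up and returns true
// after visiting Limit blocks, mirroring the conservative cutoff above.
bool hasEscapePath(const Block *Start,
                   const std::unordered_set<const Block *> &Targets,
                   const std::unordered_set<const Block *> &Blocked,
                   unsigned Limit = 32) {
  // Pre-mark blocked blocks as visited so the walk never crosses them.
  std::unordered_set<const Block *> Visited = Blocked;
  std::vector<const Block *> Worklist{Start};
  do {
    const Block *BB = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(BB).second)
      continue; // already seen, or a destroy block
    if (Targets.count(BB))
      return true; // reached a terminator: an escape path exists
    if (--Limit == 0)
      return true; // CFG too large: conservatively assume an escape
    for (const Block *S : BB->Succs)
      Worklist.push_back(S);
  } while (!Worklist.empty());
  return false; // every path to a terminator is cut off by a destroy
}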
@@ -154,61 +202,34 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
   // If the value escaped, then coro.destroy would have been referencing a
   // memory location storing that value and not the virtual register.
 
-  SmallPtrSet<Instruction *, 8> Terminators;
-  bool HasMultiPred = false;
+  SmallPtrSet<BasicBlock *, 8> Terminators;
   // First gather all of the non-exceptional terminators for the function.
   // Consider the final coro.suspend as the real terminator when the current
   // function is a coroutine.
-  if (CoroFinalSuspend) {
-    // If the block of the final coro.suspend has more than one predecessor,
-    // then there is one resume path and the others are exceptional paths;
-    // consider these predecessors as terminators.
-    BasicBlock *FinalBB = CoroFinalSuspend->getParent();
-    if (FinalBB->hasNPredecessorsOrMore(2)) {
-      HasMultiPred = true;
-      for (auto *B : predecessors(FinalBB))
-        Terminators.insert(B->getTerminator());
-    } else
-      Terminators.insert(CoroFinalSuspend);
-  } else {
-    for (BasicBlock &B : *F) {
-      auto *TI = B.getTerminator();
-      if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() &&
-          !isa<UnreachableInst>(TI))
-        Terminators.insert(TI);
-    }
-  }
+  for (BasicBlock &B : *F) {
+    auto *TI = B.getTerminator();
+    if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() &&
+        !isa<UnreachableInst>(TI))
+      Terminators.insert(&B);
+  }
 
   // Filter out the coro.destroy that lie along exceptional paths.
-  SmallPtrSet<CoroSubFnInst *, 4> DAs;
-  SmallPtrSet<Instruction *, 2> TIs;
   SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
   for (auto &It : DestroyAddr) {
-    for (CoroSubFnInst *DA : It.second) {
-      for (Instruction *TI : Terminators) {
-        if (DT.dominates(DA, TI)) {
-          if (HasMultiPred)
-            TIs.insert(TI);
-          else
-            DAs.insert(DA);
+    for (Instruction *DA : It.second) {
+      for (BasicBlock *TI : Terminators) {
+        if (DT.dominates(DA, TI->getTerminator())) {
+          ReferencedCoroBegins.insert(It.first);
           break;
         }
       }
     }
-    // If all the predecessors dominate coro.destroys that reference the
-    // same coro.begin, record the coro.begin.
-    if (TIs.size() == Terminators.size()) {
-      ReferencedCoroBegins.insert(It.first);
-      TIs.clear();
-    }
-  }
 
-  // Find all the coro.begin referenced by coro.destroy along happy paths.
-  for (CoroSubFnInst *DA : DAs) {
-    if (auto *CB = dyn_cast<CoroBeginInst>(DA->getFrame()))
-      ReferencedCoroBegins.insert(CB);
-    else
-      return false;
+    // Check whether there is any path from coro.begin to the Terminators
+    // that does not pass through one of the coro.destroys.
+    if (!ReferencedCoroBegins.count(It.first) &&
+        !hasEscapePath(It.first, Terminators))
+      ReferencedCoroBegins.insert(It.first);
   }
 
   // If the size of the set is the same as the total number of coro.begin, that means we
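A worked example of the new fallback, as a driver for the sketch above (compile the two together; again illustrative only): coro.begin sits in Entry and the only coro.destroy sits in Left. That destroy dominates nothing past the join block, so the dominance test in shouldElide fails, yet the reachability walk still gives the right answer in both variants.

#include <cassert>

int main() {
  Block Entry, Left, Right, Ret;
  Entry.Succs = {&Left, &Right};
  Left.Succs = {&Ret};  // the coro.destroy lives here
  Right.Succs = {&Ret}; // no destroy on this path

  // Destroy on one path only: Ret stays reachable via Right, so the
  // frame escapes and elision is rightly rejected.
  assert(hasEscapePath(&Entry, {&Ret}, {&Left}));

  // Destroy on both paths: Ret is unreachable from Entry, so the heap
  // allocation can be elided even with no single dominating destroy.
  assert(!hasEscapePath(&Entry, {&Ret}, {&Left, &Right}));
  return 0;
}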
@@ -219,7 +240,7 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
 
 void Lowerer::collectPostSplitCoroIds(Function *F) {
   CoroIds.clear();
-  CoroFinalSuspend = nullptr;
+  CoroSuspendSwitches.clear();
   for (auto &I : instructions(F)) {
     if (auto *CII = dyn_cast<CoroIdInst>(&I))
       if (CII->getInfo().isPostSplit())
@@ -227,12 +248,16 @@ void Lowerer::collectPostSplitCoroIds(Function *F) {
       if (CII->getCoroutine() != CII->getFunction())
         CoroIds.push_back(CII);
 
+    // Consider a case like:
+    //   %0 = call i8 @llvm.coro.suspend(...)
+    //   switch i8 %0, label %suspend [i8 0, label %resume
+    //                                 i8 1, label %cleanup]
+    // and collect the SwitchInsts, which are used by the escape analysis later.
     if (auto *CSI = dyn_cast<CoroSuspendInst>(&I))
-      if (CSI->isFinal()) {
-        if (!CoroFinalSuspend)
-          CoroFinalSuspend = CSI;
-        else
-          report_fatal_error("Only one suspend point can be marked as final");
-      }
+      if (CSI->hasOneUse() && isa<SwitchInst>(CSI->use_begin()->getUser())) {
+        SwitchInst *SWI = cast<SwitchInst>(CSI->use_begin()->getUser());
+        if (SWI->getNumCases() == 2)
+          CoroSuspendSwitches.insert(SWI);
+      }
   }
 }
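A note on how the two CoroElide.cpp hunks above cooperate: collectPostSplitCoroIds records every two-case switch whose condition is directly a coro.suspend, and hasEscapePath then walks only getSuccessor(1) and getSuccessor(2) of such a switch, its two case destinations. getSuccessor(0), the default destination, is the edge that returns to the caller with the coroutine merely suspended; since the frame is not touched outside the coroutine body, following that edge would report a spurious escape.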

llvm/test/Transforms/Coroutines/coro-heap-elide.ll

@@ -196,6 +196,58 @@ coro.ret:
   ret void
 }
 
+; CHECK-LABEL: @callResume_with_coro_suspend_3(
+define void @callResume_with_coro_suspend_3(i8 %cond) {
+entry:
+; CHECK: alloca %f.frame
+  switch i8 %cond, label %coro.ret [
+    i8 0, label %init.suspend
+    i8 1, label %coro.ret
+  ]
+
+init.suspend:
+; CHECK-NOT: llvm.coro.begin
+; CHECK-NOT: CustomAlloc
+; CHECK: call void @may_throw()
+  %hdl = call i8* @f()
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8* %vFrame)
+  %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %1 = bitcast i8* %0 to void (i8*)*
+  call fastcc void %1(i8* %hdl)
+  %2 = call token @llvm.coro.save(i8* %hdl)
+  %3 = call i8 @llvm.coro.suspend(token %2, i1 false)
+  switch i8 %3, label %coro.ret [
+    i8 0, label %final.suspend
+    i8 1, label %cleanups
+  ]
+
+; CHECK-LABEL: final.suspend:
+final.suspend:
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.cleanup to void (i8*)*)(i8* %vFrame)
+  %4 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %5 = bitcast i8* %4 to void (i8*)*
+  call fastcc void %5(i8* %hdl)
+  %6 = call token @llvm.coro.save(i8* %hdl)
+  %7 = call i8 @llvm.coro.suspend(token %6, i1 true)
+  switch i8 %7, label %coro.ret [
+    i8 0, label %coro.ret
+    i8 1, label %cleanups
+  ]
+
+; CHECK-LABEL: cleanups:
+cleanups:
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.cleanup to void (i8*)*)(i8* %vFrame)
+  %8 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %9 = bitcast i8* %8 to void (i8*)*
+  call fastcc void %9(i8* %hdl)
+  br label %coro.ret
+
+; CHECK-LABEL: coro.ret:
+coro.ret:
+; CHECK-NEXT: ret void
+  ret void
+}
+
 ; CHECK-LABEL: @callResume_PR34897_no_elision(
@@ -231,6 +283,41 @@ return:
   ret void
 }
 
+; CHECK-LABEL: @callResume_PR34897_elision(
+define void @callResume_PR34897_elision(i1 %cond) {
+; CHECK-LABEL: entry:
+entry:
+; CHECK: alloca %f.frame
+; CHECK: tail call void @bar(
+  tail call void @bar(i8* null)
+  br i1 %cond, label %if.then, label %if.else
+
+if.then:
+; CHECK-NOT: CustomAlloc
+; CHECK: call void @may_throw()
+  %hdl = call i8* @f()
+; CHECK: call void @bar(
+  tail call void @bar(i8* %hdl)
+; CHECK: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8*
+  %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+  %1 = bitcast i8* %0 to void (i8*)*
+  call fastcc void %1(i8* %hdl)
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.cleanup to void (i8*)*)(i8*
+  %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+  %3 = bitcast i8* %2 to void (i8*)*
+  call fastcc void %3(i8* %hdl)
+  br label %return
+
+if.else:
+  br label %return
+
+; CHECK-LABEL: return:
+return:
+; CHECK: ret void
+  ret void
+}
+
 ; a coroutine start function (cannot elide heap alloc, due to second argument to
 ; coro.begin not pointing to coro.alloc)
 define i8* @f_no_elision() personality i8* null {