[coroutines] Add support for symmetric control transfer (musttail on coro.resumes followed by a suspend)

Summary:
Add musttail to any resume instruction that is immediately followed by a
suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call
for symmetrical coroutine control transfer (C++ Coroutines TS extension).
This transformation is done only in the resume part of the coroutine that has
identical signature and calling convention as the coro.resume call.

Reviewers: GorNishanov

Reviewed By: GorNishanov

Subscribers: EricWF, majnemer, llvm-commits

Differential Revision: https://reviews.llvm.org/D37125

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@311751 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Gor Nishanov 2017-08-25 02:25:10 +00:00
parent 5872ccc628
commit 8970bfadd2
2 changed files with 148 additions and 0 deletions

View File

@ -27,6 +27,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@ -400,6 +401,91 @@ static void postSplitCleanup(Function &F) {
FPM.doFinalization();
}
// Control is assumed to reach NewBlock through the terminator Prev. For each
// PHI node at the head of NewBlock, record in ResolvedValues the value that
// flows in along the edge from Prev's block. If that incoming value already
// has an entry in the map, the mapped value is recorded instead, so chains of
// PHIs collapse to the value first seen.
static void
scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
                          DenseMap<Value *, Value *> &ResolvedValues) {
  BasicBlock *FromBB = Prev->getParent();
  // PHI nodes are grouped at the start of a block; stop at the first non-PHI.
  for (Instruction *Cur = &*NewBlock->begin(); isa<PHINode>(Cur);
       Cur = Cur->getNextNode()) {
    auto *Phi = cast<PHINode>(Cur);
    Value *Resolved = Phi->getIncomingValueForBlock(FromBB);
    // Substitute a previously resolved value when there is one. The result is
    // copied into a local before the map is written to below.
    auto Known = ResolvedValues.find(Resolved);
    if (Known != ResolvedValues.end())
      Resolved = Known->second;
    ResolvedValues[Phi] = Resolved;
  }
}
// Replace a sequence of branches leading to a ret with a clone of that ret
// instruction. A suspend is represented by a switch, so PHI values are tracked
// along the walked path and used to select the switch successor whenever the
// condition resolves to a constant.
//
// InitialInst is the instruction the walk starts from. Returns true if
// InitialInst is a ret, or if it was replaced by a clone of the ret reached by
// following unconditional branches and resolvable switches. Returns false, and
// leaves the IR untouched, if the walk hits anything else (a conditional
// branch, an unresolvable switch, another kind of terminator, or a
// non-terminator instruction).
//
// NOTE(review): the walk keeps no visited set, so a cycle made only of
// unconditional branches would not terminate - confirm callers cannot hit it.
static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
  DenseMap<Value *, Value *> ResolvedValues;

  Instruction *I = InitialInst;
  while (isa<TerminatorInst>(I)) {
    if (isa<ReturnInst>(I)) {
      if (I != InitialInst) {
        // InitialInst is a branch or switch that is about to be erased, so its
        // block stops being a predecessor of every block it targeted. Drop the
        // matching PHI entries first (once per outgoing edge); otherwise a
        // successor that is still reachable through another predecessor would
        // be left with a PHI entry for a block that no longer branches to it.
        // Single-entry PHIs are kept rather than folded, so nothing besides
        // the incoming entries is rewritten here.
        BasicBlock *InitialBB = InitialInst->getParent();
        auto *InitialTerm = cast<TerminatorInst>(InitialInst);
        for (unsigned Idx = 0, E = InitialTerm->getNumSuccessors(); Idx != E;
             ++Idx)
          InitialTerm->getSuccessor(Idx)->removePredecessor(InitialBB, true);
        ReplaceInstWithInst(InitialInst, I->clone());
      }
      return true;
    }
    if (auto *BR = dyn_cast<BranchInst>(I)) {
      if (BR->isUnconditional()) {
        // Step into the sole successor, remembering what its PHIs resolve to
        // when entered from here.
        BasicBlock *BB = BR->getSuccessor(0);
        scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
        I = BB->getFirstNonPHIOrDbgOrLifetime();
        continue;
      }
    } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
      // The condition may be a PHI whose value along this path is known.
      Value *V = SI->getCondition();
      auto it = ResolvedValues.find(V);
      if (it != ResolvedValues.end())
        V = it->second;
      if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
        BasicBlock *BB = SI->findCaseValue(Cond)->getCaseSuccessor();
        scanPHIsAndUpdateValueMap(I, BB, ResolvedValues);
        I = BB->getFirstNonPHIOrDbgOrLifetime();
        continue;
      }
    }
    // Conditional branch, unresolved switch, or some other terminator.
    return false;
  }
  return false;
}
// Add musttail to any resume instruction that is immediately followed by a
// suspend (i.e. ret). We do this even in -O0 to support guaranteed tail call
// for symmetrical coroutine control transfer (C++ Coroutines TS extension).
// This transformation is intended only for the resume part of the coroutine,
// which is expected to have the same signature and calling convention as the
// coro.resume call. Because musttail requires matching calling conventions,
// calls whose calling convention differs from F's are left untouched.
static void addMustTailToCoroResumes(Function &F) {
  bool Changed = false;

  // Collect potential resume instructions.
  SmallVector<CallInst *, 4> Resumes;
  for (auto &I : instructions(F))
    if (auto *Call = dyn_cast<CallInst>(&I))
      if (auto *CalledValue = Call->getCalledValue())
        // CoroEarly pass replaced coro resumes with indirect calls to an
        // address returned by CoroSubFnInst intrinsic. See if it is one of
        // those.
        if (isa<CoroSubFnInst>(CalledValue->stripPointerCasts()))
          Resumes.push_back(Call);

  // Set musttail on those that are followed by a ret instruction.
  for (CallInst *Call : Resumes) {
    // Check this before touching the CFG: a musttail call with a calling
    // convention different from its caller's is not valid IR, so such a call
    // can never be marked and its terminator must not be rewritten.
    if (Call->getCallingConv() != F.getCallingConv())
      continue;
    if (!simplifyTerminatorLeadingToRet(Call->getNextNode()))
      continue;
    Call->setTailCallKind(llvm::CallInst::TCK_MustTail);
    Changed = true;
  }

  // Replacing terminators with ret may have orphaned the blocks that used to
  // follow the call.
  if (Changed)
    removeUnreachableBlocks(F);
}
// Coroutine has no suspend points. Remove heap allocation for the coroutine
// frame if possible.
static void handleNoSuspendCoroutine(CoroBeginInst *CoroBegin, Type *FrameTy) {
@ -608,6 +694,8 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
postSplitCleanup(*DestroyClone);
postSplitCleanup(*CleanupClone);
addMustTailToCoroResumes(*ResumeClone);
// Store addresses resume/destroy/cleanup functions in the coroutine frame.
updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);

View File

@ -0,0 +1,60 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call.
; RUN: opt < %s -coro-split -S | FileCheck %s
; Coroutine marked "coroutine.presplit" with two suspend points. Each suspend
; is directly preceded by an indirect fastcc call through a pointer obtained
; from llvm.coro.subfn.addr, which is the call pattern the FileCheck
; expectations in this file look for.
define void @f() "coroutine.presplit"="1" {
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
%alloc = call i8* @malloc(i64 16) #3
%vFrame = call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %alloc)
%save = call token @llvm.coro.save(i8* null)
; First indirect call, placed before the first suspend. The expectations for
; @f require that this one is not turned into a musttail call.
%addr1 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
%pv1 = bitcast i8* %addr1 to void (i8*)*
call fastcc void %pv1(i8* null)
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
; Result 0 continues to await.ready; every other result goes to exit.
switch i8 %suspend, label %exit [
i8 0, label %await.ready
i8 1, label %exit
]
await.ready:
%save2 = call token @llvm.coro.save(i8* null)
; Second indirect call, placed after the first suspend. The expectations for
; @f.resume require a musttail call immediately followed by ret void.
%addr2 = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
%pv2 = bitcast i8* %addr2 to void (i8*)*
call fastcc void %pv2(i8* null)
%suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
; Every outcome of the second suspend leads to exit, so the only path after
; the call above ends in the ret below.
switch i8 %suspend2, label %exit [
i8 0, label %exit
i8 1, label %exit
]
exit:
call i1 @llvm.coro.end(i8* null, i1 false)
ret void
}
; Verify that in the initial function resume is not marked with musttail.
; CHECK-LABEL: @f(
; CHECK: %[[addr1:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
; CHECK-NEXT: %[[pv1:.+]] = bitcast i8* %[[addr1]] to void (i8*)*
; CHECK-NOT: musttail call fastcc void %[[pv1]](i8* null)
; Verify that in the resume part resume call is marked with musttail.
; CHECK-LABEL: @f.resume(
; CHECK: %[[addr2:.+]] = call i8* @llvm.coro.subfn.addr(i8* null, i8 0)
; CHECK-NEXT: %[[pv2:.+]] = bitcast i8* %[[addr2]] to void (i8*)*
; CHECK-NEXT: musttail call fastcc void %[[pv2]](i8* null)
; CHECK-NEXT: ret void
; Declarations for the coroutine intrinsics and malloc. @f calls coro.id,
; coro.begin, coro.save, coro.suspend, coro.end, coro.subfn.addr and malloc;
; coro.alloc, coro.size, coro.frame and coro.free are declared but not called.
; NOTE(review): attribute groups #2, #3 and #5 are referenced here but their
; definitions are not visible in this file - confirm this is intentional.
declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*)
declare i1 @llvm.coro.alloc(token) #3
declare i64 @llvm.coro.size.i64() #5
declare i8* @llvm.coro.begin(token, i8* writeonly) #3
declare token @llvm.coro.save(i8*) #3
declare i8* @llvm.coro.frame() #5
declare i8 @llvm.coro.suspend(token, i1) #3
declare i8* @llvm.coro.free(token, i8* nocapture readonly) #2
declare i1 @llvm.coro.end(i8*, i1) #3
declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8) #5
declare i8* @malloc(i64)