[Coroutines] Part 6: Elide dynamic allocation of a coroutine frame when possible
Summary:
A particular coroutine usage pattern, where a coroutine is created, manipulated and destroyed by the same calling function, is common for coroutines implementing the RAII idiom and is suitable for the allocation elision optimization, which avoids dynamic allocation by storing the coroutine frame as a static `alloca` in its caller.

coro.free and coro.alloc intrinsics are used to indicate which code needs to be suppressed when dynamic allocation elision happens:

```
entry:
  %elide = call i8* @llvm.coro.alloc()
  %need.dyn.alloc = icmp ne i8* %elide, null
  br i1 %need.dyn.alloc, label %coro.begin, label %dyn.alloc
dyn.alloc:
  %alloc = call i8* @CustomAlloc(i32 4)
  br label %coro.begin
coro.begin:
  %phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ]
  %hdl = call i8* @llvm.coro.begin(i8* %phi, i32 0, i8* null,
                                   i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*))
```

and

```
  %mem = call i8* @llvm.coro.free(i8* %hdl)
  %need.dyn.free = icmp ne i8* %mem, null
  br i1 %need.dyn.free, label %dyn.free, label %if.end
dyn.free:
  call void @CustomFree(i8* %mem)
  br label %if.end
if.end:
  ...
```

If heap allocation elision is performed, we replace coro.alloc with a static alloca on the caller's frame and coro.free with a null constant.

Also, if there are any tail calls referencing the coroutine frame, we need to remove the tail call attribute from them, since the coroutine frame now lives on the stack.

Documentation and overview are here: http://llvm.org/docs/Coroutines.html.

Upstreaming sequence (rough plan)
1. Add documentation. (https://reviews.llvm.org/D22603)
2. Add coroutine intrinsics. (https://reviews.llvm.org/D22659)
3. Add empty coroutine passes. (https://reviews.llvm.org/D22847)
4. Add coroutine devirtualization + tests.
   ab) Lower coro.resume and coro.destroy (https://reviews.llvm.org/D22998)
   c) Do devirtualization (https://reviews.llvm.org/D23229)
5. Add CGSCC restart trigger + tests. (https://reviews.llvm.org/D23234)
6. Add coroutine heap elision + tests. <= we are here
7. Add the rest of the logic (split into more patches)

Reviewers: mehdi_amini, majnemer

Subscribers: mehdi_amini, llvm-commits

Differential Revision: https://reviews.llvm.org/D23245

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@278242 91177308-0d34-0410-b5e6-96231b3b80d8
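From the caller's side, the pattern described above looks roughly like this: the coroutine handle never escapes the calling function, so once the start function is inlined the whole lifetime of the frame is visible to CoroElide. This is a minimal sketch modeled on the `callResume` test added in this patch; `@f` and `@llvm.coro.subfn.addr` follow that test, and index 0 selects the resume subfunction, index 1 the destroy subfunction:

```
define void @caller() {
entry:
  ; Create the coroutine. After inlining @f, CoroElide sees its
  ; coro.alloc/coro.begin and can place the frame in an alloca here.
  %hdl = call i8* @f()

  ; Resume once through the indirect resume pointer.
  %r.addr = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
  %r = bitcast i8* %r.addr to void (i8*)*
  call fastcc void %r(i8* %hdl)

  ; Destroy before returning, so the coroutine's lifetime is fully
  ; enclosed in the caller and the heap allocation can be elided.
  %d.addr = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
  %d = bitcast i8* %d.addr to void (i8*)*
  call fastcc void %d(i8* %hdl)
  ret void
}

declare i8* @f()
declare i8* @llvm.coro.subfn.addr(i8*, i8)
```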
This commit is contained in:
parent 9cba8e90cb
commit bd0032e1a2
@@ -95,7 +95,8 @@ The LLVM IR for this coroutine looks like this:
  entry:
    %size = call i32 @llvm.coro.size.i32()
    %alloc = call i8* @malloc(i32 %size)
    %hdl = call noalias i8* @llvm.coro.begin(i8* %alloc, i32 0, i8* null, i8* null)
    %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* null, i8* null)
    %hdl = call noalias i8* @llvm.coro.frame(token %beg)
    br label %loop
  loop:
    %n.val = phi i32 [ %n, %entry ], [ %inc, %loop ]
@@ -115,9 +116,10 @@ The LLVM IR for this coroutine looks like this:

The `entry` block establishes the coroutine frame. The `coro.size`_ intrinsic is
lowered to a constant representing the size required for the coroutine frame.
The `coro.begin`_ intrinsic initializes the coroutine frame and returns the
coroutine handle. The first parameter of `coro.begin` is given a block of memory
to be used if the coroutine frame needs to be allocated dynamically.
The `coro.begin`_ intrinsic initializes the coroutine frame and returns a
token that is used to obtain the coroutine handle via `coro.frame` intrinsic.
The first parameter of `coro.begin` is given a block of memory to be used if the
coroutine frame needs to be allocated dynamically.

The `cleanup` block destroys the coroutine frame. The `coro.free`_ intrinsic,
given the coroutine handle, returns a pointer of the memory block to be freed or
@@ -160,12 +162,13 @@ After resume and destroy parts are outlined, function `f` will contain only the
code responsible for creation and initialization of the coroutine frame and
execution of the coroutine until a suspend point is reached:

.. code-block:: llvm
.. code-block:: none

  define i8* @f(i32 %n) {
  entry:
    %alloc = call noalias i8* @malloc(i32 24)
    %0 = call noalias i8* @llvm.coro.begin(i8* %alloc, i32 0, i8* null, i8* null)
    %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* null, i8* null)
    %0 = call i8* @llvm.coro.frame(token %beg)
    %frame = bitcast i8* %0 to %f.frame*
    %1 = getelementptr %f.frame, %f.frame* %frame, i32 0, i32 0
    store void (%f.frame*)* @f.resume, void (%f.frame*)** %1
@@ -219,7 +222,7 @@ In the entry block, we will call `coro.alloc`_ intrinsic that will return `null`
when dynamic allocation is required, and an address of an alloca on the caller's
frame where coroutine frame can be stored if dynamic allocation is elided.

.. code-block:: llvm
.. code-block:: none

  entry:
    %elide = call i8* @llvm.coro.alloc()
@@ -231,7 +234,7 @@ frame where coroutine frame can be stored if dynamic allocation is elided.
    br label %coro.begin
  coro.begin:
    %phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ]
    %hdl = call noalias i8* @llvm.coro.begin(i8* %phi, i32 0, i8* null, i8* null)
    %beg = call token @llvm.coro.begin(i8* %phi, i8* null, i32 0, i8* null, i8* null)

In the cleanup block, we will make freeing the coroutine frame conditional on
`coro.free`_ intrinsic. If allocation is elided, `coro.free`_ returns `null`
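The conditional deallocation this paragraph describes is the mirror image of the conditional allocation above; a sketch of the cleanup path, following the example in the commit message (with `@CustomFree` standing in for whatever deallocation function the frontend uses), looks like:

```
cleanup:
  %mem = call i8* @llvm.coro.free(i8* %hdl)
  %need.dyn.free = icmp ne i8* %mem, null
  br i1 %need.dyn.free, label %dyn.free, label %if.end
dyn.free:
  call void @CustomFree(i8* %mem)
  br label %if.end
if.end:
  ; the frame was either freed above or was never heap-allocated
  ...
```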
@@ -421,7 +424,8 @@ store the current value produced by a coroutine.
    br label %coro.begin
  coro.begin:
    %phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ]
    %hdl = call noalias i8* @llvm.coro.begin(i8* %phi, i32 0, i8* %pv, i8* null)
    %beg = call token @llvm.coro.begin(i8* %phi, i8* %elide, i32 0, i8* %pv, i8* null)
    %hdl = call i8* @llvm.coro.frame(token %beg)
    br label %loop
  loop:
    %n.val = phi i32 [ %n, %coro.begin ], [ %inc, %loop ]
@@ -687,15 +691,16 @@ a coroutine user are responsible to makes sure there is no data races.
Example:
""""""""

.. code-block:: llvm
.. code-block:: text

  define i8* @f(i32 %n) {
  entry:
    %promise = alloca i32
    %pv = bitcast i32* %promise to i8*
    ...
    ; the third argument to coro.begin points to the coroutine promise.
    %hdl = call noalias i8* @llvm.coro.begin(i8* %alloc, i32 0, i8* %pv, i8* null)
    ; the fourth argument to coro.begin points to the coroutine promise.
    %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* %pv, i8* null)
    %hdl = call noalias i8* @llvm.coro.frame(token %beg)
    ...
    store i32 42, i32* %promise ; store something into the promise
    ...
@@ -752,12 +757,14 @@ the coroutine frame.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::

  declare i8* @llvm.coro.begin(i8* <mem>, i32 <align>, i8* <promise>, i8* <fnaddr>)
  declare i8* @llvm.coro.begin(i8* <mem>, i8* <elide>, i32 <align>, i8* <promise>, i8* <fnaddr>)

Overview:
"""""""""

The '``llvm.coro.begin``' intrinsic returns an address of the coroutine frame.
The '``llvm.coro.begin``' intrinsic captures coroutine initialization
information and returns a token that can be used by `coro.frame` intrinsic to
return an address of the coroutine frame.

Arguments:
""""""""""
@@ -765,15 +772,17 @@ Arguments:
The first argument is a pointer to a block of memory where coroutine frame
will be stored.

The second argument provides information on the alignment of the memory returned
The second argument is either null or an SSA value of `coro.alloc` intrinsic.

The third argument provides information on the alignment of the memory returned
by the allocation function and given to `coro.begin` by the first argument. If
this argument is 0, the memory is assumed to be aligned to 2 * sizeof(i8*).
This argument only accepts constants.

The third argument, if not `null`, designates a particular alloca instruction to
The fourth argument, if not `null`, designates a particular alloca instruction to
be a `coroutine promise`_.

The fourth argument is `null` before coroutine is split, and later is replaced
The fifth argument is `null` before coroutine is split, and later is replaced
to point to a private global constant array containing function pointers to
outlined resume and destroy parts of the coroutine.

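To keep the five operands of the new form straight, here is the call shape implied by the argument list above, with each operand annotated (the concrete values are illustrative; the layout follows the description rather than any particular frontend):

```
  %beg = call token @llvm.coro.begin(
      i8* %mem,     ; block of memory for the coroutine frame
      i8* %elide,   ; null, or the result of llvm.coro.alloc
      i32 0,        ; alignment; 0 means 2 * sizeof(i8*)
      i8* %promise, ; optional alloca designated as the coroutine promise
      i8* null)     ; resumer info, filled in when the coroutine is split
  %hdl = call i8* @llvm.coro.frame(token %beg)
```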
@@ -781,10 +790,10 @@ Semantics:
""""""""""

Depending on the alignment requirements of the objects in the coroutine frame
and/or on the codegen compactness reasons the pointer returned from `coro.begin`
may be at offset to the `%mem` argument. (This could be beneficial if
instructions that express relative access to data can be more compactly encoded
with small positive and negative offsets).
and/or on the codegen compactness reasons the pointer returned from `coro.frame`
associated with a particular `coro.begin` may be at offset to the `%mem`
argument. (This could be beneficial if instructions that express relative access
to data can be more compactly encoded with small positive and negative offsets).

A frontend should emit exactly one `coro.begin` intrinsic per coroutine.

@@ -807,7 +816,7 @@ Arguments:
""""""""""

A pointer to the coroutine frame. This should be the same pointer that was
returned by prior `coro.begin` call.
returned by prior `coro.frame` call.

Example (custom deallocation function):
"""""""""""""""""""""""""""""""""""""""
@@ -862,10 +871,13 @@ alloca storing the coroutine frame. Otherwise, it is lowered to constant `null`.

A frontend should emit at most one `coro.alloc` intrinsic per coroutine.

If `coro.alloc` is present, the second parameter to `coro.begin` should refer
to it.

Example:
""""""""

.. code-block:: llvm
.. code-block:: text

  entry:
    %elide = call i8* @llvm.coro.alloc()
@@ -879,7 +891,8 @@ Example:

  coro.begin:
    %phi = phi i8* [ %elide, %entry ], [ %alloc, %coro.alloc ]
    %frame = call i8* @llvm.coro.begin(i8* %phi, i32 0, i8* null, i8* null)
    %beg = call token @llvm.coro.begin(i8* %phi, i8* %elide, i32 0, i8* null, i8* null)
    %frame = call i8* @llvm.coro.frame(token %beg)

.. _coro.frame:

@@ -898,14 +911,12 @@ the enclosing coroutine.
Arguments:
""""""""""

None
A token that refers to `coro.begin` instruction.

Semantics:
""""""""""

This intrinsic is lowered to refer to the `coro.begin`_ instruction. This is
a frontend convenience intrinsic that makes it easier to refer to the
coroutine frame.
This intrinsic is lowered to refer to the address of the coroutine frame.

.. _coro.end:

@@ -1164,7 +1175,7 @@ CoroElide
---------
The pass CoroElide examines if the inlined coroutine is eligible for heap
allocation elision optimization. If so, it replaces `coro.alloc` and
`coro.begin` intrinsic with an address of a coroutine frame placed on its caller
`coro.frame` intrinsic with an address of a coroutine frame placed on its caller
and replaces `coro.free` intrinsics with `null` to remove the deallocation code.
This pass also replaces `coro.resume` and `coro.destroy` intrinsics with direct
calls to resume and destroy functions for a particular coroutine where possible.
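Concretely, for an elision-eligible caller the effect of CoroElide is roughly the following (a sketch assuming a frame type `%f.frame`, mirroring the CHECK lines of the coro-heap-elide.ll test added by this patch):

```
; Before: the frame comes from coro.alloc / a dynamic allocator, and
; resume/destroy go through llvm.coro.subfn.addr.
;   %elide = call i8* @llvm.coro.alloc()
;   ...
;   %beg = call token @llvm.coro.begin(i8* %phi, i8* %elide, i32 0, i8* null, i8* %info)
;   %hdl = call i8* @llvm.coro.frame(token %beg)

; After CoroElide: the frame is a caller-local alloca, coro.free folds to null,
; and resume/destroy become direct calls on the stack frame.
  %frame = alloca %f.frame
  %vFrame = bitcast %f.frame* %frame to i8*
  call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8* %vFrame)
  call fastcc void bitcast (void (%f.frame*)* @f.destroy to void (i8*)*)(i8* %vFrame)
```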
@@ -1178,11 +1189,11 @@ Upstreaming sequence (rough plan)
=================================
#. Add documentation.
#. Add coroutine intrinsics.
#. Add empty coroutine passes. <== we are here
#. Add empty coroutine passes.
#. Add coroutine devirtualization + tests.
#. Add CGSCC restart trigger + tests.
#. Add coroutine heap elision + tests.
#. Add custom allocation heap elision + tests.
#. Add custom allocation heap elision + tests. <== we are here
#. Add coroutine splitting logic + tests.
#. Add simple coroutine frame builder + tests.
#. Add the rest of the logic + tests. (Maybe split further as needed).

@@ -603,16 +603,16 @@ def int_experimental_gc_relocate : Intrinsic<[llvm_any_ty],
// Coroutine Structure Intrinsics.

def int_coro_alloc : Intrinsic<[llvm_ptr_ty], [], []>;
def int_coro_begin : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty,
                                llvm_ptr_ty, llvm_ptr_ty],
                               [WriteOnly<0>, ReadNone<2>, ReadOnly<3>,
                                NoCapture<3>]>;
def int_coro_begin : Intrinsic<[llvm_token_ty], [llvm_ptr_ty, llvm_ptr_ty,
                                llvm_i32_ty, llvm_ptr_ty, llvm_ptr_ty],
                               [WriteOnly<0>, WriteOnly<0>,
                                ReadNone<3>, ReadOnly<4>, NoCapture<4>]>;

def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
                              [IntrArgMemOnly, ReadOnly<0>, NoCapture<0>]>;
def int_coro_end : Intrinsic<[], [llvm_ptr_ty, llvm_i1_ty], []>;

def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_coro_frame : Intrinsic<[llvm_ptr_ty], [llvm_token_ty], [IntrNoMem]>;
def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;

def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>;

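For reference, these TableGen definitions correspond to the following IR-level declarations, which is how the tests in this patch declare the intrinsics:

```
declare i8* @llvm.coro.alloc()
declare token @llvm.coro.begin(i8*, i8*, i32, i8*, i8*)
declare i8* @llvm.coro.frame(token)
declare i8* @llvm.coro.free(i8*)
```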
@@ -16,6 +16,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

@@ -39,11 +40,29 @@ struct CoroElide : FunctionPass {

  bool runOnFunction(Function &F) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.setPreservesCFG();
  }
};
}

char CoroElide::ID = 0;
INITIALIZE_PASS_BEGIN(
    CoroElide, "coro-elide",
    "Coroutine frame allocation elision and indirect calls replacement", false,
    false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(
    CoroElide, "coro-elide",
    "Coroutine frame allocation elision and indirect calls replacement", false,
    false)

Pass *llvm::createCoroElidePass() { return new CoroElide(); }

//===----------------------------------------------------------------------===//
// Implementation
//===----------------------------------------------------------------------===//

// Go through the list of coro.subfn.addr intrinsics and replace them with the
// provided constant.
static void replaceWithConstant(Constant *Value,
@@ -68,24 +87,103 @@ static void replaceWithConstant(Constant *Value,
  replaceAndRecursivelySimplify(I, Value);
}

// See if any operand of the call instruction references the coroutine frame.
static bool operandReferences(CallInst *CI, AllocaInst *Frame, AAResults &AA) {
  for (Value *Op : CI->operand_values())
    if (AA.alias(Op, Frame) != NoAlias)
      return true;
  return false;
}

// Look for any tail calls referencing the coroutine frame and remove tail
// attribute from them, since now coroutine frame resides on the stack and tail
// call implies that the function does not reference anything on the stack.
static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) {
  Function &F = *Frame->getFunction();
  MemoryLocation Mem(Frame);
  for (Instruction &I : instructions(F))
    if (auto *Call = dyn_cast<CallInst>(&I))
      if (Call->isTailCall() && operandReferences(Call, Frame, AA)) {
        // FIXME: If we ever hit this check, evaluate whether it is more
        // appropriate to retain musttail and allow the code to compile.
        if (Call->isMustTailCall())
          report_fatal_error("Call referring to the coroutine frame cannot be "
                             "marked as musttail");
        Call->setTailCall(false);
      }
}

// Given a resume function @f.resume(%f.frame* %frame), returns %f.frame type.
static Type *getFrameType(Function *Resume) {
  auto *ArgType = Resume->getArgumentList().front().getType();
  return cast<PointerType>(ArgType)->getElementType();
}

// Finds first non alloca instruction in the entry block of a function.
static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {
  for (Instruction &I : F->getEntryBlock())
    if (!isa<AllocaInst>(&I))
      return &I;
  llvm_unreachable("no terminator in the entry block");
}

// To elide heap allocations we need to suppress code blocks guarded by
// llvm.coro.alloc and llvm.coro.free instructions.
static void elideHeapAllocations(CoroBeginInst *CoroBegin, Type *FrameTy,
                                 CoroAllocInst *AllocInst, AAResults &AA) {
  LLVMContext &C = CoroBegin->getContext();
  auto *InsertPt = getFirstNonAllocaInTheEntryBlock(CoroBegin->getFunction());

  // FIXME: Design how to transmit alignment information for every alloca that
  // is spilled into the coroutine frame and recreate the alignment information
  // here. Possibly we will need to do a mini SROA here and break the coroutine
  // frame into individual AllocaInst recreating the original alignment.
  auto *Frame = new AllocaInst(FrameTy, "", InsertPt);
  auto *FrameVoidPtr =
      new BitCastInst(Frame, Type::getInt8PtrTy(C), "vFrame", InsertPt);

  // Replacing llvm.coro.alloc with non-null value will suppress dynamic
  // allocation as it is expected for the frontend to generate the code that
  // looks like:
  // mem = coro.alloc();
  // if (!mem) mem = malloc(coro.size());
  // coro.begin(mem, ...)
  AllocInst->replaceAllUsesWith(FrameVoidPtr);
  AllocInst->eraseFromParent();

  // To suppress deallocation code, we replace all llvm.coro.free intrinsics
  // associated with this coro.begin with null constant.
  auto *NullPtr = ConstantPointerNull::get(Type::getInt8PtrTy(C));
  coro::replaceAllCoroFrees(CoroBegin, NullPtr);
  CoroBegin->lowerTo(FrameVoidPtr);

  // Since the coroutine frame now lives on the stack, we need to make sure
  // that any tail call referencing it is made a non-tail call.
  removeTailCallAttribute(Frame, AA);
}

// See if there are any coro.subfn.addr intrinsics directly referencing
// the coro.begin. If found, replace them with an appropriate coroutine
// subfunction associated with that coro.begin.
static bool replaceIndirectCalls(CoroBeginInst *CoroBegin) {
static bool replaceIndirectCalls(CoroBeginInst *CoroBegin, AAResults &AA) {
  SmallVector<CoroSubFnInst *, 8> ResumeAddr;
  SmallVector<CoroSubFnInst *, 8> DestroyAddr;

  for (User *U : CoroBegin->users()) {
    if (auto *II = dyn_cast<CoroSubFnInst>(U)) {
      switch (II->getIndex()) {
      case CoroSubFnInst::ResumeIndex:
        ResumeAddr.push_back(II);
        break;
      case CoroSubFnInst::DestroyIndex:
        DestroyAddr.push_back(II);
        break;
      default:
        llvm_unreachable("unexpected coro.subfn.addr constant");
  for (User *CF : CoroBegin->users()) {
    assert(isa<CoroFrameInst>(CF) &&
           "CoroBegin can be only used by coro.frame instructions");
    for (User *U : CF->users()) {
      if (auto *II = dyn_cast<CoroSubFnInst>(U)) {
        switch (II->getIndex()) {
        case CoroSubFnInst::ResumeIndex:
          ResumeAddr.push_back(II);
          break;
        case CoroSubFnInst::DestroyIndex:
          DestroyAddr.push_back(II);
          break;
        default:
          llvm_unreachable("unexpected coro.subfn.addr constant");
        }
      }
    }
  }
@@ -99,11 +197,28 @@ static bool replaceIndirectCalls(CoroBeginInst *CoroBegin) {
                     "of coroutine subfunctions");
  auto *ResumeAddrConstant =
      ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::ResumeIndex);
  replaceWithConstant(ResumeAddrConstant, ResumeAddr);

  if (DestroyAddr.empty())
    return true;

  auto *DestroyAddrConstant =
      ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::DestroyIndex);

  replaceWithConstant(ResumeAddrConstant, ResumeAddr);
  replaceWithConstant(DestroyAddrConstant, DestroyAddr);

  // If llvm.coro.begin refers to llvm.coro.alloc, we can elide the allocation.
  if (auto *AllocInst = CoroBegin->getAlloc()) {
    // FIXME: The check above is overly lax. It only checks for whether we have
    // an ability to elide heap allocations, not whether it is safe to do so.
    // We need to do something like:
    // If for every exit from the function where coro.begin is
    // live, there is a coro.free or coro.destroy dominating that exit block,
    // then it is safe to elide heap allocation, since the lifetime of coroutine
    // is fully enclosed in its caller.
    auto *FrameTy = getFrameType(cast<Function>(ResumeAddrConstant));
    elideHeapAllocations(CoroBegin, FrameTy, AllocInst, AA);
  }

  return true;
}

@@ -143,20 +258,9 @@ bool CoroElide::runOnFunction(Function &F) {
  if (CoroBegins.empty())
    return Changed;

  AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  for (auto *CB : CoroBegins)
    Changed |= replaceIndirectCalls(CB);
    Changed |= replaceIndirectCalls(CB, AA);

  return Changed;
}

char CoroElide::ID = 0;
INITIALIZE_PASS_BEGIN(
    CoroElide, "coro-elide",
    "Coroutine frame allocation elision and indirect calls replacement", false,
    false)
INITIALIZE_PASS_END(
    CoroElide, "coro-elide",
    "Coroutine frame allocation elision and indirect calls replacement", false,
    false)

Pass *llvm::createCoroElidePass() { return new CoroElide(); }

@@ -62,11 +62,57 @@ public:
  }
};

/// This represents the llvm.coro.alloc instruction.
class LLVM_LIBRARY_VISIBILITY CoroAllocInst : public IntrinsicInst {
public:
  // Methods to support type inquiry through isa, cast, and dyn_cast:
  static inline bool classof(const IntrinsicInst *I) {
    return I->getIntrinsicID() == Intrinsic::coro_alloc;
  }
  static inline bool classof(const Value *V) {
    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
  }
};

/// This represents the llvm.coro.frame instruction.
class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst {
public:
  // Methods to support type inquiry through isa, cast, and dyn_cast:
  static inline bool classof(const IntrinsicInst *I) {
    return I->getIntrinsicID() == Intrinsic::coro_frame;
  }
  static inline bool classof(const Value *V) {
    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
  }
};

/// This represents the llvm.coro.free instruction.
class LLVM_LIBRARY_VISIBILITY CoroFreeInst : public IntrinsicInst {
public:
  // Methods to support type inquiry through isa, cast, and dyn_cast:
  static inline bool classof(const IntrinsicInst *I) {
    return I->getIntrinsicID() == Intrinsic::coro_free;
  }
  static inline bool classof(const Value *V) {
    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
  }
};

/// This class represents the llvm.coro.begin instruction.
class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst {
  enum { MemArg, AlignArg, PromiseArg, InfoArg };
  enum { MemArg, ElideArg, AlignArg, PromiseArg, InfoArg };

public:
  CoroAllocInst *getAlloc() const {
    if (auto *CAI = dyn_cast<CoroAllocInst>(
            getArgOperand(ElideArg)->stripPointerCasts()))
      return CAI;

    return nullptr;
  }

  Value *getMem() const { return getArgOperand(MemArg); }

  Constant *getRawInfo() const {
    return cast<Constant>(getArgOperand(InfoArg)->stripPointerCasts());
  }
@@ -108,6 +154,22 @@ public:
    return Result;
  }

  // Replaces all coro.frame intrinsics that are associated with this coro.begin
  // with a replacement value and removes coro.begin and all of the coro.frame
  // intrinsics.
  void lowerTo(Value* Replacement) {
    SmallVector<CoroFrameInst*, 4> FrameInsts;
    for (auto *CF : this->users())
      FrameInsts.push_back(cast<CoroFrameInst>(CF));

    for (auto *CF : FrameInsts) {
      CF->replaceAllUsesWith(Replacement);
      CF->eraseFromParent();
    }

    this->eraseFromParent();
  }

  // Methods for support type inquiry through isa, cast, and dyn_cast:
  static inline bool classof(const IntrinsicInst *I) {
    return I->getIntrinsicID() == Intrinsic::coro_begin;
@@ -42,6 +42,7 @@ void initializeCoroCleanupPass(PassRegistry &);
namespace coro {

bool declaresIntrinsics(Module &M, std::initializer_list<StringRef>);
void replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement);

// Keeps data and helper functions for lowering coroutine intrinsics.
struct LowererBase {

@@ -122,3 +122,21 @@ bool coro::declaresIntrinsics(Module &M,

  return false;
}

// Find all llvm.coro.free instructions associated with the provided coro.begin
// and replace them with the provided replacement value.
void coro::replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement) {
  SmallVector<CoroFreeInst *, 4> CoroFrees;
  for (User *FramePtr: CB->users())
    for (User *U : FramePtr->users())
      if (auto *CF = dyn_cast<CoroFreeInst>(U))
        CoroFrees.push_back(CF);

  if (CoroFrees.empty())
    return;

  for (CoroFreeInst *CF : CoroFrees) {
    CF->replaceAllUsesWith(Replacement);
    CF->eraseFromParent();
  }
}

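The use chain that `replaceAllCoroFrees` walks — the `coro.begin` token, its `coro.frame` users, and the `coro.free` calls taking the resulting frame pointer — looks like this in IR (a sketch matching the pattern in the new coro-heap-elide.ll test):

```
  %beg = call token @llvm.coro.begin(i8* %phi, i8* %elide, i32 0, i8* null, i8* %info)
  %hdl = call i8* @llvm.coro.frame(token %beg)  ; user of the coro.begin token
  ...
  %mem = call i8* @llvm.coro.free(i8* %hdl)     ; user of the frame pointer
```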
@@ -1,6 +1,6 @@
; Tests that the coro.destroy and coro.resume are devirtualized where possible,
; SCC pipeline restarts and inlines the direct calls.
; RUN: opt < %s -S -inline -coro-elide | FileCheck %s
; RUN: opt < %s -S -inline -coro-elide -dce | FileCheck %s

declare void @print(i32) nounwind

@@ -22,15 +22,16 @@ define fastcc void @f.destroy(i8*) {
; a coroutine start function
define i8* @f() {
entry:
  %hdl = call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
  %tok = call token @llvm.coro.begin(i8* null, i8* null, i32 0, i8* null,
                                     i8* bitcast ([2 x void (i8*)*]* @f.resumers to i8*))
  %hdl = call i8* @llvm.coro.frame(token %tok)
  ret i8* %hdl
}

; CHECK-LABEL: @callResume(
define void @callResume() {
entry:
; CHECK: call i8* @llvm.coro.begin
; CHECK: call token @llvm.coro.begin
  %hdl = call i8* @f()

; CHECK-NEXT: call void @print(i32 0)
@@ -50,7 +51,7 @@ entry:
; CHECK-LABEL: @eh(
define void @eh() personality i8* null {
entry:
; CHECK: call i8* @llvm.coro.begin
; CHECK: call token @llvm.coro.begin
  %hdl = call i8* @f()

; CHECK-NEXT: call void @print(i32 0)
@@ -70,7 +71,8 @@ ehcleanup:
; no devirtualization here, since coro.begin info parameter is null
define void @no_devirt_info_null() {
entry:
  %hdl = call i8* @llvm.coro.begin(i8* null, i32 0, i8* null, i8* null)
  %tok = call token @llvm.coro.begin(i8* null, i8* null, i32 0, i8* null, i8* null)
  %hdl = call i8* @llvm.coro.frame(token %tok)

; CHECK: call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
  %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
@@ -106,5 +108,6 @@ entry:
}


declare i8* @llvm.coro.begin(i8*, i32, i8*, i8*)
declare token @llvm.coro.begin(i8*, i8*, i32, i8*, i8*)
declare i8* @llvm.coro.frame(token)
declare i8* @llvm.coro.subfn.addr(i8*, i8)

test/Transforms/Coroutines/coro-heap-elide.ll (new file, 125 lines)
@@ -0,0 +1,125 @@
; Tests that the dynamic allocation and deallocation of the coroutine frame is
; elided and any tail calls referencing the coroutine frame have the tail
; call attribute removed.
; RUN: opt < %s -S -inline -coro-elide -instsimplify -simplifycfg | FileCheck %s

declare void @print(i32) nounwind

%f.frame = type {i32}

declare void @bar(i8*)

declare fastcc void @f.resume(%f.frame*)
declare fastcc void @f.destroy(%f.frame*)

declare void @may_throw()
declare i8* @CustomAlloc(i32)
declare void @CustomFree(i8*)

@f.resumers = internal constant
  [2 x void (%f.frame*)*] [void (%f.frame*)* @f.resume, void (%f.frame*)* @f.destroy]

; a coroutine start function
define i8* @f() personality i8* null {
entry:
  %elide = call i8* @llvm.coro.alloc()
  %need.dyn.alloc = icmp ne i8* %elide, null
  br i1 %need.dyn.alloc, label %coro.begin, label %dyn.alloc
dyn.alloc:
  %alloc = call i8* @CustomAlloc(i32 4)
  br label %coro.begin
coro.begin:
  %phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ]
  %beg = call token @llvm.coro.begin(i8* %phi, i8* %elide, i32 0, i8* null,
                                     i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*))
  %hdl = call i8* @llvm.coro.frame(token %beg)
  invoke void @may_throw()
    to label %ret unwind label %ehcleanup
ret:
  ret i8* %hdl

ehcleanup:
  %tok = cleanuppad within none []
  %mem = call i8* @llvm.coro.free(i8* %hdl)
  %need.dyn.free = icmp ne i8* %mem, null
  br i1 %need.dyn.free, label %dyn.free, label %if.end
dyn.free:
  call void @CustomFree(i8* %mem)
  br label %if.end
if.end:
  cleanupret from %tok unwind to caller
}

; CHECK-LABEL: @callResume(
define void @callResume() {
entry:
; CHECK: alloca %f.frame
; CHECK-NOT: coro.begin
; CHECK-NOT: CustomAlloc
; CHECK: call void @may_throw()
  %hdl = call i8* @f()

; Need to remove 'tail' from the first call to @bar
; CHECK-NOT: tail call void @bar(
; CHECK: call void @bar(
  tail call void @bar(i8* %hdl)
; CHECK: tail call void @bar(
  tail call void @bar(i8* null)

; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8* %vFrame)
  %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
  %1 = bitcast i8* %0 to void (i8*)*
  call fastcc void %1(i8* %hdl)

; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.destroy to void (i8*)*)(i8* %vFrame)
  %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
  %3 = bitcast i8* %2 to void (i8*)*
  call fastcc void %3(i8* %hdl)

; CHECK-NEXT: ret void
  ret void
}

; a coroutine start function (cannot elide heap alloc, due to second argument to
; coro.begin not pointing to coro.alloc)
define i8* @f_no_elision() personality i8* null {
entry:
  %alloc = call i8* @CustomAlloc(i32 4)
  %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* null,
                                     i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*))
  %hdl = call i8* @llvm.coro.frame(token %beg)
  ret i8* %hdl
}

; CHECK-LABEL: @callResume_no_elision(
define void @callResume_no_elision() {
entry:
; CHECK: call i8* @CustomAlloc(
  %hdl = call i8* @f_no_elision()

; Tail calls should remain tail calls
; CHECK: tail call void @bar(
  tail call void @bar(i8* %hdl)
; CHECK: tail call void @bar(
  tail call void @bar(i8* null)

; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8*
  %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
  %1 = bitcast i8* %0 to void (i8*)*
  call fastcc void %1(i8* %hdl)

; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.destroy to void (i8*)*)(i8*
  %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
  %3 = bitcast i8* %2 to void (i8*)*
  call fastcc void %3(i8* %hdl)

; CHECK-NEXT: ret void
  ret void
}


declare i8* @llvm.coro.alloc()
declare i8* @llvm.coro.free(i8*)
declare token @llvm.coro.begin(i8*, i8*, i32, i8*, i8*)
declare i8* @llvm.coro.frame(token)
declare i8* @llvm.coro.subfn.addr(i8*, i8)
test/Transforms/Coroutines/restart-trigger.ll (new file, 16 lines)
@@ -0,0 +1,16 @@
; Verifies that restart trigger forces IPO pipelines restart and the same
; coroutine is looked at by CoroSplit pass twice.
; REQUIRES: asserts
; RUN: opt < %s -S -O0 -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s
; RUN: opt < %s -S -O1 -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s

; CHECK: CoroSplit: Processing coroutine 'f' state: 0
; CHECK-NEXT: CoroSplit: Processing coroutine 'f' state: 1

declare token @llvm.coro.begin(i8*, i8*, i32, i8*, i8*)

; a coroutine start function
define void @f() {
  call token @llvm.coro.begin(i8* null, i8* null, i32 0, i8* null, i8* null)
  ret void
}