mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-13 13:45:16 +00:00
[Coroutines] Offering llvm.coro.align intrinsic
It is a known problem that we can't align the switch-based coroutine frame if the alignment exceeds the alignment of std::max_align_t (which is usually 16). We could solve the problem in the middle end with a dynamic transformation, or in the frontend by emitting an aligned allocation function. If we solve it in the frontend, the middle end needs to at least offer an intrinsic that tells the frontend the alignment. This patch offers such an intrinsic, called llvm.coro.align. Reviewed By: https://reviews.llvm.org/D117542 Differential revision: https://reviews.llvm.org/D117542
This commit is contained in:
parent
76b74236c7
commit
c8ecf12bc3
@ -948,6 +948,32 @@ Semantics:
|
||||
The `coro.size` intrinsic is lowered to a constant representing the size of
|
||||
the coroutine frame.
|
||||
|
||||
.. _coro.align:
|
||||
|
||||
'llvm.coro.align' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
::
|
||||
|
||||
declare i32 @llvm.coro.align.i32()
|
||||
declare i64 @llvm.coro.align.i64()
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.coro.align``' intrinsic returns the alignment of a `coroutine frame`_.
|
||||
This is only supported for switched-resume coroutines.
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
None
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
The `coro.align` intrinsic is lowered to a constant representing the alignment of
|
||||
the coroutine frame.
|
||||
|
||||
.. _coro.begin:
|
||||
|
||||
'llvm.coro.begin' Intrinsic
|
||||
|
@ -633,6 +633,7 @@ public:
|
||||
case Intrinsic::coro_end:
|
||||
case Intrinsic::coro_frame:
|
||||
case Intrinsic::coro_size:
|
||||
case Intrinsic::coro_align:
|
||||
case Intrinsic::coro_suspend:
|
||||
case Intrinsic::coro_subfn_addr:
|
||||
// These intrinsics don't actually represent code after lowering.
|
||||
|
@ -1272,6 +1272,7 @@ def int_coro_end_async
|
||||
def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
|
||||
def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
|
||||
def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
|
||||
def int_coro_align : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
|
||||
|
||||
def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>;
|
||||
def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>;
|
||||
|
@ -599,6 +599,18 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
/// This represents the llvm.coro.align instruction.
///
/// It is a thin typed view over IntrinsicInst: it adds no state, only the
/// classof() predicates below, which identify intrinsic calls whose ID is
/// Intrinsic::coro_align.
|
||||
class LLVM_LIBRARY_VISIBILITY CoroAlignInst : public IntrinsicInst {
|
||||
public:
|
||||
// Methods to support type inquiry through isa, cast, and dyn_cast:
|
||||
// Returns true when the intrinsic call's ID is Intrinsic::coro_align.
static bool classof(const IntrinsicInst *I) {
|
||||
return I->getIntrinsicID() == Intrinsic::coro_align;
|
||||
}
|
||||
// Returns true when V is an IntrinsicInst that also satisfies the
// IntrinsicInst overload above.
static bool classof(const Value *V) {
|
||||
// isa<> is tested first so that cast<> is only evaluated for values that
// really are IntrinsicInsts (&& short-circuits otherwise).
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
|
||||
}
|
||||
};
|
||||
|
||||
class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst {
|
||||
enum { FrameArg, UnwindArg };
|
||||
|
||||
|
@ -104,6 +104,7 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
|
||||
CoroBeginInst *CoroBegin;
|
||||
SmallVector<AnyCoroEndInst *, 4> CoroEnds;
|
||||
SmallVector<CoroSizeInst *, 2> CoroSizes;
|
||||
SmallVector<CoroAlignInst *, 2> CoroAligns;
|
||||
SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends;
|
||||
SmallVector<CallInst*, 2> SwiftErrorOps;
|
||||
|
||||
|
@ -1083,10 +1083,16 @@ static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) {
|
||||
Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
|
||||
}
|
||||
|
||||
static void replaceFrameSize(coro::Shape &Shape) {
|
||||
static void replaceFrameSizeAndAlignment(coro::Shape &Shape) {
|
||||
if (Shape.ABI == coro::ABI::Async)
|
||||
updateAsyncFuncPointerContextSize(Shape);
|
||||
|
||||
for (CoroAlignInst *CA : Shape.CoroAligns) {
|
||||
CA->replaceAllUsesWith(
|
||||
ConstantInt::get(CA->getType(), Shape.FrameAlign.value()));
|
||||
CA->eraseFromParent();
|
||||
}
|
||||
|
||||
if (Shape.CoroSizes.empty())
|
||||
return;
|
||||
|
||||
@ -1884,7 +1890,7 @@ static coro::Shape splitCoroutine(Function &F,
|
||||
|
||||
simplifySuspendPoints(Shape);
|
||||
buildCoroutineFrame(F, Shape);
|
||||
replaceFrameSize(Shape);
|
||||
replaceFrameSizeAndAlignment(Shape);
|
||||
|
||||
// If there are no suspend points, no split required, just remove
|
||||
// the allocation and deallocation blocks, they are not needed.
|
||||
|
@ -123,6 +123,7 @@ Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
|
||||
static bool isCoroutineIntrinsicName(StringRef Name) {
|
||||
// NOTE: Must be sorted!
|
||||
static const char *const CoroIntrinsics[] = {
|
||||
"llvm.coro.align",
|
||||
"llvm.coro.alloc",
|
||||
"llvm.coro.async.context.alloc",
|
||||
"llvm.coro.async.context.dealloc",
|
||||
@ -268,6 +269,9 @@ void coro::Shape::buildFrom(Function &F) {
|
||||
case Intrinsic::coro_size:
|
||||
CoroSizes.push_back(cast<CoroSizeInst>(II));
|
||||
break;
|
||||
case Intrinsic::coro_align:
|
||||
CoroAligns.push_back(cast<CoroAlignInst>(II));
|
||||
break;
|
||||
case Intrinsic::coro_frame:
|
||||
CoroFrames.push_back(cast<CoroFrameInst>(II));
|
||||
break;
|
||||
|
54
llvm/test/Transforms/Coroutines/coro-align-01.ll
Normal file
54
llvm/test/Transforms/Coroutines/coro-align-01.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; Tests that the coro.align intrinsic is lowered to the correct alignment.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
|
||||
|
||||
define i8* @f() "coroutine.presplit"="1" {
|
||||
entry:
|
||||
%x = alloca i64
|
||||
%y = alloca i64
|
||||
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
|
||||
%size = call i32 @llvm.coro.size.i32()
|
||||
%align = call i32 @llvm.coro.align.i32()
|
||||
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
|
||||
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
|
||||
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
|
||||
switch i8 %sp1, label %suspend [i8 0, label %resume
|
||||
i8 1, label %cleanup]
|
||||
resume:
|
||||
%x.alias = bitcast i64* %x to i32*
|
||||
call void @capture_call(i32* %x.alias)
|
||||
%y.alias = bitcast i64* %y to i32*
|
||||
call void @nocapture_call(i32* %y.alias)
|
||||
br label %cleanup
|
||||
|
||||
cleanup:
|
||||
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
|
||||
call void @free(i8* %mem)
|
||||
br label %suspend
|
||||
|
||||
suspend:
|
||||
call i1 @llvm.coro.end(i8* %hdl, i1 0)
|
||||
ret i8* %hdl
|
||||
}
|
||||
|
||||
; %x needs to go to the frame since it's escaped; %y will stay as local since it doesn't escape.
|
||||
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i1 }
|
||||
; CHECK-LABEL: define i8* @f()
|
||||
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 8, i32 32)
|
||||
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
|
||||
|
||||
declare i8* @llvm.coro.free(token, i8*)
|
||||
declare i32 @llvm.coro.size.i32()
|
||||
declare i32 @llvm.coro.align.i32()
|
||||
declare i8 @llvm.coro.suspend(token, i1)
|
||||
declare void @llvm.coro.resume(i8*)
|
||||
declare void @llvm.coro.destroy(i8*)
|
||||
|
||||
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
|
||||
declare i1 @llvm.coro.alloc(token)
|
||||
declare i8* @llvm.coro.begin(token, i8*)
|
||||
declare i1 @llvm.coro.end(i8*, i1)
|
||||
|
||||
declare void @capture_call(i32*)
|
||||
declare void @nocapture_call(i32* nocapture)
|
||||
declare noalias i8* @aligned_alloc(i32, i32)
|
||||
declare void @free(i8*)
|
46
llvm/test/Transforms/Coroutines/coro-align-02.ll
Normal file
46
llvm/test/Transforms/Coroutines/coro-align-02.ll
Normal file
@ -0,0 +1,46 @@
|
||||
; Tests that the coro.align intrinsic is lowered to the correct alignment.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
|
||||
|
||||
define i8* @f() "coroutine.presplit"="1" {
|
||||
entry:
|
||||
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
|
||||
%size = call i32 @llvm.coro.size.i32()
|
||||
%align = call i32 @llvm.coro.align.i32()
|
||||
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
|
||||
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
|
||||
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
|
||||
switch i8 %sp1, label %suspend [i8 0, label %resume
|
||||
i8 1, label %cleanup]
|
||||
resume:
|
||||
br label %cleanup
|
||||
|
||||
cleanup:
|
||||
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
|
||||
call void @free(i8* %mem)
|
||||
br label %suspend
|
||||
|
||||
suspend:
|
||||
call i1 @llvm.coro.end(i8* %hdl, i1 0)
|
||||
ret i8* %hdl
|
||||
}
|
||||
|
||||
; @f has no allocas, so the frame holds only the two function pointers and the trailing i1 field.
|
||||
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1 }
|
||||
; CHECK-LABEL: define i8* @f()
|
||||
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 8, i32 24)
|
||||
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
|
||||
|
||||
declare i8* @llvm.coro.free(token, i8*)
|
||||
declare i32 @llvm.coro.size.i32()
|
||||
declare i32 @llvm.coro.align.i32()
|
||||
declare i8 @llvm.coro.suspend(token, i1)
|
||||
declare void @llvm.coro.resume(i8*)
|
||||
declare void @llvm.coro.destroy(i8*)
|
||||
|
||||
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
|
||||
declare i1 @llvm.coro.alloc(token)
|
||||
declare i8* @llvm.coro.begin(token, i8*)
|
||||
declare i1 @llvm.coro.end(i8*, i1)
|
||||
|
||||
declare noalias i8* @aligned_alloc(i32, i32)
|
||||
declare void @free(i8*)
|
54
llvm/test/Transforms/Coroutines/coro-align-03.ll
Normal file
54
llvm/test/Transforms/Coroutines/coro-align-03.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; Tests that the coro.align intrinsic is lowered to the correct alignment.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
|
||||
|
||||
define i8* @f() "coroutine.presplit"="1" {
|
||||
entry:
|
||||
%x = alloca i64, align 16
|
||||
%y = alloca i64
|
||||
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
|
||||
%size = call i32 @llvm.coro.size.i32()
|
||||
%align = call i32 @llvm.coro.align.i32()
|
||||
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
|
||||
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
|
||||
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
|
||||
switch i8 %sp1, label %suspend [i8 0, label %resume
|
||||
i8 1, label %cleanup]
|
||||
resume:
|
||||
%x.alias = bitcast i64* %x to i32*
|
||||
call void @capture_call(i32* %x.alias)
|
||||
%y.alias = bitcast i64* %y to i32*
|
||||
call void @capture_call(i32* %y.alias)
|
||||
br label %cleanup
|
||||
|
||||
cleanup:
|
||||
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
|
||||
call void @free(i8* %mem)
|
||||
br label %suspend
|
||||
|
||||
suspend:
|
||||
call i1 @llvm.coro.end(i8* %hdl, i1 0)
|
||||
ret i8* %hdl
|
||||
}
|
||||
|
||||
; Both %x and %y need to go to the frame since both are escaped (each is passed to @capture_call).
|
||||
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i1 }
|
||||
; CHECK-LABEL: define i8* @f()
|
||||
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 16, i32 40)
|
||||
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
|
||||
|
||||
declare i8* @llvm.coro.free(token, i8*)
|
||||
declare i32 @llvm.coro.size.i32()
|
||||
declare i32 @llvm.coro.align.i32()
|
||||
declare i8 @llvm.coro.suspend(token, i1)
|
||||
declare void @llvm.coro.resume(i8*)
|
||||
declare void @llvm.coro.destroy(i8*)
|
||||
|
||||
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
|
||||
declare i1 @llvm.coro.alloc(token)
|
||||
declare i8* @llvm.coro.begin(token, i8*)
|
||||
declare i1 @llvm.coro.end(i8*, i1)
|
||||
|
||||
declare void @capture_call(i32*)
|
||||
declare void @nocapture_call(i32* nocapture)
|
||||
declare noalias i8* @aligned_alloc(i32, i32)
|
||||
declare void @free(i8*)
|
54
llvm/test/Transforms/Coroutines/coro-align-04.ll
Normal file
54
llvm/test/Transforms/Coroutines/coro-align-04.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; Tests that the coro.align intrinsic is lowered to the correct alignment.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
|
||||
|
||||
define i8* @f() "coroutine.presplit"="1" {
|
||||
entry:
|
||||
%x = alloca i1, align 64
|
||||
%y = alloca i64
|
||||
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
|
||||
%size = call i32 @llvm.coro.size.i32()
|
||||
%align = call i32 @llvm.coro.align.i32()
|
||||
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
|
||||
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
|
||||
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
|
||||
switch i8 %sp1, label %suspend [i8 0, label %resume
|
||||
i8 1, label %cleanup]
|
||||
resume:
|
||||
%x.alias = bitcast i1* %x to i32*
|
||||
call void @capture_call(i32* %x.alias)
|
||||
%y.alias = bitcast i64* %y to i32*
|
||||
call void @capture_call(i32* %y.alias)
|
||||
br label %cleanup
|
||||
|
||||
cleanup:
|
||||
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
|
||||
call void @free(i8* %mem)
|
||||
br label %suspend
|
||||
|
||||
suspend:
|
||||
call i1 @llvm.coro.end(i8* %hdl, i1 0)
|
||||
ret i8* %hdl
|
||||
}
|
||||
|
||||
; Both %x and %y need to go to the frame since both are escaped (each is passed to @capture_call).
|
||||
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i1, [39 x i8], i1 }
|
||||
; CHECK-LABEL: define i8* @f()
|
||||
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 64, i32 72)
|
||||
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
|
||||
|
||||
declare i8* @llvm.coro.free(token, i8*)
|
||||
declare i32 @llvm.coro.size.i32()
|
||||
declare i32 @llvm.coro.align.i32()
|
||||
declare i8 @llvm.coro.suspend(token, i1)
|
||||
declare void @llvm.coro.resume(i8*)
|
||||
declare void @llvm.coro.destroy(i8*)
|
||||
|
||||
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
|
||||
declare i1 @llvm.coro.alloc(token)
|
||||
declare i8* @llvm.coro.begin(token, i8*)
|
||||
declare i1 @llvm.coro.end(i8*, i1)
|
||||
|
||||
declare void @capture_call(i32*)
|
||||
declare void @nocapture_call(i32* nocapture)
|
||||
declare noalias i8* @aligned_alloc(i32, i32)
|
||||
declare void @free(i8*)
|
54
llvm/test/Transforms/Coroutines/coro-align-05.ll
Normal file
54
llvm/test/Transforms/Coroutines/coro-align-05.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; Tests that the coro.align intrinsic is lowered to the correct alignment.
|
||||
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
|
||||
|
||||
define i8* @f() "coroutine.presplit"="1" {
|
||||
entry:
|
||||
%x = alloca i1, align 64
|
||||
%y = alloca i64, align 32
|
||||
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
|
||||
%size = call i32 @llvm.coro.size.i32()
|
||||
%align = call i32 @llvm.coro.align.i32()
|
||||
%alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
|
||||
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
|
||||
%sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
|
||||
switch i8 %sp1, label %suspend [i8 0, label %resume
|
||||
i8 1, label %cleanup]
|
||||
resume:
|
||||
%x.alias = bitcast i1* %x to i32*
|
||||
call void @capture_call(i32* %x.alias)
|
||||
%y.alias = bitcast i64* %y to i32*
|
||||
call void @capture_call(i32* %y.alias)
|
||||
br label %cleanup
|
||||
|
||||
cleanup:
|
||||
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
|
||||
call void @free(i8* %mem)
|
||||
br label %suspend
|
||||
|
||||
suspend:
|
||||
call i1 @llvm.coro.end(i8* %hdl, i1 0)
|
||||
ret i8* %hdl
|
||||
}
|
||||
|
||||
; Both %x and %y need to go to the frame since both are escaped (each is passed to @capture_call).
|
||||
; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i1, [15 x i8], i64, [24 x i8], i1 }
|
||||
; CHECK-LABEL: define i8* @f()
|
||||
; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 64, i32 72)
|
||||
; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
|
||||
|
||||
declare i8* @llvm.coro.free(token, i8*)
|
||||
declare i32 @llvm.coro.size.i32()
|
||||
declare i32 @llvm.coro.align.i32()
|
||||
declare i8 @llvm.coro.suspend(token, i1)
|
||||
declare void @llvm.coro.resume(i8*)
|
||||
declare void @llvm.coro.destroy(i8*)
|
||||
|
||||
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
|
||||
declare i1 @llvm.coro.alloc(token)
|
||||
declare i8* @llvm.coro.begin(token, i8*)
|
||||
declare i1 @llvm.coro.end(i8*, i1)
|
||||
|
||||
declare void @capture_call(i32*)
|
||||
declare void @nocapture_call(i32* nocapture)
|
||||
declare noalias i8* @aligned_alloc(i32, i32)
|
||||
declare void @free(i8*)
|
Loading…
x
Reference in New Issue
Block a user