mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-23 13:50:11 +00:00
[LLVM][NVPTX] Add cp.async.bulk.commit/wait intrinsics (#78698)
This patch adds NVVM intrinsics and NVPTX codegen for the bulk variants of the async-copy commit/wait instructions. lit tests are added to verify the generated PTX. PTX Doc link: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-commit-group Signed-off-by: Durgadoss R <durgadossr@nvidia.com>
This commit is contained in:
parent
42b160356f
commit
43531e7196
@ -1454,6 +1454,16 @@ def int_nvvm_cp_async_wait_all :
|
||||
ClangBuiltin<"__nvvm_cp_async_wait_all">,
|
||||
Intrinsic<[],[],[]>;
|
||||
|
||||
// cp.async.bulk variants of the commit/wait group
|
||||
def int_nvvm_cp_async_bulk_commit_group :
|
||||
Intrinsic<[],[],[]>;
|
||||
|
||||
def int_nvvm_cp_async_bulk_wait_group :
|
||||
Intrinsic<[],[llvm_i32_ty],[ImmArg<ArgIndex<0>>]>;
|
||||
|
||||
def int_nvvm_cp_async_bulk_wait_group_read :
|
||||
Intrinsic<[],[llvm_i32_ty],[ImmArg<ArgIndex<0>>]>;
|
||||
|
||||
// mbarrier
|
||||
def int_nvvm_mbarrier_init : ClangBuiltin<"__nvvm_mbarrier_init">,
|
||||
Intrinsic<[],[llvm_ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
|
@ -411,6 +411,22 @@ def CP_ASYNC_WAIT_ALL :
|
||||
[(int_nvvm_cp_async_wait_all)]>,
|
||||
Requires<[hasPTX<70>, hasSM<80>]>;
|
||||
|
||||
// cp.async.bulk variants of the commit/wait group
|
||||
def CP_ASYNC_BULK_COMMIT_GROUP :
|
||||
NVPTXInst<(outs), (ins), "cp.async.bulk.commit_group;",
|
||||
[(int_nvvm_cp_async_bulk_commit_group)]>,
|
||||
Requires<[hasPTX<80>, hasSM<90>]>;
|
||||
|
||||
def CP_ASYNC_BULK_WAIT_GROUP :
|
||||
NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group $n;",
|
||||
[(int_nvvm_cp_async_bulk_wait_group (i32 timm:$n))]>,
|
||||
Requires<[hasPTX<80>, hasSM<90>]>;
|
||||
|
||||
def CP_ASYNC_BULK_WAIT_GROUP_READ :
|
||||
NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group.read $n;",
|
||||
[(int_nvvm_cp_async_bulk_wait_group_read (i32 timm:$n))]>,
|
||||
Requires<[hasPTX<80>, hasSM<90>]>;
|
||||
|
||||
//-----------------------------------
|
||||
// MBarrier Functions
|
||||
//-----------------------------------
|
||||
|
@ -144,6 +144,31 @@ define void @test_barrier_cluster_aligned() {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_cp_async_bulk_commit_group(
|
||||
define void @test_cp_async_bulk_commit_group() {
|
||||
; CHECK: cp.async.bulk.commit_group;
|
||||
call void @llvm.nvvm.cp.async.bulk.commit.group()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_cp_async_bulk_wait_group(
|
||||
define void @test_cp_async_bulk_wait_group() {
|
||||
; CHECK: cp.async.bulk.wait_group 8;
|
||||
call void @llvm.nvvm.cp.async.bulk.wait.group(i32 8)
|
||||
; CHECK: cp.async.bulk.wait_group 0;
|
||||
call void @llvm.nvvm.cp.async.bulk.wait.group(i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_cp_async_bulk_wait_group_read(
|
||||
define void @test_cp_async_bulk_wait_group_read() {
|
||||
; CHECK: cp.async.bulk.wait_group.read 8;
|
||||
call void @llvm.nvvm.cp.async.bulk.wait.group.read(i32 8)
|
||||
; CHECK: cp.async.bulk.wait_group.read 0;
|
||||
call void @llvm.nvvm.cp.async.bulk.wait.group.read(i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr %p);
|
||||
declare ptr @llvm.nvvm.mapa(ptr %p, i32 %r);
|
||||
declare ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %p, i32 %r);
|
||||
@ -167,3 +192,6 @@ declare void @llvm.nvvm.barrier.cluster.arrive.aligned()
|
||||
declare void @llvm.nvvm.barrier.cluster.arrive.relaxed.aligned()
|
||||
declare void @llvm.nvvm.barrier.cluster.wait.aligned()
|
||||
declare void @llvm.nvvm.fence.sc.cluster()
|
||||
declare void @llvm.nvvm.cp.async.bulk.commit.group()
|
||||
declare void @llvm.nvvm.cp.async.bulk.wait.group(i32)
|
||||
declare void @llvm.nvvm.cp.async.bulk.wait.group.read(i32)
|
||||
|
Loading…
Reference in New Issue
Block a user