mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-04-01 12:43:47 +00:00
[NVPTX] Some nvvm.read.ptx.sreg intrinsics should have the IntrInaccessibleMemOnly attribute.
These intrinsics (the reads of clock, clock64 and pm0-pm3) may return a different value every time they are called and therefore should not be CSE'd. IntrInaccessibleMemOnly appears to be the right attribute to model this behavior. Differential Revision: https://reviews.llvm.org/D57259 llvm-svn: 352256
This commit is contained in:
parent
b1d3457c03
commit
dfad526943
@ -3673,11 +3673,19 @@ multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
|
||||
class PTXReadSRegIntrinsic_r32<string name>
|
||||
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
|
||||
|
||||
class PTXReadSRegIntrinsic_r64<string name>
|
||||
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
|
||||
|
||||
// Intrinsics to read registers with non-constant values, i.e. the values that
|
||||
// do change over the kernel lifetime. Such reads should not be CSE'd.
|
||||
class PTXReadNCSRegIntrinsic_r32<string name>
|
||||
: Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
|
||||
class PTXReadNCSRegIntrinsic_r64<string name>
|
||||
: Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
|
||||
|
||||
defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
|
||||
defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;
|
||||
|
||||
@ -3703,13 +3711,13 @@ def int_nvvm_read_ptx_sreg_lanemask_ge :
|
||||
def int_nvvm_read_ptx_sreg_lanemask_gt :
|
||||
PTXReadSRegIntrinsic_r32<"lanemask_gt">;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_clock : PTXReadSRegIntrinsic_r32<"clock">;
|
||||
def int_nvvm_read_ptx_sreg_clock64 : PTXReadSRegIntrinsic_r64<"clock64">;
|
||||
def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
|
||||
def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_pm0 : PTXReadSRegIntrinsic_r32<"pm0">;
|
||||
def int_nvvm_read_ptx_sreg_pm1 : PTXReadSRegIntrinsic_r32<"pm1">;
|
||||
def int_nvvm_read_ptx_sreg_pm2 : PTXReadSRegIntrinsic_r32<"pm2">;
|
||||
def int_nvvm_read_ptx_sreg_pm3 : PTXReadSRegIntrinsic_r32<"pm3">;
|
||||
def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
|
||||
def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
|
||||
def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;
|
||||
def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
|
||||
|
||||
|
@ -94,6 +94,43 @@ define i32 @test_popc16_to_32(i16 %a) {
|
||||
ret i32 %zext
|
||||
}
|
||||
|
||||
; Most of the nvvm.read.ptx.sreg.* intrinsics always return the same value and may
|
||||
; be CSE'd.
|
||||
; CHECK-LABEL: test_tid
|
||||
define i32 @test_tid() {
|
||||
; CHECK: mov.u32 %r{{.*}}, %tid.x;
|
||||
%a = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
; CHECK-NOT: mov.u32 %r{{.*}}, %tid.x;
|
||||
%b = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
%ret = add i32 %a, %b
|
||||
; CHECK: ret
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; Reads of clock() or clock64() should not be CSE'd, as each read may return
|
||||
; a different value.
|
||||
; CHECK-LABEL: test_clock
|
||||
define i32 @test_clock() {
|
||||
; CHECK: mov.u32 %r{{.*}}, %clock;
|
||||
%a = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
|
||||
; CHECK: mov.u32 %r{{.*}}, %clock;
|
||||
%b = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
|
||||
%ret = add i32 %a, %b
|
||||
; CHECK: ret
|
||||
ret i32 %ret
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_clock64
|
||||
define i64 @test_clock64() {
|
||||
; CHECK: mov.u64 %r{{.*}}, %clock64;
|
||||
%a = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
|
||||
; CHECK: mov.u64 %r{{.*}}, %clock64;
|
||||
%b = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
|
||||
%ret = add i64 %a, %b
|
||||
; CHECK: ret
|
||||
ret i64 %ret
|
||||
}
|
||||
|
||||
declare float @llvm.fabs.f32(float)
|
||||
declare double @llvm.fabs.f64(double)
|
||||
declare float @llvm.nvvm.sqrt.f(float)
|
||||
@ -103,3 +140,7 @@ declare i64 @llvm.bitreverse.i64(i64)
|
||||
declare i16 @llvm.ctpop.i16(i16)
|
||||
declare i32 @llvm.ctpop.i32(i32)
|
||||
declare i64 @llvm.ctpop.i64(i64)
|
||||
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.clock()
|
||||
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
|
||||
|
Loading…
x
Reference in New Issue
Block a user