[NVPTX] Some nvvm.read.ptx.sreg intrinsics should have IntrInaccessibleMemOnly attribute.

These intrinsics may return different values every time they are called
and should not be CSE'd. IntrInaccessibleMemOnly appears to be the right
attribute to model this behavior.

Differential Revision: https://reviews.llvm.org/D57259

llvm-svn: 352256
This commit is contained in:
Artem Belevich 2019-01-26 00:28:32 +00:00
parent b1d3457c03
commit dfad526943
2 changed files with 56 additions and 7 deletions

View File

@ -3673,11 +3673,19 @@ multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
// Intrinsic that reads a 32-bit PTX special register: takes no arguments and
// returns an i32. It is tagged IntrNoMem and is exposed as the builtin
// "__nvvm_read_ptx_sreg_" followed by `name`.
class PTXReadSRegIntrinsic_r32<string name>
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
// Intrinsic that reads a 64-bit PTX special register: takes no arguments and
// returns an i64. It is tagged IntrNoMem and is exposed as the builtin
// "__nvvm_read_ptx_sreg_" followed by `name`.
class PTXReadSRegIntrinsic_r64<string name>
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
// Intrinsics to read registers with non-constant values, i.e. values that
// change over the kernel lifetime. Such reads should not be CSE'd.
// 32-bit variant: takes no arguments and returns an i32. It is tagged
// IntrInaccessibleMemOnly instead of IntrNoMem so that repeated reads are not
// treated as interchangeable. Exposed as the builtin "__nvvm_read_ptx_sreg_"
// followed by `name`.
class PTXReadNCSRegIntrinsic_r32<string name>
: Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
// 64-bit variant: takes no arguments and returns an i64. It is tagged
// IntrInaccessibleMemOnly instead of IntrNoMem so that repeated reads are not
// treated as interchangeable. Exposed as the builtin "__nvvm_read_ptx_sreg_"
// followed by `name`.
class PTXReadNCSRegIntrinsic_r64<string name>
: Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;
@ -3703,13 +3711,13 @@ def int_nvvm_read_ptx_sreg_lanemask_ge :
def int_nvvm_read_ptx_sreg_lanemask_gt :
PTXReadSRegIntrinsic_r32<"lanemask_gt">;
def int_nvvm_read_ptx_sreg_clock : PTXReadSRegIntrinsic_r32<"clock">;
def int_nvvm_read_ptx_sreg_clock64 : PTXReadSRegIntrinsic_r64<"clock64">;
def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;
def int_nvvm_read_ptx_sreg_pm0 : PTXReadSRegIntrinsic_r32<"pm0">;
def int_nvvm_read_ptx_sreg_pm1 : PTXReadSRegIntrinsic_r32<"pm1">;
def int_nvvm_read_ptx_sreg_pm2 : PTXReadSRegIntrinsic_r32<"pm2">;
def int_nvvm_read_ptx_sreg_pm3 : PTXReadSRegIntrinsic_r32<"pm3">;
def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;
def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;
def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;

View File

@ -94,6 +94,43 @@ define i32 @test_popc16_to_32(i16 %a) {
ret i32 %zext
}
; Most of the nvvm.read.ptx.sreg.* intrinsics always return the same value and
; may be CSE'd.
; CHECK-LABEL: test_tid
define i32 @test_tid() {
; The first read of %tid.x must be emitted.
; CHECK: mov.u32 %r{{.*}}, %tid.x;
%a = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; The second, identical read is expected to be merged with the first, so no
; further move from %tid.x may appear before the return.
; CHECK-NOT: mov.u32 %r{{.*}}, %tid.x;
%b = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; Both results are used so that neither call is trivially dead.
%ret = add i32 %a, %b
; CHECK: ret
ret i32 %ret
}
; Reads of clock() or clock64() should not be CSE'd, as each read may return a
; different value.
; CHECK-LABEL: test_clock
define i32 @test_clock() {
; The first read of %clock must be emitted.
; CHECK: mov.u32 %r{{.*}}, %clock;
%a = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
; The second read must also be emitted: the two calls may not be merged.
; CHECK: mov.u32 %r{{.*}}, %clock;
%b = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
; Both results are used so that neither call is trivially dead.
%ret = add i32 %a, %b
; CHECK: ret
ret i32 %ret
}
; CHECK-LABEL: test_clock64
define i64 @test_clock64() {
; The first read of %clock64 must be emitted.
; CHECK: mov.u64 %r{{.*}}, %clock64;
%a = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
; The second read must also be emitted: the two calls may not be merged.
; CHECK: mov.u64 %r{{.*}}, %clock64;
%b = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
; Both results are used so that neither call is trivially dead.
%ret = add i64 %a, %b
; CHECK: ret
ret i64 %ret
}
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare float @llvm.nvvm.sqrt.f(float)
@ -103,3 +140,7 @@ declare i64 @llvm.bitreverse.i64(i64)
declare i16 @llvm.ctpop.i16(i16)
declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.clock()
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()