diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index fabec277707..6526c719670 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -258,7 +258,6 @@ static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII, switch (TII->lookupName((const char *)Name.bytes_begin(), Name.size())) { default: return false; - case AMDGPUIntrinsic::SI_tid: case AMDGPUIntrinsic::SI_fs_interp: case AMDGPUIntrinsic::SI_fs_constant: return true; diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index bc300b74d34..97e4e5c6f1a 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -3053,12 +3053,6 @@ def : Pat < (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0)))) >; -def : Pat < - (int_SI_tid), - (V_MBCNT_HI_U32_B32_e64 0xffffffff, - (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0)) ->; - //===----------------------------------------------------------------------===// // VOP3 Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/SIIntrinsics.td b/lib/Target/AMDGPU/SIIntrinsics.td index c240c1130cc..a9b7c39096e 100644 --- a/lib/Target/AMDGPU/SIIntrinsics.td +++ b/lib/Target/AMDGPU/SIIntrinsics.td @@ -13,8 +13,6 @@ let TargetPrefix = "SI", isTarget = 1 in { - - def int_SI_tid : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>; def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll b/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll index a3ae3c3aea1..b914edf2928 100644 --- a/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll +++ b/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll @@ -1,13 +1,15 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -declare i32 @llvm.SI.tid() readnone +declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 +declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0 ; SI-LABEL: {{^}}test_array_ptr_calc: ; SI-DAG: v_mul_lo_i32 ; SI-DAG: v_mul_hi_i32 ; SI: s_endpgm define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) { - %tid = call i32 @llvm.SI.tid() readnone + %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) + %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo) %a_ptr = getelementptr [1025 x i32], [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0 %b_ptr = getelementptr i32, i32 addrspace(1)* %inB, i32 %tid %a = load i32, i32 addrspace(1)* %a_ptr @@ -16,3 +18,5 @@ define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] ad store i32 %result, i32 addrspace(1)* %out ret void } + +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/llvm.SI.tid.ll b/test/CodeGen/AMDGPU/llvm.SI.tid.ll deleted file mode 100644 index 1654debcc07..00000000000 --- a/test/CodeGen/AMDGPU/llvm.SI.tid.ll +++ /dev/null @@ -1,18 +0,0 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s - -;GCN: v_mbcnt_lo_u32_b32_e64 -;SI: v_mbcnt_hi_u32_b32_e32 -;VI: v_mbcnt_hi_u32_b32_e64 - -define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) { -main_body: - %4 = call i32 @llvm.SI.tid() - %5 = bitcast i32 %4 to float - call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %5, float %5, float %5, float %5) - ret void -} - -declare i32 @llvm.SI.tid() readnone - -declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/AMDGPU/register-count-comments.ll b/test/CodeGen/AMDGPU/register-count-comments.ll index 4bb315049be..bff3a9f5d2b 100644 --- a/test/CodeGen/AMDGPU/register-count-comments.ll +++ b/test/CodeGen/AMDGPU/register-count-comments.ll @@ -1,7 +1,8 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs -asm-verbose -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s -declare i32 @llvm.SI.tid() nounwind readnone +declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 +declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0 ; SI-LABEL: {{^}}foo: ; SI: .section .AMDGPU.csdata @@ -9,7 +10,8 @@ declare i32 @llvm.SI.tid() nounwind readnone ; SI: ; NumSgprs: {{[0-9]+}} ; SI: ; NumVgprs: {{[0-9]+}} define void @foo(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %abase, i32 addrspace(1)* %bbase) nounwind { - %tid = call i32 @llvm.SI.tid() nounwind readnone + %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0); + %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo) %aptr = getelementptr i32, i32 addrspace(1)* %abase, i32 %tid %bptr = getelementptr i32, i32 addrspace(1)* %bbase, i32 %tid %outptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid diff --git a/test/CodeGen/AMDGPU/si-sgpr-spill.ll b/test/CodeGen/AMDGPU/si-sgpr-spill.ll index a9a39ee1294..63b1b71a8cb 100644 --- a/test/CodeGen/AMDGPU/si-sgpr-spill.ll +++ b/test/CodeGen/AMDGPU/si-sgpr-spill.ll @@ -114,13 +114,15 @@ main_body: %tmp106 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %arg4, <2 x i32> %arg6) %tmp107 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %arg4, <2 x i32> %arg6) %tmp108 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %arg4, <2 x i32> %arg6) - %tmp109 = call i32 @llvm.SI.tid() + %mbcnt.lo.0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) + %tmp109 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo.0) %tmp110 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp109 %tmp111 = bitcast float %tmp92 to i32 store i32 %tmp111, i32 addrspace(3)* %tmp110 %tmp112 = bitcast float %tmp93 to i32 store i32 %tmp112, i32 addrspace(3)* %tmp110 - %tmp113 = call i32 @llvm.SI.tid() + %mbcnt.lo.1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) + %tmp113 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo.1) %tmp114 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp113 %tmp115 = and i32 %tmp113, -4 %tmp116 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp115 @@ -150,7 +152,8 @@ main_body: %tmp138 = fmul float %tmp59, %tmp93 %tmp139 = fmul float %tmp59, %tmp93 %tmp140 = fmul float %tmp59, %tmp93 - %tmp141 = call i32 @llvm.SI.tid() + %mbcnt.lo.2 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) + %tmp141 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo.2) %tmp142 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp141 %tmp143 = bitcast float %tmp137 to i32 store i32 %tmp143, i32 addrspace(3)* %tmp142 @@ -160,7 +163,8 @@ main_body: store i32 %tmp145, i32 addrspace(3)* %tmp142 %tmp146 = bitcast float %tmp140 to i32 store i32 %tmp146, i32 addrspace(3)* %tmp142 - %tmp147 = call i32 @llvm.SI.tid() + %mbcnt.lo.3 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) + %tmp147 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo.3) %tmp148 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp147 %tmp149 = and i32 %tmp147, -4 %tmp150 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp149 @@ -1580,8 +1584,8 @@ declare float @llvm.SI.load.const(<16 x i8>, i32) #2 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #2 -; Function Attrs: readnone -declare i32 @llvm.SI.tid() #1 +declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 +declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1 ; Function Attrs: nounwind readonly declare float @ceil(float) #3 diff --git a/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll b/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll index c91a44cf60e..cc4b6bcbfb5 100644 --- a/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll +++ b/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll @@ -6,7 +6,8 @@ ; TONGA-LABEL: test define void @test(<256 x i32> addrspace(1)* %out, <256 x i32> addrspace(1)* %in) { entry: - %tid = call i32 @llvm.SI.tid() nounwind readnone + %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) + %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo) %aptr = getelementptr <256 x i32>, <256 x i32> addrspace(1)* %in, i32 %tid %a = load <256 x i32>, <256 x i32> addrspace(1)* %aptr call void asm sideeffect "", "~{memory}" () @@ -21,4 +22,7 @@ entry: ret void } -declare i32 @llvm.SI.tid() nounwind readnone +declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 +declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0 + +attributes #0 = { nounwind readnone }