Mirror of https://github.com/RPCSX/llvm.git (synced 2024-12-13 06:38:44 +00:00)
AMDGPU: Remove llvm.SI.tid intrinsic
Mesa doesn't emit this for llvm >= 3.8 anymore.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273050 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent: e5eb673413
commit: 310a3752c0
@ -258,7 +258,6 @@ static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
|
|||||||
switch (TII->lookupName((const char *)Name.bytes_begin(), Name.size())) {
|
switch (TII->lookupName((const char *)Name.bytes_begin(), Name.size())) {
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
case AMDGPUIntrinsic::SI_tid:
|
|
||||||
case AMDGPUIntrinsic::SI_fs_interp:
|
case AMDGPUIntrinsic::SI_fs_interp:
|
||||||
case AMDGPUIntrinsic::SI_fs_constant:
|
case AMDGPUIntrinsic::SI_fs_constant:
|
||||||
return true;
|
return true;
|
||||||
|
@ -3053,12 +3053,6 @@ def : Pat <
|
|||||||
(V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
|
(V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
def : Pat <
|
|
||||||
(int_SI_tid),
|
|
||||||
(V_MBCNT_HI_U32_B32_e64 0xffffffff,
|
|
||||||
(V_MBCNT_LO_U32_B32_e64 0xffffffff, 0))
|
|
||||||
>;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// VOP3 Patterns
|
// VOP3 Patterns
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -13,8 +13,6 @@
|
|||||||
|
|
||||||
|
|
||||||
let TargetPrefix = "SI", isTarget = 1 in {
|
let TargetPrefix = "SI", isTarget = 1 in {
|
||||||
|
|
||||||
def int_SI_tid : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
|
|
||||||
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||||
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
|
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
|
||||||
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
|
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
@ -1,13 +1,15 @@
|
|||||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||||
|
|
||||||
declare i32 @llvm.SI.tid() readnone
|
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
|
||||||
|
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
|
||||||
|
|
||||||
; SI-LABEL: {{^}}test_array_ptr_calc:
|
; SI-LABEL: {{^}}test_array_ptr_calc:
|
||||||
; SI-DAG: v_mul_lo_i32
|
; SI-DAG: v_mul_lo_i32
|
||||||
; SI-DAG: v_mul_hi_i32
|
; SI-DAG: v_mul_hi_i32
|
||||||
; SI: s_endpgm
|
; SI: s_endpgm
|
||||||
define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
|
define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
|
||||||
%tid = call i32 @llvm.SI.tid() readnone
|
%mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||||
|
%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)
|
||||||
%a_ptr = getelementptr [1025 x i32], [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0
|
%a_ptr = getelementptr [1025 x i32], [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0
|
||||||
%b_ptr = getelementptr i32, i32 addrspace(1)* %inB, i32 %tid
|
%b_ptr = getelementptr i32, i32 addrspace(1)* %inB, i32 %tid
|
||||||
%a = load i32, i32 addrspace(1)* %a_ptr
|
%a = load i32, i32 addrspace(1)* %a_ptr
|
||||||
@ -16,3 +18,5 @@ define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] ad
|
|||||||
store i32 %result, i32 addrspace(1)* %out
|
store i32 %result, i32 addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind readnone }
|
||||||
|
@ -1,18 +0,0 @@
|
|||||||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
|
|
||||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s
|
|
||||||
|
|
||||||
;GCN: v_mbcnt_lo_u32_b32_e64
|
|
||||||
;SI: v_mbcnt_hi_u32_b32_e32
|
|
||||||
;VI: v_mbcnt_hi_u32_b32_e64
|
|
||||||
|
|
||||||
define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) {
|
|
||||||
main_body:
|
|
||||||
%4 = call i32 @llvm.SI.tid()
|
|
||||||
%5 = bitcast i32 %4 to float
|
|
||||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %5, float %5, float %5, float %5)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
declare i32 @llvm.SI.tid() readnone
|
|
||||||
|
|
||||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
|
@ -1,7 +1,8 @@
|
|||||||
; RUN: llc -march=amdgcn -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s
|
; RUN: llc -march=amdgcn -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s
|
||||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs -asm-verbose -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s
|
; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs -asm-verbose -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s
|
||||||
|
|
||||||
declare i32 @llvm.SI.tid() nounwind readnone
|
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
|
||||||
|
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
|
||||||
|
|
||||||
; SI-LABEL: {{^}}foo:
|
; SI-LABEL: {{^}}foo:
|
||||||
; SI: .section .AMDGPU.csdata
|
; SI: .section .AMDGPU.csdata
|
||||||
@ -9,7 +10,8 @@ declare i32 @llvm.SI.tid() nounwind readnone
|
|||||||
; SI: ; NumSgprs: {{[0-9]+}}
|
; SI: ; NumSgprs: {{[0-9]+}}
|
||||||
; SI: ; NumVgprs: {{[0-9]+}}
|
; SI: ; NumVgprs: {{[0-9]+}}
|
||||||
define void @foo(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %abase, i32 addrspace(1)* %bbase) nounwind {
|
define void @foo(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %abase, i32 addrspace(1)* %bbase) nounwind {
|
||||||
%tid = call i32 @llvm.SI.tid() nounwind readnone
|
%mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0);
|
||||||
|
%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)
|
||||||
%aptr = getelementptr i32, i32 addrspace(1)* %abase, i32 %tid
|
%aptr = getelementptr i32, i32 addrspace(1)* %abase, i32 %tid
|
||||||
%bptr = getelementptr i32, i32 addrspace(1)* %bbase, i32 %tid
|
%bptr = getelementptr i32, i32 addrspace(1)* %bbase, i32 %tid
|
||||||
%outptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
%outptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
|
||||||
|
@ -114,13 +114,15 @@ main_body:
|
|||||||
%tmp106 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %arg4, <2 x i32> %arg6)
|
%tmp106 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %arg4, <2 x i32> %arg6)
|
||||||
%tmp107 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %arg4, <2 x i32> %arg6)
|
%tmp107 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %arg4, <2 x i32> %arg6)
|
||||||
%tmp108 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %arg4, <2 x i32> %arg6)
|
%tmp108 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %arg4, <2 x i32> %arg6)
|
||||||
%tmp109 = call i32 @llvm.SI.tid()
|
%mbcnt.lo.0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||||
|
%tmp109 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo.0)
|
||||||
%tmp110 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp109
|
%tmp110 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp109
|
||||||
%tmp111 = bitcast float %tmp92 to i32
|
%tmp111 = bitcast float %tmp92 to i32
|
||||||
store i32 %tmp111, i32 addrspace(3)* %tmp110
|
store i32 %tmp111, i32 addrspace(3)* %tmp110
|
||||||
%tmp112 = bitcast float %tmp93 to i32
|
%tmp112 = bitcast float %tmp93 to i32
|
||||||
store i32 %tmp112, i32 addrspace(3)* %tmp110
|
store i32 %tmp112, i32 addrspace(3)* %tmp110
|
||||||
%tmp113 = call i32 @llvm.SI.tid()
|
%mbcnt.lo.1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||||
|
%tmp113 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo.1)
|
||||||
%tmp114 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp113
|
%tmp114 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp113
|
||||||
%tmp115 = and i32 %tmp113, -4
|
%tmp115 = and i32 %tmp113, -4
|
||||||
%tmp116 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp115
|
%tmp116 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp115
|
||||||
@ -150,7 +152,8 @@ main_body:
|
|||||||
%tmp138 = fmul float %tmp59, %tmp93
|
%tmp138 = fmul float %tmp59, %tmp93
|
||||||
%tmp139 = fmul float %tmp59, %tmp93
|
%tmp139 = fmul float %tmp59, %tmp93
|
||||||
%tmp140 = fmul float %tmp59, %tmp93
|
%tmp140 = fmul float %tmp59, %tmp93
|
||||||
%tmp141 = call i32 @llvm.SI.tid()
|
%mbcnt.lo.2 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||||
|
%tmp141 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo.2)
|
||||||
%tmp142 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp141
|
%tmp142 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp141
|
||||||
%tmp143 = bitcast float %tmp137 to i32
|
%tmp143 = bitcast float %tmp137 to i32
|
||||||
store i32 %tmp143, i32 addrspace(3)* %tmp142
|
store i32 %tmp143, i32 addrspace(3)* %tmp142
|
||||||
@ -160,7 +163,8 @@ main_body:
|
|||||||
store i32 %tmp145, i32 addrspace(3)* %tmp142
|
store i32 %tmp145, i32 addrspace(3)* %tmp142
|
||||||
%tmp146 = bitcast float %tmp140 to i32
|
%tmp146 = bitcast float %tmp140 to i32
|
||||||
store i32 %tmp146, i32 addrspace(3)* %tmp142
|
store i32 %tmp146, i32 addrspace(3)* %tmp142
|
||||||
%tmp147 = call i32 @llvm.SI.tid()
|
%mbcnt.lo.3 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||||
|
%tmp147 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo.3)
|
||||||
%tmp148 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp147
|
%tmp148 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp147
|
||||||
%tmp149 = and i32 %tmp147, -4
|
%tmp149 = and i32 %tmp147, -4
|
||||||
%tmp150 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp149
|
%tmp150 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %tmp149
|
||||||
@ -1580,8 +1584,8 @@ declare float @llvm.SI.load.const(<16 x i8>, i32) #2
|
|||||||
; Function Attrs: nounwind readnone
|
; Function Attrs: nounwind readnone
|
||||||
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #2
|
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #2
|
||||||
|
|
||||||
; Function Attrs: readnone
|
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
|
||||||
declare i32 @llvm.SI.tid() #1
|
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
|
||||||
|
|
||||||
; Function Attrs: nounwind readonly
|
; Function Attrs: nounwind readonly
|
||||||
declare float @ceil(float) #3
|
declare float @ceil(float) #3
|
||||||
|
@ -6,7 +6,8 @@
|
|||||||
; TONGA-LABEL: test
|
; TONGA-LABEL: test
|
||||||
define void @test(<256 x i32> addrspace(1)* %out, <256 x i32> addrspace(1)* %in) {
|
define void @test(<256 x i32> addrspace(1)* %out, <256 x i32> addrspace(1)* %in) {
|
||||||
entry:
|
entry:
|
||||||
%tid = call i32 @llvm.SI.tid() nounwind readnone
|
%mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
|
||||||
|
%tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)
|
||||||
%aptr = getelementptr <256 x i32>, <256 x i32> addrspace(1)* %in, i32 %tid
|
%aptr = getelementptr <256 x i32>, <256 x i32> addrspace(1)* %in, i32 %tid
|
||||||
%a = load <256 x i32>, <256 x i32> addrspace(1)* %aptr
|
%a = load <256 x i32>, <256 x i32> addrspace(1)* %aptr
|
||||||
call void asm sideeffect "", "~{memory}" ()
|
call void asm sideeffect "", "~{memory}" ()
|
||||||
@ -21,4 +22,7 @@ entry:
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
declare i32 @llvm.SI.tid() nounwind readnone
|
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
|
||||||
|
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
|
||||||
|
|
||||||
|
attributes #0 = { nounwind readnone }
|
||||||
|
Loading…
Reference in New Issue
Block a user