mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-29 22:30:33 +00:00
AMDGPU/SI: Enable lanemask tracking in misched
Summary: This results in higher register usage, but should make it easier for the compiler to hide latency. This pass is a prerequisite for some more scheduler improvements, and I think the increased register usage with this patch is acceptable, because when combined with the scheduler improvements, the total register usage will decrease. shader-db stats: 2382 shaders in 478 tests Totals: SGPRS: 48672 -> 49088 (0.85 %) VGPRS: 34148 -> 34847 (2.05 %) Code Size: 1285816 -> 1289128 (0.26 %) bytes LDS: 28 -> 28 (0.00 %) blocks Scratch: 492544 -> 573440 (16.42 %) bytes per wave Max Waves: 6856 -> 6846 (-0.15 %) Wait states: 0 -> 0 (0.00 %) Depends on D18451 Reviewers: nhaehnle, arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18452 llvm-svn: 264876
This commit is contained in:
parent
23316f1822
commit
24f53ac119
@ -156,6 +156,10 @@ void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
|
||||
// register spills than just using one of these approaches on its own.
|
||||
Policy.OnlyTopDown = false;
|
||||
Policy.OnlyBottomUp = false;
|
||||
|
||||
// Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
|
||||
if (!enableSIScheduler())
|
||||
Policy.ShouldTrackLaneMasks = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -282,11 +282,11 @@ define void @v_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 addrspace(
|
||||
; SI: buffer_load_dwordx2 v{{\[}}[[LO1:[0-9]+]]:[[HI1:[0-9]+]]{{\]}}
|
||||
; SI-NOT: and
|
||||
; SI: v_and_b32_e32 v[[RESLO0:[0-9]+]], 63, v[[LO0]]
|
||||
; SI-NOT: and
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO0]]
|
||||
; SI: v_and_b32_e32 v[[RESLO1:[0-9]+]], 63, v[[LO1]]
|
||||
; SI-NOT: and
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI-NOT: and
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO1]]
|
||||
define void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
|
||||
%a = load volatile i64, i64 addrspace(1)* %aptr
|
||||
%b = load volatile i64, i64 addrspace(1)* %aptr
|
||||
|
@ -3,11 +3,11 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
|
||||
; GCN: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
|
||||
; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; GCN: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
|
||||
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
|
||||
; GCN: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16
|
||||
@ -21,12 +21,12 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
|
||||
; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
|
||||
; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SICI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
|
||||
; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
|
||||
; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
|
||||
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
|
||||
|
@ -2,12 +2,10 @@
|
||||
|
||||
; GCN-LABEL: {{^}}stored_fi_to_lds:
|
||||
; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
|
||||
; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO1]]
|
||||
|
||||
; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
|
||||
|
||||
; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]]
|
||||
define void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {
|
||||
%tmp = alloca float
|
||||
@ -19,7 +17,6 @@ define void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {
|
||||
; Offset is applied
|
||||
; GCN-LABEL: {{^}}stored_fi_to_lds_2_small_objects:
|
||||
; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
|
||||
; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO1]]
|
||||
|
||||
@ -27,6 +24,7 @@ define void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, [[FI1]]
|
||||
|
||||
|
||||
; GCN-DAG: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
|
||||
; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]]
|
||||
|
||||
|
@ -7,7 +7,7 @@ declare float @llvm.fma.f32(float, float, float) nounwind readnone
|
||||
; FUNC-LABEL: @commute_add_imm_fabs_f32
|
||||
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
; SI: buffer_store_dword [[REG]]
|
||||
define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
@ -21,7 +21,7 @@ define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(
|
||||
; FUNC-LABEL: @commute_mul_imm_fneg_fabs_f32
|
||||
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
; SI: buffer_store_dword [[REG]]
|
||||
define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
@ -36,7 +36,7 @@ define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrs
|
||||
; FUNC-LABEL: @commute_mul_imm_fneg_f32
|
||||
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
; SI: buffer_store_dword [[REG]]
|
||||
define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
@ -52,7 +52,7 @@ define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(
|
||||
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x44800000
|
||||
; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]]
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
; SI: buffer_store_dword [[REG]]
|
||||
define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
@ -67,7 +67,7 @@ define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(
|
||||
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[X]], |[[Y]]|
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
; SI: buffer_store_dword [[REG]]
|
||||
define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
@ -84,7 +84,7 @@ define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)*
|
||||
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -[[Y]]
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
; SI: buffer_store_dword [[REG]]
|
||||
define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
@ -101,7 +101,7 @@ define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)*
|
||||
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -|[[Y]]|
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
; SI: buffer_store_dword [[REG]]
|
||||
define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
@ -120,7 +120,7 @@ define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace
|
||||
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, |[[Y]]|
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
; SI: buffer_store_dword [[REG]]
|
||||
define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
@ -138,7 +138,7 @@ define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrs
|
||||
; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -|[[Y]]|
|
||||
; SI-NEXT: buffer_store_dword [[REG]]
|
||||
; SI: buffer_store_dword [[REG]]
|
||||
define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
|
||||
|
@ -116,7 +116,7 @@ define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %
|
||||
; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
|
||||
; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
|
||||
; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[FFBH_LO]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[ADD]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]]
|
||||
; SI-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
|
||||
|
@ -149,7 +149,7 @@ define void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 add
|
||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_neg1:
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
|
||||
; SI-NEXT: buffer_store_dword [[RESULT]],
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
define void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
|
||||
%val = load i32, i32 addrspace(1)* %valptr
|
||||
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
|
||||
@ -162,7 +162,7 @@ define void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 add
|
||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_ne_neg1:
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
|
||||
; SI-NEXT: buffer_store_dword [[RESULT]],
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
define void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
|
||||
%val = load i32, i32 addrspace(1)* %valptr
|
||||
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
|
||||
|
@ -116,9 +116,10 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs
|
||||
; FUNC-LABEL: {{^}}ctpop_i64_in_br:
|
||||
; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
|
||||
; VI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x34
|
||||
; GCN: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
|
||||
; GCN-DAG: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
|
||||
; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0
|
||||
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[ZERO]]
|
||||
; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
|
||||
; GCN: s_endpgm
|
||||
define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
|
||||
|
@ -33,8 +33,8 @@ define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8>
|
||||
; SI-NOT: bfe
|
||||
; SI-NOT: v_cvt_f32_ubyte3_e32
|
||||
; SI-DAG: v_cvt_f32_ubyte2_e32
|
||||
; SI-DAG: v_cvt_f32_ubyte1_e32
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32
|
||||
; SI-DAG: v_cvt_f32_ubyte1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
|
||||
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
|
||||
define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
|
||||
%load = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
|
||||
|
@ -85,14 +85,8 @@ define void @simple_read2_v3f32_superreg_align4(float addrspace(1)* %out) #0 {
|
||||
}
|
||||
|
||||
; CI-LABEL: {{^}}simple_read2_v4f32_superreg_align8:
|
||||
; CI-DAG: ds_read2_b64 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}}
|
||||
|
||||
; FIXME: These moves shouldn't be necessary, it should be able to
|
||||
; store the same register if offset1 was the non-zero offset.
|
||||
|
||||
; CI: v_mov_b32
|
||||
; CI: v_mov_b32
|
||||
; CI: buffer_store_dwordx4
|
||||
; CI: ds_read2_b64 [[REG_ZW:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset1:1{{$}}
|
||||
; CI: buffer_store_dwordx4 [[REG_ZW]]
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v4f32_superreg_align8(<4 x float> addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
@ -104,10 +98,8 @@ define void @simple_read2_v4f32_superreg_align8(<4 x float> addrspace(1)* %out)
|
||||
}
|
||||
|
||||
; CI-LABEL: {{^}}simple_read2_v4f32_superreg:
|
||||
; CI: ds_read2_b64 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}}
|
||||
; CI: v_mov_b32
|
||||
; CI: v_mov_b32
|
||||
; CI: buffer_store_dwordx4
|
||||
; CI-DAG: ds_read2_b64 [[REG_ZW:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset1:1{{$}}
|
||||
; CI: buffer_store_dwordx4 [[REG_ZW]]
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v4f32_superreg(<4 x float> addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
@ -120,14 +112,10 @@ define void @simple_read2_v4f32_superreg(<4 x float> addrspace(1)* %out) #0 {
|
||||
|
||||
; FIXME: Extra moves shuffling superregister
|
||||
; CI-LABEL: {{^}}simple_read2_v8f32_superreg:
|
||||
; CI: ds_read2_b64 v{{\[}}[[REG_ELT3:[0-9]+]]:[[REG_ELT7:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:3{{$}}
|
||||
; CI: v_mov_b32
|
||||
; CI: v_mov_b32
|
||||
; CI: ds_read2_b64 v{{\[}}[[REG_ELT6:[0-9]+]]:[[REG_ELT5:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2{{$}}
|
||||
; CI: v_mov_b32
|
||||
; CI: v_mov_b32
|
||||
; CI: buffer_store_dwordx4
|
||||
; CI: buffer_store_dwordx4
|
||||
; CI-DAG: ds_read2_b64 [[VEC_HI:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset0:2 offset1:3{{$}}
|
||||
; CI-DAG: ds_read2_b64 [[VEC_LO:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset1:1{{$}}
|
||||
; CI-DAG: buffer_store_dwordx4 [[VEC_HI]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
|
||||
; CI-DAG: buffer_store_dwordx4 [[VEC_LO]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64{{$}}
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v8f32_superreg(<8 x float> addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
@ -140,22 +128,15 @@ define void @simple_read2_v8f32_superreg(<8 x float> addrspace(1)* %out) #0 {
|
||||
|
||||
; FIXME: Extra moves shuffling superregister
|
||||
; CI-LABEL: {{^}}simple_read2_v16f32_superreg:
|
||||
; CI: ds_read2_b64 v{{\[}}[[REG_ELT11:[0-9]+]]:[[REG_ELT15:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:3{{$}}
|
||||
; CI: v_mov_b32
|
||||
; CI: v_mov_b32
|
||||
; CI: ds_read2_b64 v{{\[}}[[REG_ELT14:[0-9]+]]:[[REG_ELT13:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:5 offset1:7{{$}}
|
||||
; CI: ds_read2_b64 v{{\[}}[[REG_ELT14:[0-9]+]]:[[REG_ELT13:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:6 offset1:4{{$}}
|
||||
; CI: v_mov_b32
|
||||
; CI: v_mov_b32
|
||||
; CI: ds_read2_b64 v{{\[}}[[REG_ELT12:[0-9]+]]:[[REG_ELT10:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2{{$}}
|
||||
; CI: v_mov_b32
|
||||
; CI: v_mov_b32
|
||||
|
||||
; CI-DAG: ds_read2_b64 [[VEC0_3:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset1:1{{$}}
|
||||
; CI-DAG: ds_read2_b64 [[VEC4_7:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset0:2 offset1:3{{$}}
|
||||
; CI-DAG: ds_read2_b64 [[VEC8_11:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset0:4 offset1:5{{$}}
|
||||
; CI-DAG: ds_read2_b64 [[VEC12_15:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset0:6 offset1:7{{$}}
|
||||
; CI: s_waitcnt lgkmcnt(0)
|
||||
; CI: buffer_store_dwordx4
|
||||
; CI: buffer_store_dwordx4
|
||||
; CI: buffer_store_dwordx4
|
||||
; CI: buffer_store_dwordx4
|
||||
; CI-DAG: buffer_store_dwordx4 [[VEC0_3]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64{{$}}
|
||||
; CI-DAG: buffer_store_dwordx4 [[VEC4_7]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
|
||||
; CI-DAG: buffer_store_dwordx4 [[VEC8_11]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:32
|
||||
; CI-DAG: buffer_store_dwordx4 [[VEC12_15]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:48
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v16f32_superreg(<16 x float> addrspace(1)* %out) #0 {
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
@ -65,9 +65,9 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add
|
||||
|
||||
; SI-LABEL: @simple_read2st64_f32_over_max_offset
|
||||
; SI-NOT: ds_read2st64_b32
|
||||
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
|
||||
; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
|
||||
; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
|
||||
; SI-DAG: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
|
||||
; SI-DAG: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
|
||||
; SI-DAG: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]{{$}}
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
|
||||
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
@ -179,8 +179,8 @@ define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float add
|
||||
}
|
||||
|
||||
; SI-LABEL: @simple_write2_two_val_f32_x2
|
||||
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset1:8
|
||||
; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
|
||||
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL0]] offset1:11
|
||||
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL1:v[0-9]+]], [[VAL1]] offset0:8 offset1:27
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
@ -209,8 +209,8 @@ define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspac
|
||||
}
|
||||
|
||||
; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base
|
||||
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
|
||||
; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
|
||||
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL0]] offset0:3 offset1:11
|
||||
; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL1:v[0-9]+]], [[VAL1]] offset0:8 offset1:27
|
||||
; SI: s_endpgm
|
||||
define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
@ -13,8 +13,8 @@ declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind r
|
||||
; GCN-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
|
||||
; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
|
||||
; GCN: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
|
||||
; GCN: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
|
||||
; GCN-DAG: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
|
||||
; GCN: s_endpgm
|
||||
define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind {
|
||||
|
@ -13,8 +13,8 @@ declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
|
||||
; FUNC-LABEL: {{^}}ffloor_f64:
|
||||
; CI: v_floor_f64_e32
|
||||
; SI: v_fract_f64_e32
|
||||
; SI: v_min_f64
|
||||
; SI: v_cmp_class_f64_e64
|
||||
; SI-DAG: v_min_f64
|
||||
; SI-DAG: v_cmp_class_f64_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_add_f64
|
||||
@ -28,8 +28,8 @@ define void @ffloor_f64(double addrspace(1)* %out, double %x) {
|
||||
; FUNC-LABEL: {{^}}ffloor_f64_neg:
|
||||
; CI: v_floor_f64_e64
|
||||
; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT:s[[0-9]+:[0-9]+]]]
|
||||
; SI: v_min_f64
|
||||
; SI: v_cmp_class_f64_e64
|
||||
; SI-DAG: v_min_f64
|
||||
; SI-DAG: v_cmp_class_f64_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]]
|
||||
@ -44,8 +44,8 @@ define void @ffloor_f64_neg(double addrspace(1)* %out, double %x) {
|
||||
; FUNC-LABEL: {{^}}ffloor_f64_neg_abs:
|
||||
; CI: v_floor_f64_e64
|
||||
; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT:s[[0-9]+:[0-9]+]]]|
|
||||
; SI: v_min_f64
|
||||
; SI: v_cmp_class_f64_e64
|
||||
; SI-DAG: v_min_f64
|
||||
; SI-DAG: v_cmp_class_f64_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_cndmask_b32_e64
|
||||
; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT]]|
|
||||
|
@ -55,8 +55,8 @@ define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}fneg_fabs_f64:
|
||||
; SI: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}
|
||||
; SI: s_load_dwordx2
|
||||
; SI: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}
|
||||
; SI: v_bfrev_b32_e32 [[IMMREG:v[0-9]+]], 1{{$}}
|
||||
; SI-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
|
||||
|
@ -10,10 +10,11 @@ declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
|
||||
; TODO: this constant should be folded:
|
||||
; VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1
|
||||
; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff
|
||||
; VI: s_mov_b32 s[[LOW:[0-9+]]], s[[ALLBITS]]
|
||||
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW]]:[[HIGH1]]]
|
||||
; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]]
|
||||
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
|
||||
; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
|
||||
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW]]:[[HIGH2]]]
|
||||
; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]]
|
||||
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
|
||||
|
||||
define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
|
||||
%rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone
|
||||
|
@ -68,7 +68,6 @@ main_body:
|
||||
; create copies which we don't bother to track here.
|
||||
;
|
||||
;CHECK-LABEL: {{^}}test3:
|
||||
;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff
|
||||
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 glc
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v2, s[0:3], 0 idxen glc
|
||||
@ -79,6 +78,7 @@ main_body:
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v3, s[0:3], 0 offen offset:42 glc
|
||||
;CHECK-DAG: s_waitcnt vmcnt(0)
|
||||
;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff
|
||||
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[SOFS]] offset:1 glc
|
||||
define float @test3(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) #0 {
|
||||
main_body:
|
||||
|
@ -187,7 +187,7 @@ define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i3
|
||||
; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
|
||||
; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[VB]]
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
|
||||
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
|
||||
; SI-NEXT: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @test_class_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
|
||||
@ -202,7 +202,7 @@ define void @test_class_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
|
||||
; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
|
||||
; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SA]]|, [[VB]]
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
|
||||
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
|
||||
; SI-NEXT: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @test_class_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
|
||||
@ -218,7 +218,7 @@ define void @test_class_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
|
||||
; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
|
||||
; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -[[SA]], [[VB]]
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
|
||||
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
|
||||
; SI-NEXT: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @test_class_fneg_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
|
||||
@ -234,7 +234,7 @@ define void @test_class_fneg_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
|
||||
; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
|
||||
; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
|
||||
; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|[[SA]]|, [[VB]]
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
|
||||
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
|
||||
; SI-NEXT: buffer_store_dword [[RESULT]]
|
||||
; SI: s_endpgm
|
||||
define void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
|
||||
|
@ -28,10 +28,11 @@ define void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #0 {
|
||||
; TODO: this constant should be folded:
|
||||
; VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1
|
||||
; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff
|
||||
; VI: s_mov_b32 s[[LOW:[0-9+]]], s[[ALLBITS]]
|
||||
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW]]:[[HIGH1]]]
|
||||
; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]]
|
||||
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
|
||||
; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
|
||||
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW]]:[[HIGH2]]]
|
||||
; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]]
|
||||
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
|
||||
define void @rsq_clamp_f64(double addrspace(1)* %out, double %src) #0 {
|
||||
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
|
||||
store double %rsq_clamp, double addrspace(1)* %out
|
||||
|
@ -5,8 +5,8 @@
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
|
||||
; EG: LDS_WRXCHG_RET *
|
||||
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
|
||||
; GCN: s_load_dword [[SPTR:s[0-9]+]],
|
||||
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
|
||||
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
@ -31,8 +31,8 @@ define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
|
||||
; XXX - Is it really necessary to load 4 into VGPR?
|
||||
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
|
||||
; EG: LDS_ADD_RET *
|
||||
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
|
||||
; GCN: s_load_dword [[SPTR:s[0-9]+]],
|
||||
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
|
||||
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
|
||||
; GCN: buffer_store_dword [[RESULT]],
|
||||
|
@ -30,10 +30,10 @@ define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
|
||||
; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
|
||||
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
|
||||
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
|
||||
; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
|
||||
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
|
||||
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
|
||||
; GCN: buffer_store_dwordx2 [[RESULT]],
|
||||
|
@ -191,8 +191,8 @@ define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_test_umax_ugt_imm_v2i32:
|
||||
; SI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 15
|
||||
; SI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 23
|
||||
; SI-DAG: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 15
|
||||
; SI-DAG: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 23
|
||||
define void @s_test_umax_ugt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
|
||||
%cmp = icmp ugt <2 x i32> %a, <i32 15, i32 23>
|
||||
%val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 15, i32 23>
|
||||
@ -205,8 +205,8 @@ define void @s_test_umax_ugt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %
|
||||
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI: s_max_u32 [[MAX:s[0-9]+]], [[A]], [[B]]
|
||||
; SI-NEXT: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
|
||||
; SI-NEXT: buffer_store_dword [[VMAX]]
|
||||
; SI: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
|
||||
; SI: buffer_store_dword [[VMAX]]
|
||||
define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
|
||||
%a.ext = zext i16 %a to i32
|
||||
%b.ext = zext i16 %b to i32
|
||||
@ -223,8 +223,8 @@ define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i1
|
||||
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI: s_max_i32 [[MAX:s[0-9]+]], [[A]], [[B]]
|
||||
; SI-NEXT: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
|
||||
; SI-NEXT: buffer_store_dword [[VMAX]]
|
||||
; SI: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
|
||||
; SI: buffer_store_dword [[VMAX]]
|
||||
define void @simplify_demanded_bits_test_max_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
|
||||
%a.ext = sext i16 %a to i32
|
||||
%b.ext = sext i16 %b to i32
|
||||
|
@ -301,8 +301,8 @@ define void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <
|
||||
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI: s_min_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
|
||||
; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
|
||||
; SI-NEXT: buffer_store_dword [[VMIN]]
|
||||
; SI: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
|
||||
; SI: buffer_store_dword [[VMIN]]
|
||||
define void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
|
||||
%a.ext = zext i16 %a to i32
|
||||
%b.ext = zext i16 %b to i32
|
||||
@ -319,8 +319,8 @@ define void @simplify_demanded_bits_test_umin_ult_i16(i32 addrspace(1)* %out, i1
|
||||
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; SI: s_min_i32 [[MIN:s[0-9]+]], [[A]], [[B]]
|
||||
; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
|
||||
; SI-NEXT: buffer_store_dword [[VMIN]]
|
||||
; SI: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
|
||||
; SI: buffer_store_dword [[VMIN]]
|
||||
define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
|
||||
%a.ext = sext i16 %a to i32
|
||||
%b.ext = sext i16 %b to i32
|
||||
|
@ -8,7 +8,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() readnone
|
||||
|
||||
; MUBUF load with an immediate byte offset that fits into 12-bits
|
||||
; CHECK-LABEL: {{^}}mubuf_load0:
|
||||
; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0
|
||||
; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0
|
||||
define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||
entry:
|
||||
%0 = getelementptr i32, i32 addrspace(1)* %in, i64 1
|
||||
@ -19,7 +19,7 @@ entry:
|
||||
|
||||
; MUBUF load with the largest possible immediate offset
|
||||
; CHECK-LABEL: {{^}}mubuf_load1:
|
||||
; CHECK: buffer_load_ubyte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0
|
||||
; CHECK: buffer_load_ubyte v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0
|
||||
define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
|
||||
entry:
|
||||
%0 = getelementptr i8, i8 addrspace(1)* %in, i64 4095
|
||||
|
@ -216,7 +216,7 @@ define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
|
||||
; GCN: buffer_load_sbyte [[B:v[0-9]+]]
|
||||
; GCN: v_cmp_ne_i32_e32 vcc, -1, [[B]]{{$}}
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN-NEXT: buffer_store_byte [[RESULT]]
|
||||
; GCN: buffer_store_byte [[RESULT]]
|
||||
; GCN: s_endpgm
|
||||
define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
|
||||
%b = load i8, i8 addrspace(1)* %b.ptr
|
||||
|
@ -59,7 +59,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}f64_one:
|
||||
; SI: v_cmp_lg_f64_e32 vcc
|
||||
; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
|
||||
; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
|
||||
define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) {
|
||||
entry:
|
||||
%0 = fcmp one double %a, %b
|
||||
@ -80,7 +80,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}f64_ueq:
|
||||
; SI: v_cmp_nlg_f64_e32 vcc
|
||||
; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
|
||||
; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
|
||||
define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) {
|
||||
entry:
|
||||
%0 = fcmp ueq double %a, %b
|
||||
@ -92,7 +92,7 @@ entry:
|
||||
; FUNC-LABEL: {{^}}f64_ugt:
|
||||
|
||||
; SI: v_cmp_nle_f64_e32 vcc
|
||||
; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
|
||||
; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
|
||||
define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) {
|
||||
entry:
|
||||
%0 = fcmp ugt double %a, %b
|
||||
@ -103,7 +103,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}f64_uge:
|
||||
; SI: v_cmp_nlt_f64_e32 vcc
|
||||
; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
|
||||
; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
|
||||
define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) {
|
||||
entry:
|
||||
%0 = fcmp uge double %a, %b
|
||||
@ -114,7 +114,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}f64_ult:
|
||||
; SI: v_cmp_nge_f64_e32 vcc
|
||||
; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
|
||||
; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
|
||||
define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) {
|
||||
entry:
|
||||
%0 = fcmp ult double %a, %b
|
||||
@ -125,7 +125,7 @@ entry:
|
||||
|
||||
; FUNC-LABEL: {{^}}f64_ule:
|
||||
; SI: v_cmp_ngt_f64_e32 vcc
|
||||
; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
|
||||
; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
|
||||
define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) {
|
||||
entry:
|
||||
%0 = fcmp ule double %a, %b
|
||||
|
@ -1,4 +1,9 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI --misched=si < %s | FileCheck %s
|
||||
; FIXME: The si scheduler crashes when lane mask tracking is enabled, so
|
||||
; we need to disable this when the si scheduler is being used.
|
||||
; The only way the subtarget knows that the si machine scheduler is being used
|
||||
; is to specify -mattr=si-scheduler. If we just pass --misched=si, the backend
|
||||
; won't know what scheduler we are using.
|
||||
; RUN: llc -march=amdgcn -mcpu=SI --misched=si -mattr=si-scheduler < %s | FileCheck %s
|
||||
|
||||
; The test checks the "si" machine scheduler pass works correctly.
|
||||
|
||||
|
@ -230,8 +230,8 @@ define void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
; GCN-LABEL: {{^}}s_ashr_63_i64:
|
||||
; GCN-DAG: s_load_dword s[[HI:[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
|
||||
; GCN: s_ashr_i32 s[[SHIFT:[0-9]+]], s[[HI]], 31
|
||||
; GCN: s_mov_b32 s[[COPYSHIFT:[0-9]+]], s[[SHIFT]]
|
||||
; GCN: s_add_u32 {{s[0-9]+}}, s[[HI]], {{s[0-9]+}}
|
||||
; GCN-DAG: s_mov_b32 s[[COPYSHIFT:[0-9]+]], s[[SHIFT]]
|
||||
; GCN-DAG: s_add_u32 {{s[0-9]+}}, s[[HI]], {{s[0-9]+}}
|
||||
; GCN: s_addc_u32 {{s[0-9]+}}, s[[COPYSHIFT]], {{s[0-9]+}}
|
||||
define void @s_ashr_63_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
|
||||
%result = ashr i64 %a, 63
|
||||
|
@ -8,14 +8,12 @@ target triple="amdgcn--"
|
||||
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
|
||||
; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s2
|
||||
; CHECK-NEXT: v_cmp_eq_i32_e32 vcc, 0, v1
|
||||
; CHECK: v_cmp_eq_i32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; BB0_1:
|
||||
; CHECK: s_load_dword s6, s[0:1], 0xa
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s6
|
||||
; BB0_2:
|
||||
; CHECK: s_or_b64 exec, exec, s[2:3]
|
||||
; CHECK-NEXT: s_mov_b32 s7, 0xf000
|
||||
|
@ -22,7 +22,7 @@ define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspa
|
||||
; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
|
||||
; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; SI: buffer_store_byte [[RESULT]]
|
||||
define void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
|
||||
%load = load i1, i1 addrspace(1)* %in
|
||||
%ext = zext i1 %load to i32
|
||||
@ -45,7 +45,7 @@ define void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspa
|
||||
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_1:
|
||||
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
|
||||
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; SI: buffer_store_byte [[RESULT]]
|
||||
define void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
|
||||
%load = load i1, i1 addrspace(1)* %in
|
||||
%ext = zext i1 %load to i32
|
||||
@ -57,7 +57,7 @@ define void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspa
|
||||
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_neg1:
|
||||
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
|
||||
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; SI: buffer_store_byte [[RESULT]]
|
||||
define void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
|
||||
%load = load i1, i1 addrspace(1)* %in
|
||||
%ext = sext i1 %load to i32
|
||||
@ -81,7 +81,7 @@ define void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addr
|
||||
; FUNC-LABEL {{^}}sextload_i1_to_i32_trunc_cmp_ne_0:
|
||||
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
|
||||
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; SI: buffer_store_byte [[RESULT]]
|
||||
define void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
|
||||
%load = load i1, i1 addrspace(1)* %in
|
||||
%ext = sext i1 %load to i32
|
||||
@ -93,7 +93,7 @@ define void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspa
|
||||
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_0:
|
||||
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
|
||||
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; SI: buffer_store_byte [[RESULT]]
|
||||
define void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
|
||||
%load = load i1, i1 addrspace(1)* %in
|
||||
%ext = zext i1 %load to i32
|
||||
@ -119,7 +119,7 @@ define void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspa
|
||||
; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
|
||||
; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
|
||||
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
|
||||
; SI-NEXT: buffer_store_byte [[RESULT]]
|
||||
; SI: buffer_store_byte [[RESULT]]
|
||||
define void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
|
||||
%load = load i1, i1 addrspace(1)* %in
|
||||
%ext = zext i1 %load to i32
|
||||
@ -158,7 +158,7 @@ define void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addr
|
||||
; SI: buffer_load_sbyte [[LOAD:v[0-9]+]]
|
||||
; SI: v_cmp_ne_i32_e32 vcc, -1, [[LOAD]]{{$}}
|
||||
; SI-NEXT: v_cndmask_b32_e64
|
||||
; SI-NEXT: buffer_store_byte
|
||||
; SI: buffer_store_byte
|
||||
define void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
|
||||
%load = load i8, i8 addrspace(1)* %in
|
||||
%masked = and i8 %load, 255
|
||||
|
@ -28,10 +28,10 @@ define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a)
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
|
||||
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; SI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; VI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
||||
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
@ -42,13 +42,13 @@ define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, floa
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_use_s_v_s:
|
||||
; GCN-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
|
||||
; GCN-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
|
||||
|
||||
; GCN: buffer_load_dword [[VA0:v[0-9]+]]
|
||||
; GCN-NOT: v_mov_b32
|
||||
; GCN: buffer_load_dword [[VA1:v[0-9]+]]
|
||||
|
||||
; GCN-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
|
||||
; GCN-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
|
||||
|
||||
; GCN-NOT: v_mov_b32
|
||||
; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
|
||||
; GCN-NOT: v_mov_b32
|
||||
@ -68,10 +68,10 @@ define void @test_use_s_v_s(float addrspace(1)* %out, float %a, float %b, float
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
|
||||
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; SI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; VI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
||||
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
@ -82,10 +82,10 @@ define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, floa
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
|
||||
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; SI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; SI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||
; VI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; VI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
|
||||
; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
||||
; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
|
Loading…
Reference in New Issue
Block a user