mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-13 09:14:38 +00:00
[AMDGPU] Remove getBidirectionalReasonRank
This method inverts the Reason field of a scheduling candidate. It compares RegCritical and RegExcess correctly, but everything else is broken. In fact, it can prefer a weaker reason such as Weak over RegCritical, because Weak > -RegCritical. The CandReason enum is properly sorted, so just remove the artificial ranking. Differential Revision: https://reviews.llvm.org/D30557 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297536 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b893e797a5
commit
3081264dbe
@ -179,16 +179,6 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
|
||||
}
|
||||
}
|
||||
|
||||
static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) {
|
||||
switch (Reason) {
|
||||
default:
|
||||
return Reason;
|
||||
case GenericSchedulerBase::RegCritical:
|
||||
case GenericSchedulerBase::RegExcess:
|
||||
return -Reason;
|
||||
}
|
||||
}
|
||||
|
||||
// This function is mostly cut and pasted from
|
||||
// GenericScheduler::pickNodeBidirectional()
|
||||
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
|
||||
@ -261,9 +251,7 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
|
||||
} else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
|
||||
Cand = BotCand;
|
||||
} else {
|
||||
int TopRank = getBidirectionalReasonRank(TopCand.Reason);
|
||||
int BotRank = getBidirectionalReasonRank(BotCand.Reason);
|
||||
if (TopRank > BotRank) {
|
||||
if (BotCand.Reason > TopCand.Reason) {
|
||||
Cand = TopCand;
|
||||
} else {
|
||||
Cand = BotCand;
|
||||
|
@ -120,15 +120,16 @@ ret:
|
||||
|
||||
; GCN-LABEL: {{^}}sink_ubfe_i16:
|
||||
; GCN-NOT: lshr
|
||||
; VI: s_bfe_u32 s0, s0, 0xc0004
|
||||
; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
|
||||
; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
|
||||
; GCN: s_cbranch_scc1
|
||||
|
||||
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
|
||||
; VI: s_and_b32 s0, s0, 0xff
|
||||
; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0xff
|
||||
|
||||
; GCN: BB2_2:
|
||||
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
|
||||
; VI: s_and_b32 s0, s0, 0x7f
|
||||
; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0x7f
|
||||
|
||||
; GCN: BB2_3:
|
||||
; GCN: buffer_store_short
|
||||
|
@ -51,8 +51,8 @@ define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float ad
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_clamp_negzero_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
|
||||
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN-DAG: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
|
||||
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[SIGNBIT]], 1.0
|
||||
define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -197,8 +197,8 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a
|
||||
|
||||
; SI-LABEL: @simple_read2st64_f64_over_max_offset
|
||||
; SI-NOT: ds_read2st64_b64
|
||||
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
|
||||
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
|
||||
; SI-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
|
||||
; SI-DAG: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
|
||||
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
|
||||
; SI: s_endpgm
|
||||
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
|
||||
|
@ -10,11 +10,11 @@ declare <3 x half> @llvm.copysign.v3f16(<3 x half>, <3 x half>)
|
||||
declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>)
|
||||
|
||||
; GCN-LABEL: {{^}}test_copysign_f16:
|
||||
; SI: buffer_load_ushort v[[MAG:[0-9]+]]
|
||||
; SI: buffer_load_ushort v[[SIGN:[0-9]+]]
|
||||
; SI: buffer_load_ushort v[[MAG:[0-9]+]]
|
||||
; SI: s_brev_b32 s[[CONST:[0-9]+]], -2
|
||||
; SI: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]]
|
||||
; SI: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
|
||||
; SI-DAG: v_cvt_f32_f16_e32 v[[MAG_F32:[0-9]+]], v[[MAG]]
|
||||
; SI-DAG: v_cvt_f32_f16_e32 v[[SIGN_F32:[0-9]+]], v[[SIGN]]
|
||||
; SI: v_bfi_b32 v[[OUT_F32:[0-9]+]], s[[CONST]], v[[MAG_F32]], v[[SIGN_F32]]
|
||||
; SI: v_cvt_f16_f32_e32 v[[OUT:[0-9]+]], v[[OUT_F32]]
|
||||
; VI: buffer_load_ushort v[[SIGN:[0-9]+]]
|
||||
|
@ -532,7 +532,7 @@ entry:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_umin_i32_ret:
|
||||
; GCN: flat_atomic_umin v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||
define void @atomic_umin_i32_ret(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) {
|
||||
entry:
|
||||
|
@ -189,7 +189,7 @@ define void @s_insertelement_v2f16_1(<2 x half> addrspace(1)* %out, <2 x half> a
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_insertelement_v2i16_0:
|
||||
; GCN: flat_load_dword [[VEC:v[0-9]+]]
|
||||
; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]]
|
||||
; CIVI: v_and_b32_e32 [[ELT1:v[0-9]+]], 0xffff0000, [[VEC]]
|
||||
; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e7, [[ELT1]]
|
||||
|
||||
@ -258,11 +258,11 @@ define void @v_insertelement_v2i16_0_inlineimm(<2 x i16> addrspace(1)* %out, <2
|
||||
; FIXME: fold lshl_or c0, c1, v0 -> or (c0 << c1), v0
|
||||
|
||||
; GCN-LABEL: {{^}}v_insertelement_v2i16_1:
|
||||
; GCN: flat_load_dword [[VEC:v[0-9]+]]
|
||||
; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]]
|
||||
; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e70000, [[VEC]]
|
||||
|
||||
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3e7
|
||||
; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
|
||||
; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3e7
|
||||
; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
|
||||
; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], [[K]], 16, [[ELT0]]
|
||||
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]]
|
||||
@ -295,13 +295,13 @@ define void @v_insertelement_v2i16_1_inlineimm(<2 x i16> addrspace(1)* %out, <2
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_insertelement_v2f16_0:
|
||||
; GCN: flat_load_dword [[VEC:v[0-9]+]]
|
||||
; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]]
|
||||
|
||||
; CIVI: v_and_b32_e32 [[ELT1:v[0-9]+]], 0xffff0000, [[VEC]]
|
||||
; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x4500, [[ELT1]]
|
||||
|
||||
; GFX9: v_mov_b32_e32 [[ELT0:v[0-9]+]], 0x4500{{$}}
|
||||
; GFX9: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[VEC]]
|
||||
; GFX9-DAG: v_mov_b32_e32 [[ELT0:v[0-9]+]], 0x4500{{$}}
|
||||
; GFX9-DAG: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[VEC]]
|
||||
; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], [[ELT1]], 16, [[ELT0]]
|
||||
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]]
|
||||
@ -337,11 +337,11 @@ define void @v_insertelement_v2f16_0_inlineimm(<2 x half> addrspace(1)* %out, <2
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_insertelement_v2f16_1:
|
||||
; GCN: flat_load_dword [[VEC:v[0-9]+]]
|
||||
; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]]
|
||||
; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x45000000, [[VEC]]
|
||||
|
||||
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x4500
|
||||
; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
|
||||
; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4500
|
||||
; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
|
||||
; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], [[K]], 16, [[ELT0]]
|
||||
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]]
|
||||
|
@ -3,9 +3,9 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}s_cvt_pkrtz_v2f16_f32:
|
||||
; GCN: s_load_dword [[X:s[0-9]+]]
|
||||
; GCN: s_load_dword [[Y:s[0-9]+]]
|
||||
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]]
|
||||
; GCN-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x{{b|2c}}
|
||||
; GCN-DAG: s_load_dword [[SY:s[0-9]+]], s[0:1], 0x{{c|30}}
|
||||
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], [[SY]]
|
||||
; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[X]], [[VY]]
|
||||
; VI: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[VY]]
|
||||
define void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float %x, float %y) #0 {
|
||||
|
@ -111,11 +111,11 @@ define void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_pack_v2f16_imm_lo:
|
||||
; GFX9: flat_load_dword [[VAL1:v[0-9]+]]
|
||||
; GFX9-DENORM: s_movk_i32 [[K:s[0-9]+]], 0x1234{{$}}
|
||||
; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]]
|
||||
; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234{{$}}
|
||||
; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[K]], [[VAL1]]
|
||||
|
||||
; GFX9-FLUSH: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234{{$}}
|
||||
; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234{{$}}
|
||||
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]]
|
||||
; GFX9: ; use [[PACKED]]
|
||||
define void @v_pack_v2f16_imm_lo(i32 addrspace(1)* %in1) #0 {
|
||||
@ -133,10 +133,10 @@ define void @v_pack_v2f16_imm_lo(i32 addrspace(1)* %in1) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_pack_v2f16_inline_imm_lo:
|
||||
; GFX9: flat_load_dword [[VAL1:v[0-9]+]]
|
||||
; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]]
|
||||
; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], 4.0, [[VAL1]]
|
||||
|
||||
; GFX9-FLUSH: v_mov_b32_e32 [[K:v[0-9]+]], 0x4400{{$}}
|
||||
; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4400{{$}}
|
||||
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]]
|
||||
|
||||
; GFX9: ; use [[PACKED]]
|
||||
@ -155,11 +155,11 @@ define void @v_pack_v2f16_inline_imm_lo(i32 addrspace(1)* %in1) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_pack_v2f16_imm_hi:
|
||||
; GFX9: flat_load_dword [[VAL0:v[0-9]+]]
|
||||
; GFX9-DENORM: s_movk_i32 [[K:s[0-9]+]], 0x1234
|
||||
; GFX9-DAG: flat_load_dword [[VAL0:v[0-9]+]]
|
||||
; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234
|
||||
; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[K]]
|
||||
|
||||
; GFX9-FLUSH: s_movk_i32 [[K:s[0-9]+]], 0x1234
|
||||
; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234
|
||||
; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]]
|
||||
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]]
|
||||
|
||||
@ -179,10 +179,10 @@ define void @v_pack_v2f16_imm_hi(i32 addrspace(1)* %in0) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_pack_v2f16_inline_f16imm_hi:
|
||||
; GFX9: flat_load_dword [[VAL:v[0-9]+]]
|
||||
; GFX9-DAG: flat_load_dword [[VAL:v[0-9]+]]
|
||||
; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL]], 1.0
|
||||
|
||||
; GFX9-FLUSH: s_movk_i32 [[K:s[0-9]+]], 0x3c00
|
||||
; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3c00
|
||||
; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]]
|
||||
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]]
|
||||
|
||||
|
@ -103,11 +103,11 @@ define void @v_pack_v2i16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_pack_v2i16_imm_lo:
|
||||
; GFX9: flat_load_dword [[VAL1:v[0-9]+]]
|
||||
; GFX9-DENORM: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}}
|
||||
; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]]
|
||||
; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}}
|
||||
; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[K]], [[VAL1]]
|
||||
|
||||
; GFX9-FLUSH: v_mov_b32_e32 [[K:v[0-9]+]], 0x7b{{$}}
|
||||
; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x7b{{$}}
|
||||
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]]
|
||||
|
||||
; GFX9: ; use [[PACKED]]
|
||||
@ -144,10 +144,10 @@ define void @v_pack_v2i16_inline_imm_lo(i32 addrspace(1)* %in1) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_pack_v2i16_imm_hi:
|
||||
; GFX9: flat_load_dword [[VAL0:v[0-9]+]]
|
||||
; GFX9-DAG: flat_load_dword [[VAL0:v[0-9]+]]
|
||||
; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[K]]
|
||||
|
||||
; GFX9-FLUSH: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}}
|
||||
; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}}
|
||||
; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[VAL0]]
|
||||
|
||||
; GFX9: ; use [[PACKED]]
|
||||
|
@ -55,11 +55,11 @@ done: ; preds = %loop
|
||||
|
||||
; GCN-LABEL: {{^}}smrd_valu:
|
||||
; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x2ee0
|
||||
; SI: s_mov_b32
|
||||
; GCN: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
|
||||
; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
|
||||
; SI: s_nop 3
|
||||
; SI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, [[OFFSET]]
|
||||
; SI: s_mov_b32
|
||||
|
||||
; CI: s_load_dword [[OUT:s[0-9]+]], s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xbb8
|
||||
; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]]
|
||||
|
@ -372,9 +372,9 @@ define void @add_select_fneg_negk_f32(i32 %c) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}add_select_fneg_inv2pi_f32:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
|
||||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
|
||||
|
||||
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
|
||||
; GCN: v_subrev_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
|
||||
@ -390,9 +390,9 @@ define void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}add_select_fneg_neginv2pi_f32:
|
||||
; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
|
||||
; GCN: buffer_load_dword [[X:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[Y:v[0-9]+]]
|
||||
; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
|
||||
|
||||
; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
|
||||
; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc
|
||||
|
@ -166,7 +166,7 @@ endif:
|
||||
|
||||
; GCN-LABEL: {{^}}uniform_if_else_ret:
|
||||
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
|
||||
; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
; GCN: buffer_store_dword [[TWO]]
|
||||
|
@ -4,8 +4,8 @@
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; GCN-LABEL: {{^}}v_cnd_nan_nosgpr:
|
||||
; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0
|
||||
; GCN: v_cndmask_b32_e32 v{{[0-9]}}, -1, v{{[0-9]+}}, vcc
|
||||
; GCN: v_cmp_eq_u32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0
|
||||
; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]}}, -1, v{{[0-9]+}}, [[COND]]
|
||||
; GCN-DAG: v{{[0-9]}}
|
||||
; All nan values are converted to 0xffffffff
|
||||
; GCN: s_endpgm
|
||||
@ -105,8 +105,8 @@ define void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)* %out, float %
|
||||
; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_vgprZ_f32:
|
||||
; GCN-DAG: s_load_dword [[X:s[0-9]+]]
|
||||
; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
|
||||
; GCN-DAG: v_cmp_nlg_f32_e64 vcc, [[X]], 0
|
||||
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, [[Z]], vcc
|
||||
; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
|
||||
; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 0, [[Z]], [[COND]]
|
||||
define void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
@ -122,8 +122,8 @@ define void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %
|
||||
; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_vgprZ_f32:
|
||||
; GCN-DAG: {{buffer|flat}}_load_dword [[Z:v[0-9]+]]
|
||||
; GCN-DAG: s_load_dword [[X:s[0-9]+]]
|
||||
; GCN: v_cmp_nlg_f32_e64 vcc, [[X]], 0
|
||||
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[Z]], vcc
|
||||
; GCN: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
|
||||
; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 1.0, [[Z]], [[COND]]
|
||||
define void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
@ -156,7 +156,7 @@ exit:
|
||||
|
||||
; SI: BB{{[0-9]+_[0-9]+}}: ; %bb20
|
||||
; SI: buffer_store_dword
|
||||
; SI: v_cmp_ge_i64_e32 [[CMP:s\[[0-9]+:[0-9]+\]|vcc]]
|
||||
; SI: v_cmp_ge_i64_e{{32|64}} [[CMP:s\[[0-9]+:[0-9]+\]|vcc]]
|
||||
; SI: s_or_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP]], [[COND_STATE]]
|
||||
|
||||
; SI: [[LABEL_FLOW]]:
|
||||
|
Loading…
x
Reference in New Issue
Block a user