mirror of
https://github.com/RPCSX/llvm.git
synced 2025-03-04 19:07:26 +00:00
AMDGPU: Remove unnecessary 'and' (s_and_b64 of the condition with exec) on conditional branch
The comment explaining why this was necessary is incorrect in its description of v_cmp's behavior for inactive workitems.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286134 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e5fd9c09ad
commit
f577de357a
@ -1407,26 +1407,12 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
|
||||
return;
|
||||
}
|
||||
|
||||
// The result of VOPC instructions is or'd against ~EXEC before it is
|
||||
// written to vcc or another SGPR. This means that the value '1' is always
|
||||
// written to the corresponding bit for results that are masked. In order
|
||||
// to correctly check against vccz, we need to and VCC with the EXEC
|
||||
// register in order to clear the value from the masked bits.
|
||||
|
||||
SDLoc SL(N);
|
||||
|
||||
SDNode *MaskedCond =
|
||||
CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
|
||||
CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
|
||||
Cond);
|
||||
SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
|
||||
SDValue(MaskedCond, 0),
|
||||
SDValue()); // Passing SDValue() adds a
|
||||
// glue output.
|
||||
SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
|
||||
CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
|
||||
N->getOperand(2), // Basic Block
|
||||
VCC.getValue(0), // Chain
|
||||
VCC.getValue(1)); // Glue
|
||||
VCC.getValue(0));
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -89,10 +89,9 @@ bb3:
|
||||
|
||||
; GCN-LABEL: {{^}}uniform_conditional_min_long_forward_vcnd_branch:
|
||||
; GCN: s_load_dword [[CND:s[0-9]+]]
|
||||
; GCN-DAG: v_cmp_eq_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[CND]], 0
|
||||
; GCN-DAG: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
|
||||
; GCN: s_and_b64 vcc, exec, [[CMP]]
|
||||
; GCN-NEXT: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]]
|
||||
; GCN-DAG: v_cmp_eq_f32_e64 vcc, [[CND]], 0
|
||||
; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0
|
||||
; GCN-NEXT: s_getpc_b64 vcc
|
||||
@ -434,7 +433,7 @@ endif:
|
||||
; GCN: v_nop_e64
|
||||
; GCN: v_nop_e64
|
||||
; GCN: ;;#ASMEND
|
||||
; GCN-NEXT: s_and_b64 vcc, exec, -1{{$}}
|
||||
; GCN-NEXT: s_mov_b64 vcc, -1{{$}}
|
||||
; GCN-NEXT: s_cbranch_vccz [[RET]]
|
||||
|
||||
; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop_body
|
||||
@ -478,14 +477,13 @@ ret:
|
||||
; GCN-LABEL: {{^}}long_branch_hang:
|
||||
; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6
|
||||
; GCN-NEXT: s_cbranch_scc1 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
|
||||
; GCN-NEXT: s_branch [[SHORTB:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN-NEXT: [[LONG_BR_0]]:
|
||||
; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-(
|
||||
; GCN: s_setpc_b64
|
||||
|
||||
; GCN-NEXT: [[LONG_BR_0]]:
|
||||
; GCN: s_setpc_b64
|
||||
|
||||
; GCN-NEXT: [[LONG_BR_DEST0]]:
|
||||
; GCN: [[SHORTB]]:
|
||||
; GCN-DAG: v_cmp_lt_i32
|
||||
; GCN-DAG: v_cmp_gt_i32
|
||||
; GCN: s_cbranch_vccnz
|
||||
@ -493,6 +491,7 @@ ret:
|
||||
; GCN: s_setpc_b64
|
||||
; GCN: s_setpc_b64
|
||||
|
||||
; GCN: [[LONG_BR_DEST0]]
|
||||
; GCN: s_cmp_eq_u32
|
||||
; GCN-NEXT: s_cbranch_scc0
|
||||
; GCN: s_setpc_b64
|
||||
|
@ -97,7 +97,6 @@ for.body:
|
||||
; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
|
||||
; GCN: v_cmp_eq_u32_e32 vcc, 1,
|
||||
|
||||
; GCN: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, exec, vcc
|
||||
; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]
|
||||
; GCN: s_cbranch_vccnz [[LOOPBB]]
|
||||
; GCN-NEXT: ; BB#2
|
||||
|
@ -170,13 +170,12 @@ ret:
|
||||
; OPT: ret
|
||||
|
||||
; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:
|
||||
; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30
|
||||
; GCN: s_cbranch_vccnz BB3_2
|
||||
|
||||
; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30
|
||||
; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0xff
|
||||
|
||||
; GCN: BB3_2:
|
||||
; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30
|
||||
; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0x7f
|
||||
|
||||
; GCN: BB3_3:
|
||||
|
@ -1,10 +1,9 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
; SILowerI1Copies was not handling IMPLICIT_DEF
|
||||
; SI-LABEL: {{^}}br_implicit_def:
|
||||
; SI: BB#0:
|
||||
; SI-NEXT: s_and_b64 vcc, exec
|
||||
; SI-NEXT: s_cbranch_vccnz
|
||||
define void @br_implicit_def(i32 addrspace(1)* %out, i32 %arg) #0 {
|
||||
bb:
|
||||
|
@ -37,7 +37,6 @@ bb5: ; preds = %bb3, %bb1
|
||||
; OPT-NOT: call i1 @llvm.amdgcn.loop
|
||||
|
||||
; GCN-LABEL: {{^}}annotate_ret_noloop:
|
||||
; GCN: s_and_b64 vcc
|
||||
; GCN: s_cbranch_vccnz
|
||||
; GCN: s_endpgm
|
||||
; GCN: .Lfunc_end1
|
||||
|
@ -262,13 +262,11 @@ exit:
|
||||
; CHECK-NEXT: s_endpgm
|
||||
|
||||
; CHECK: [[KILLBB:BB[0-9]+_[0-9]+]]:
|
||||
; CHECK: s_and_b64 vcc, exec,
|
||||
; CHECK-NEXT: s_cbranch_vccz [[PHIBB:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: [[PHIBB]]:
|
||||
; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]]
|
||||
; CHECK: s_and_b64 vcc, exec, vcc
|
||||
; CHECK: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]]
|
||||
; CHECK-NEXT: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: ; %bb10
|
||||
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 9
|
||||
@ -303,16 +301,14 @@ end:
|
||||
|
||||
; CHECK-LABEL: {{^}}no_skip_no_successors:
|
||||
; CHECK: v_cmp_nge_f32
|
||||
; CHECK: s_and_b64 vcc, exec,
|
||||
; CHECK: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]]
|
||||
; CHECK-NEXT: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: ; %bb6
|
||||
; CHECK: s_mov_b64 exec, 0
|
||||
|
||||
; CHECK: [[SKIPKILL]]:
|
||||
; CHECK: v_cmp_nge_f32
|
||||
; CHECK: s_and_b64 vcc, exec, vcc
|
||||
; CHECK: s_cbranch_vccz [[UNREACHABLE:BB[0-9]+_[0-9]+]]
|
||||
; CHECK-NEXT: s_cbranch_vccz [[UNREACHABLE:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: [[UNREACHABLE]]:
|
||||
; CHECK-NEXT: .Lfunc_end{{[0-9]+}}
|
||||
|
@ -4,8 +4,7 @@
|
||||
|
||||
; GCN-FUNC: {{^}}vccz_workaround:
|
||||
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0
|
||||
; GCN: v_cmp_neq_f32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
|
||||
; GCN: s_and_b64 vcc, exec, [[MASK]]
|
||||
; GCN: v_cmp_neq_f32_e64 vcc, s{{[0-9]+}}, 0{{$}}
|
||||
; GCN: s_waitcnt lgkmcnt(0)
|
||||
; VCCZ-BUG: s_mov_b64 vcc, vcc
|
||||
; NOVCCZ-BUG-NOT: s_mov_b64 vcc, vcc
|
||||
@ -29,7 +28,6 @@ endif:
|
||||
|
||||
; GCN-FUNC: {{^}}vccz_noworkaround:
|
||||
; GCN: v_cmp_neq_f32_e32 vcc, 0, v{{[0-9]+}}
|
||||
; GCN: s_and_b64 vcc, exec, vcc
|
||||
; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: [[EXIT]]:
|
||||
|
@ -32,7 +32,6 @@ done:
|
||||
; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
|
||||
; also scheduled the write first.
|
||||
; GCN-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
|
||||
; GCN-DAG: s_and_b64 vcc, exec, [[COND]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
|
||||
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
@ -89,7 +88,6 @@ done:
|
||||
; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
|
||||
; also scheduled the write first.
|
||||
; GCN-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
|
||||
; GCN-DAG: s_and_b64 vcc, exec, [[COND]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
|
||||
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
@ -253,8 +251,7 @@ ENDIF: ; preds = %IF, %main_body
|
||||
; GCN: s_load_dword [[COND:s[0-9]+]]
|
||||
; GCN: s_cmp_lt_i32 [[COND]], 1
|
||||
; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
|
||||
; GCN: v_cmp_gt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], [[COND]], 0{{$}}
|
||||
; GCN: s_and_b64 vcc, exec, [[MASK]]
|
||||
; GCN: v_cmp_gt_i32_e64 vcc, [[COND]], 0{{$}}
|
||||
; GCN: s_cbranch_vccnz [[EXIT]]
|
||||
; GCN: buffer_store
|
||||
; GCN: {{^}}[[EXIT]]:
|
||||
@ -439,7 +436,6 @@ bb9: ; preds = %bb8, %bb4
|
||||
; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
|
||||
|
||||
; SI: v_cmp_eq_u64_e64
|
||||
; SI: s_and_b64 vcc, exec,
|
||||
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
@ -471,7 +467,6 @@ done:
|
||||
; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
|
||||
|
||||
; SI: v_cmp_ne_u64_e64
|
||||
; SI: s_and_b64 vcc, exec,
|
||||
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
@ -500,7 +495,6 @@ done:
|
||||
|
||||
; GCN-LABEL: {{^}}uniform_if_scc_i64_sgt:
|
||||
; GCN: v_cmp_gt_i64_e64
|
||||
; GCN: s_and_b64 vcc, exec,
|
||||
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
; Fall-through to the else
|
||||
|
@ -355,12 +355,11 @@ main_body:
|
||||
; CHECK: s_wqm_b64 exec, exec
|
||||
; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0
|
||||
; CHECK-DAG: v_mov_b32_e32 [[SEVEN:v[0-9]+]], 0x40e00000
|
||||
; CHECK: s_branch [[LOOPHDR:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body
|
||||
; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]]
|
||||
; CHECK: [[LOOPHDR]]: ; %loop
|
||||
; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]]
|
||||
; CHECK: s_cbranch_vccz
|
||||
; CHECK: s_cbranch_vccz [[LOOPHDR]]
|
||||
; CHECK: ; %break
|
||||
|
||||
; CHECK: ; return
|
||||
|
Loading…
x
Reference in New Issue
Block a user