AMDGPU: Remove unnecessary and on conditional branch

The comment explaining why this was necessary is incorrect
in its description of v_cmp's behavior for inactive workitems.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286134 91177308-0d34-0410-b5e6-96231b3b80d8
commit f577de357a
parent e5fd9c09ad
Matt Arsenault 2016-11-07 19:09:33 +00:00
10 changed files with 18 additions and 50 deletions
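In terms of generated code, the change drops the explicit exec mask between a uniform compare and the branch that consumes it. A representative before/after sketch (the registers and branch target are illustrative, not taken from any one test):

    ; before: VOPC result goes to an SGPR pair, then is masked with exec
    v_cmp_eq_f32_e64 s[0:1], s4, 0
    s_and_b64 vcc, exec, s[0:1]
    s_cbranch_vccz BB0_2

    ; after: the compare writes vcc directly and the branch tests it
    v_cmp_eq_f32_e64 vcc, s4, 0
    s_cbranch_vccz BB0_2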


@@ -1407,26 +1407,12 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
     return;
   }
 
-  // The result of VOPC instructions is or'd against ~EXEC before it is
-  // written to vcc or another SGPR. This means that the value '1' is always
-  // written to the corresponding bit for results that are masked. In order
-  // to correctly check against vccz, we need to and VCC with the EXEC
-  // register in order to clear the value from the masked bits.
-
   SDLoc SL(N);
 
-  SDNode *MaskedCond =
-      CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
-                             CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
-                             Cond);
-  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
-                                     SDValue(MaskedCond, 0),
-                                     SDValue()); // Passing SDValue() adds a
-                                                 // glue output.
+  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
   CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                        N->getOperand(2), // Basic Block
-                       VCC.getValue(0),  // Chain
-                       VCC.getValue(1)); // Glue
+                       VCC.getValue(0));
   return;
 }
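The deleted comment had the masking behavior backwards: a VOPC compare writes 0, not 1, to the result bits of lanes that are inactive in exec, so the condition register can never have a bit set outside the live mask, and the and with exec cannot change the outcome of a vccz/vccnz test. A worked example with an illustrative exec value:

    ; assume exec = 0x00000000000000f0, i.e. only lanes 4-7 are active
    v_cmp_eq_f32_e64 s[0:1], v0, 0   ; s[0:1] can only have bits set within 0xf0
    s_and_b64 vcc, exec, s[0:1]      ; no-op as far as vccz/vccnz is concerned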


@@ -89,10 +89,9 @@ bb3:
 ; GCN-LABEL: {{^}}uniform_conditional_min_long_forward_vcnd_branch:
 ; GCN: s_load_dword [[CND:s[0-9]+]]
-; GCN-DAG: v_cmp_eq_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[CND]], 0
 ; GCN-DAG: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
-; GCN: s_and_b64 vcc, exec, [[CMP]]
-; GCN-NEXT: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]]
+; GCN-DAG: v_cmp_eq_f32_e64 vcc, [[CND]], 0
+; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]]
 
 ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0
 ; GCN-NEXT: s_getpc_b64 vcc
@@ -434,7 +433,7 @@ endif:
 ; GCN: v_nop_e64
 ; GCN: v_nop_e64
 ; GCN: ;;#ASMEND
-; GCN-NEXT: s_and_b64 vcc, exec, -1{{$}}
+; GCN-NEXT: s_mov_b64 vcc, -1{{$}}
 ; GCN-NEXT: s_cbranch_vccz [[RET]]
 
 ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop_body
@@ -478,14 +477,13 @@ ret:
 ; GCN-LABEL: {{^}}long_branch_hang:
 ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6
 ; GCN-NEXT: s_cbranch_scc1 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_branch [[SHORTB:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: [[LONG_BR_0]]:
 ; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-(
 ; GCN: s_setpc_b64
 
-; GCN-NEXT: [[LONG_BR_0]]:
-; GCN: s_setpc_b64
-
-; GCN-NEXT: [[LONG_BR_DEST0]]:
+; GCN: [[SHORTB]]:
 ; GCN-DAG: v_cmp_lt_i32
 ; GCN-DAG: v_cmp_gt_i32
 ; GCN: s_cbranch_vccnz
@@ -493,6 +491,7 @@ ret:
 ; GCN: s_setpc_b64
 
 ; GCN: s_setpc_b64
+; GCN: [[LONG_BR_DEST0]]
 ; GCN: s_cmp_eq_u32
 ; GCN-NEXT: s_cbranch_scc0
 ; GCN: s_setpc_b64


@@ -97,7 +97,6 @@ for.body:
 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
 ; GCN: v_cmp_eq_u32_e32 vcc, 1,
-; GCN: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, exec, vcc
 ; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]
 ; GCN: s_cbranch_vccnz [[LOOPBB]]
 ; GCN-NEXT: ; BB#2


@@ -170,13 +170,12 @@ ret:
 ; OPT: ret
 
 ; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:
-; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30
-
 ; GCN: s_cbranch_vccnz BB3_2
+; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30
 ; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0xff
 
 ; GCN: BB3_2:
 ; GCN: s_lshr_b64 s{{\[}}[[LO:[0-9]+]]:{{[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}}, 30
 ; GCN: s_and_b32 s{{[0-9]+}}, s[[LO]], 0x7f
 
 ; GCN: BB3_3:


@@ -1,10 +1,9 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
 ; SILowerI1Copies was not handling IMPLICIT_DEF
 ; SI-LABEL: {{^}}br_implicit_def:
 ; SI: BB#0:
-; SI-NEXT: s_and_b64 vcc, exec
 ; SI-NEXT: s_cbranch_vccnz
 define void @br_implicit_def(i32 addrspace(1)* %out, i32 %arg) #0 {
 bb:


@@ -37,7 +37,6 @@ bb5: ; preds = %bb3, %bb1
 ; OPT-NOT: call i1 @llvm.amdgcn.loop
 
 ; GCN-LABEL: {{^}}annotate_ret_noloop:
-; GCN: s_and_b64 vcc
 ; GCN: s_cbranch_vccnz
 ; GCN: s_endpgm
 ; GCN: .Lfunc_end1


@@ -262,13 +262,11 @@ exit:
 ; CHECK-NEXT: s_endpgm
 
 ; CHECK: [[KILLBB:BB[0-9]+_[0-9]+]]:
-; CHECK: s_and_b64 vcc, exec,
 ; CHECK-NEXT: s_cbranch_vccz [[PHIBB:BB[0-9]+_[0-9]+]]
 
 ; CHECK: [[PHIBB]]:
 ; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]]
-; CHECK: s_and_b64 vcc, exec, vcc
-; CHECK: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]]
 
 ; CHECK: ; %bb10
 ; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 9
@@ -303,16 +301,14 @@ end:
 
 ; CHECK-LABEL: {{^}}no_skip_no_successors:
 ; CHECK: v_cmp_nge_f32
-; CHECK: s_and_b64 vcc, exec,
-; CHECK: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]]
 
 ; CHECK: ; %bb6
 ; CHECK: s_mov_b64 exec, 0
 
 ; CHECK: [[SKIPKILL]]:
 ; CHECK: v_cmp_nge_f32
-; CHECK: s_and_b64 vcc, exec, vcc
-; CHECK: s_cbranch_vccz [[UNREACHABLE:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: s_cbranch_vccz [[UNREACHABLE:BB[0-9]+_[0-9]+]]
 
 ; CHECK: [[UNREACHABLE]]:
 ; CHECK-NEXT: .Lfunc_end{{[0-9]+}}


@@ -4,8 +4,7 @@
 ; GCN-FUNC: {{^}}vccz_workaround:
 ; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0
-; GCN: v_cmp_neq_f32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
-; GCN: s_and_b64 vcc, exec, [[MASK]]
+; GCN: v_cmp_neq_f32_e64 vcc, s{{[0-9]+}}, 0{{$}}
 ; GCN: s_waitcnt lgkmcnt(0)
 ; VCCZ-BUG: s_mov_b64 vcc, vcc
 ; NOVCCZ-BUG-NOT: s_mov_b64 vcc, vcc
@@ -29,7 +28,6 @@ endif:
 ; GCN-FUNC: {{^}}vccz_noworkaround:
 ; GCN: v_cmp_neq_f32_e32 vcc, 0, v{{[0-9]+}}
-; GCN: s_and_b64 vcc, exec, vcc
 ; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]
 ; GCN: buffer_store_dword
 ; GCN: [[EXIT]]:
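For context, the hardware workaround these checks pin down, sketched with illustrative registers: on subtargets with the vccz bug, the vccz status bit can be stale when vcc is written while an SMEM load is still outstanding, so after the s_waitcnt the compiler re-writes vcc to itself to force vccz to be recomputed before the branch reads it:

    v_cmp_neq_f32_e64 vcc, s4, 0   ; vcc written while the s_load is in flight
    s_waitcnt lgkmcnt(0)
    s_mov_b64 vcc, vcc             ; VCCZ-BUG workaround: refresh vccz
    s_cbranch_vccnz BB0_2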


@@ -32,7 +32,6 @@ done:
 ; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
 ; also scheduled the write first.
 ; GCN-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
-; GCN-DAG: s_and_b64 vcc, exec, [[COND]]
 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
 ; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
@@ -89,7 +88,6 @@ done:
 ; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
 ; also scheduled the write first.
 ; GCN-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
-; GCN-DAG: s_and_b64 vcc, exec, [[COND]]
 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
 ; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
@@ -253,8 +251,7 @@ ENDIF: ; preds = %IF, %main_body
 ; GCN: s_load_dword [[COND:s[0-9]+]]
 ; GCN: s_cmp_lt_i32 [[COND]], 1
 ; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
-; GCN: v_cmp_gt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], [[COND]], 0{{$}}
-; GCN: s_and_b64 vcc, exec, [[MASK]]
+; GCN: v_cmp_gt_i32_e64 vcc, [[COND]], 0{{$}}
 ; GCN: s_cbranch_vccnz [[EXIT]]
 ; GCN: buffer_store
 ; GCN: {{^}}[[EXIT]]:
@@ -439,7 +436,6 @@ bb9: ; preds = %bb8, %bb4
 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
 ; SI: v_cmp_eq_u64_e64
-; SI: s_and_b64 vcc, exec,
 ; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
 ; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
@@ -471,7 +467,6 @@ done:
 ; GCN-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
 ; SI: v_cmp_ne_u64_e64
-; SI: s_and_b64 vcc, exec,
 ; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
 ; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
@@ -500,7 +495,6 @@ done:
 ; GCN-LABEL: {{^}}uniform_if_scc_i64_sgt:
 ; GCN: v_cmp_gt_i64_e64
-; GCN: s_and_b64 vcc, exec,
 ; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
 
 ; Fall-through to the else
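The FIXME comments in the hunks above come down to an encoding constraint; a sketch with illustrative registers: the 32-bit VOPC form takes its second source from a VGPR, so the inline-constant 0 forces the 64-bit encoding, while re-using the 0 already materialized in [[STORE_VAL]] (and scheduling that write first) would permit the short form:

    v_mov_b32_e32 v0, 0            ; write the 0 first...
    v_cmp_eq_f32_e32 vcc, s4, v0   ; ...so the _e32 compare can re-use it
    ; versus the current output:
    v_cmp_eq_f32_e64 vcc, s4, 0    ; _e64 needed for the constant operand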


@@ -355,12 +355,11 @@ main_body:
 ; CHECK: s_wqm_b64 exec, exec
 ; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0
 ; CHECK-DAG: v_mov_b32_e32 [[SEVEN:v[0-9]+]], 0x40e00000
-; CHECK: s_branch [[LOOPHDR:BB[0-9]+_[0-9]+]]
 
+; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body
 ; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]]
-; CHECK: [[LOOPHDR]]: ; %loop
 ; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]]
-; CHECK: s_cbranch_vccz
+; CHECK: s_cbranch_vccz [[LOOPHDR]]
 
 ; CHECK: ; %break
 ; CHECK: ; return