[AMDGPU] Make sure to fix implicit operands on insertBranch

Summary:
Without a call to fixImplicitOperands, insertBranch may end up creating default
implicit operands that are for the wrong wave size.

Includes a simple test that provokes insertBranch in the way needed to expose
the issue being fixed.

Change-Id: I92bdcdee9fcb7b4d91529b84e76a48ac8218483e

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82459
This commit is contained in:
dstuttar 2020-06-23 17:47:58 +01:00
parent a448670752
commit e8775c8d81
2 changed files with 50 additions and 1 deletions

View File

@ -2265,6 +2265,7 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
// Copy the flags onto the implicit condition register operand. // Copy the flags onto the implicit condition register operand.
preserveCondRegFlags(CondBr->getOperand(1), Cond[1]); preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);
fixImplicitOperands(*CondBr);
if (BytesAdded) if (BytesAdded)
*BytesAdded = 4; *BytesAdded = 4;
@ -3326,7 +3327,8 @@ static void copyFlagsToImplicitVCC(MachineInstr &MI,
const MachineOperand &Orig) { const MachineOperand &Orig) {
for (MachineOperand &Use : MI.implicit_operands()) { for (MachineOperand &Use : MI.implicit_operands()) {
if (Use.isUse() && Use.getReg() == AMDGPU::VCC) { if (Use.isUse() &&
(Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
Use.setIsUndef(Orig.isUndef()); Use.setIsUndef(Orig.isUndef());
Use.setIsKill(Orig.isKill()); Use.setIsKill(Orig.isKill());
return; return;

View File

@ -0,0 +1,47 @@
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass branch-folder -o - %s | FileCheck %s
# Designed to provoke calling SIInstrInfo::insertBranch in wave32 mode
# The implicit $vcc operand should be $vcc_lo in this case
...
# Case with defined condition register operands (no undef flag on the input
# branch). bb.1 conditionally branches back to itself and otherwise goes to
# bb.2, which branches back to bb.1 unconditionally.
# CHECK-LABEL: bb.1:
# CHECK: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo
name: _amdgpu_cs_main
body: |
bb.0:
$vgpr1 = V_MOV_B32_e32 1050, implicit $exec
$sgpr0 = S_MOV_B32 1123418112
$vcc_hi = IMPLICIT_DEF
bb.1:
$vgpr0 = COPY killed $vgpr1, implicit $exec
V_CMP_GT_U32_e32 5, $vgpr1, implicit-def $vcc_lo, implicit $exec, implicit-def $vcc
$vcc_lo = S_AND_B32 $exec_lo, $vcc_lo, implicit-def dead $scc
S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo, implicit $vcc
S_BRANCH %bb.2
bb.2:
$sgpr1 = COPY $sgpr0
S_BRANCH %bb.1
...
# Case where the condition register operands on the input branch are marked
# undef. The expected output keeps the undef flag on the implicit $vcc_lo
# operand of the conditional branch back to bb.1.
# CHECK-LABEL: bb.1:
# CHECK: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc_lo
---
name: _amdgpu_cs_main_undef
body: |
bb.0:
$vgpr1 = V_MOV_B32_e32 1050, implicit $exec
$sgpr0 = S_MOV_B32 1123418112
$vcc_hi = IMPLICIT_DEF
bb.1:
$vgpr0 = COPY killed $vgpr1, implicit $exec
S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc_lo, implicit undef $vcc
S_BRANCH %bb.2
bb.2:
$sgpr1 = COPY $sgpr0
S_BRANCH %bb.1
...