mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-10-09 12:24:08 +00:00
[AMDGPU] Fixed v_swap_b32 match
1. Fixed liveness issue with implicit kills.
2. Fixed potential problem with an indirect mov.

Fixes: SWDEV-256848

Differential Revision: https://reviews.llvm.org/D89599
This commit is contained in:
parent
b6e4aae2cc
commit
611959f004
@ -437,6 +437,22 @@ getSubRegForIndex(Register Reg, unsigned Sub, unsigned I,
|
||||
return TargetInstrInfo::RegSubRegPair(Reg, Sub);
|
||||
}
|
||||
|
||||
/// Erase \p MI from its parent block while preserving its trailing defs.
///
/// Operands located past the ones counted by the instruction descriptor
/// (its operand count plus its implicit uses and implicit defs) are scanned;
/// for every such operand that is a def, an IMPLICIT_DEF of the same register
/// is inserted immediately before \p MI. Afterwards \p MI itself is erased.
///
/// \param MI  Instruction to remove.
/// \param TII Instruction info used to look up the IMPLICIT_DEF descriptor.
static void dropInstructionKeepingImpDefs(MachineInstr &MI,
                                          const SIInstrInfo *TII) {
  const auto &Desc = MI.getDesc();
  // Index of the first operand that is not accounted for by the descriptor.
  // NOTE(review): presumably these are extra implicit operands attached to
  // this particular instruction - confirm against callers.
  const unsigned FirstExtraOp = Desc.getNumOperands() +
                                Desc.getNumImplicitUses() +
                                Desc.getNumImplicitDefs();
  auto &MBB = *MI.getParent();

  for (unsigned OpIdx = FirstExtraOp, NumOps = MI.getNumOperands();
       OpIdx != NumOps; ++OpIdx) {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    if (MO.isDef()) {
      // Keep the register defined after MI is gone.
      BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(),
              TII->get(AMDGPU::IMPLICIT_DEF), MO.getReg());
    }
  }

  MI.eraseFromParent();
}
|
||||
|
||||
// Match:
|
||||
// mov t, x
|
||||
// mov x, y
|
||||
@ -476,18 +492,25 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
|
||||
if (!TRI.isVGPR(MRI, X))
|
||||
return nullptr;
|
||||
|
||||
if (MovT.hasRegisterImplicitUseOperand(AMDGPU::M0))
|
||||
return nullptr;
|
||||
|
||||
const unsigned SearchLimit = 16;
|
||||
unsigned Count = 0;
|
||||
bool KilledT = false;
|
||||
for (auto Iter = std::next(MovT.getIterator()),
|
||||
E = MovT.getParent()->instr_end();
|
||||
Iter != E && Count < SearchLimit; ++Iter, ++Count) {
|
||||
Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {
|
||||
|
||||
MachineInstr *MovY = &*Iter;
|
||||
KilledT = MovY->killsRegister(T, &TRI);
|
||||
|
||||
if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
|
||||
MovY->getOpcode() != AMDGPU::COPY) ||
|
||||
!MovY->getOperand(1).isReg() ||
|
||||
MovY->getOperand(1).getReg() != T ||
|
||||
MovY->getOperand(1).getSubReg() != Tsub)
|
||||
MovY->getOperand(1).getSubReg() != Tsub ||
|
||||
MovY->hasRegisterImplicitUseOperand(AMDGPU::M0))
|
||||
continue;
|
||||
|
||||
Register Y = MovY->getOperand(0).getReg();
|
||||
@ -521,32 +544,53 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
|
||||
MovX = nullptr;
|
||||
break;
|
||||
}
|
||||
// Implicit use of M0 is an indirect move.
|
||||
if (I->hasRegisterImplicitUseOperand(AMDGPU::M0))
|
||||
continue;
|
||||
|
||||
if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 0 : 1)))
|
||||
continue;
|
||||
|
||||
MovX = &*I;
|
||||
}
|
||||
|
||||
if (!MovX)
|
||||
continue;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);
|
||||
LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << *MovY);
|
||||
|
||||
for (unsigned I = 0; I < Size; ++I) {
|
||||
TargetInstrInfo::RegSubRegPair X1, Y1;
|
||||
X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
|
||||
Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
|
||||
BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
|
||||
MachineBasicBlock &MBB = *MovT.getParent();
|
||||
auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
|
||||
TII->get(AMDGPU::V_SWAP_B32))
|
||||
.addDef(X1.Reg, 0, X1.SubReg)
|
||||
.addDef(Y1.Reg, 0, Y1.SubReg)
|
||||
.addReg(Y1.Reg, 0, Y1.SubReg)
|
||||
.addReg(X1.Reg, 0, X1.SubReg).getInstr();
|
||||
if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
|
||||
// Drop implicit EXEC.
|
||||
MIB->RemoveOperand(MIB->getNumExplicitOperands());
|
||||
MIB->copyImplicitOps(*MBB.getParent(), *MovX);
|
||||
}
|
||||
}
|
||||
MovX->eraseFromParent();
|
||||
MovY->eraseFromParent();
|
||||
dropInstructionKeepingImpDefs(*MovY, TII);
|
||||
MachineInstr *Next = &*std::next(MovT.getIterator());
|
||||
if (MRI.use_nodbg_empty(T))
|
||||
MovT.eraseFromParent();
|
||||
else
|
||||
|
||||
if (MRI.use_nodbg_empty(T)) {
|
||||
dropInstructionKeepingImpDefs(MovT, TII);
|
||||
} else {
|
||||
Xop.setIsKill(false);
|
||||
for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I ) {
|
||||
unsigned OpNo = MovT.getNumExplicitOperands() + I;
|
||||
const MachineOperand &Op = MovT.getOperand(OpNo);
|
||||
if (Op.isKill() && TRI.regsOverlap(X, Op.getReg()))
|
||||
MovT.RemoveOperand(OpNo);
|
||||
}
|
||||
}
|
||||
|
||||
return Next;
|
||||
}
|
||||
|
@ -517,7 +517,9 @@ body: |
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_virt_copy_subreg_impdef_super
|
||||
# GCN: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
|
||||
# GCN: %2:vreg_64 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %2.sub1:vreg_64 = COPY %0.sub1
|
||||
# GCN-NEXT: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
|
||||
---
|
||||
name: swap_virt_copy_subreg_impdef_super
|
||||
registers:
|
||||
@ -672,3 +674,222 @@ body: |
|
||||
%1 = COPY %2
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_liveness_error_mov
|
||||
# GCN: $vgpr6 = V_MOV_B32_e32 $vgpr1, implicit $exec
|
||||
# GCN-NEXT: $vgpr1, $vgpr5 = V_SWAP_B32 $vgpr5, $vgpr1, implicit $exec
|
||||
# GCN-NEXT: $vgpr5_vgpr6 = IMPLICIT_DEF
|
||||
# GCN-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit $vgpr6_vgpr7
|
||||
# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec
|
||||
|
||||
---
|
||||
name: swap_liveness_error_mov
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr5, $vgpr1_vgpr2
|
||||
|
||||
$vgpr6 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit killed $vgpr1_vgpr2
|
||||
$vgpr1 = V_MOV_B32_e32 killed $vgpr5, implicit $exec
|
||||
$vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit-def $vgpr5_vgpr6, implicit $vgpr6_vgpr7
|
||||
$vgpr6 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit $vgpr6_vgpr7
|
||||
$vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_liveness_error_copy
|
||||
# GCN: $vgpr6 = COPY $vgpr1
|
||||
# GCN-NEXT: $vgpr1, $vgpr5 = V_SWAP_B32 $vgpr5, $vgpr1, implicit $exec
|
||||
# GCN-NEXT: $vgpr5_vgpr6 = IMPLICIT_DEF
|
||||
# GCN-NEXT: $vgpr6 = COPY $vgpr7, implicit $vgpr6_vgpr7
|
||||
# GCN-NEXT: $vgpr5 = COPY $vgpr6
|
||||
|
||||
---
|
||||
name: swap_liveness_error_copy
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr5, $vgpr1_vgpr2
|
||||
|
||||
$vgpr6 = COPY $vgpr1, implicit killed $vgpr1_vgpr2
|
||||
$vgpr1 = COPY killed $vgpr5
|
||||
$vgpr5 = COPY $vgpr6, implicit-def $vgpr5_vgpr6, implicit $vgpr6_vgpr7
|
||||
$vgpr6 = COPY $vgpr7, implicit $vgpr6_vgpr7
|
||||
$vgpr5 = COPY $vgpr6
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_killed_t_early
|
||||
# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit killed $vgpr2
|
||||
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
|
||||
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
|
||||
|
||||
---
|
||||
name: swap_killed_t_early
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
$vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit killed $vgpr2
|
||||
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
$vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_killed_t_late
|
||||
# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
|
||||
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr2
|
||||
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
|
||||
|
||||
---
|
||||
name: swap_killed_t_late
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
$vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
$vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr2
|
||||
$vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: swap_killed_x
|
||||
# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
|
||||
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr0
|
||||
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
|
||||
|
||||
---
|
||||
name: swap_killed_x
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
$vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
$vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr0
|
||||
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: indirect_mov_t
|
||||
# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $m0
|
||||
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
|
||||
# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
|
||||
|
||||
---
|
||||
name: indirect_mov_t
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $m0
|
||||
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: indirect_mov_x
|
||||
# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $m0
|
||||
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
|
||||
# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
|
||||
|
||||
---
|
||||
name: indirect_mov_x
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $m0
|
||||
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: indirect_mov_y
|
||||
# GCN: $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec, implicit $m0
|
||||
# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
|
||||
|
||||
---
|
||||
name: indirect_mov_y
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
$vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec, implicit $m0
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: implicit_ops_mov_x_swap_b32
|
||||
# GCN: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2
|
||||
|
||||
---
|
||||
name: implicit_ops_mov_x_swap_b32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
$vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2
|
||||
$vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: implict_ops_mov_x_swap_b64
|
||||
# GCN: %2:vreg_64 = COPY %0
|
||||
# GCN-NEXT: %0:vreg_64 = COPY %1, implicit $vgpr0
|
||||
# GCN-NEXT: %1:vreg_64 = COPY %2
|
||||
|
||||
---
|
||||
name: implict_ops_mov_x_swap_b64
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vreg_64 }
|
||||
- { id: 2, class: vreg_64 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
%2 = COPY %0
|
||||
%0 = COPY %1, implicit $vgpr0
|
||||
%1 = COPY %2
|
||||
...
|
||||
|
||||
# GCN-LABEL: implicit_ops_mov_t_swap_b32
|
||||
# GCN: $vgpr1 = IMPLICIT_DEF
|
||||
# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
|
||||
|
||||
---
|
||||
name: implicit_ops_mov_t_swap_b32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
$vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2, implicit-def $vgpr1
|
||||
$vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
|
||||
...
|
||||
|
||||
# GCN-LABEL: implicit_ops_mov_y_swap_b32
|
||||
# GCN: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
|
||||
# GCN-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF
|
||||
|
||||
---
|
||||
name: implicit_ops_mov_y_swap_b32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
$vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec, implicit $vgpr2, implicit-def $vgpr0_vgpr1, implicit killed $vgpr3
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
|
||||
...
|
||||
|
Loading…
Reference in New Issue
Block a user