mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-04 00:06:50 +00:00
[AMDGPU] Do not reassign spilled registers
We cannot call LRM::unassign() if LRM::assign() was never called before, these are symmetrical calls. There are two ways of assigning a physical register to virtual, via LRM::assign() and via VRM::assignVirt2Phys(). LRM::assign() will call the VRM to assign the register and then update LiveIntervalUnion. Inline spiller calls VRM directly and thus LiveIntervalUnion never gets updated. A call to LRM::unassign() then asserts about inconsistent liveness. We have to note that not all callers of the InlineSpiller even have LRM to pass, RegAllocPBQP does not have it, so we cannot always pass LRM into the spiller. The only way to get into that spiller LRE_DidCloneVirtReg() call is from LiveRangeEdit::eliminateDeadDefs if we split an LI. This patch refuses to reassign a LiveInterval created by a split to workaround the problem. In fact we cannot reassign a spill anyway as all registers of the needed class are occupied and we are spilling. Fixes: SWDEV-267996 Differential Revision: https://reviews.llvm.org/D95489
This commit is contained in:
parent
6612c2bb68
commit
d91ee2f782
@ -190,6 +190,14 @@ GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
|
||||
if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
|
||||
return NSA_Status::FIXED;
|
||||
|
||||
// InlineSpiller does not call LRM::assign() after an LI split leaving
|
||||
// it in an inconsistent state, so we cannot call LRM::unassign().
|
||||
// See llvm bug #48911.
|
||||
// Skip reassign if a register has originated from such split.
|
||||
// FIXME: Remove the workaround when bug #48911 is fixed.
|
||||
if (VRM->getPreSplitReg(Reg))
|
||||
return NSA_Status::FIXED;
|
||||
|
||||
const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
|
||||
|
||||
if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
|
||||
|
@ -471,6 +471,14 @@ bool GCNRegBankReassign::isReassignable(Register Reg) const {
|
||||
if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
|
||||
return false;
|
||||
|
||||
// InlineSpiller does not call LRM::assign() after an LI split leaving it
|
||||
// in an inconsistent state, so we cannot call LRM::unassign().
|
||||
// See llvm bug #48911.
|
||||
// Skip reassign if a register has originated from such split.
|
||||
// FIXME: Remove the workaround when bug #48911 is fixed.
|
||||
if (VRM->getPreSplitReg(Reg))
|
||||
return false;
|
||||
|
||||
const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
|
||||
|
||||
Register PhysReg = VRM->getPhys(Reg);
|
||||
|
72
llvm/test/CodeGen/AMDGPU/nsa-reassign.mir
Normal file
72
llvm/test/CodeGen/AMDGPU/nsa-reassign.mir
Normal file
@ -0,0 +1,72 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass greedy,amdgpu-nsa-reassign,virtregrewriter,si-shrink-instructions -o - %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
--- |
|
||||
define amdgpu_kernel void @nsa_reassign() #0 { ret void }
|
||||
define amdgpu_kernel void @do_not_reassign_spill() #0 { ret void }
|
||||
|
||||
attributes #0 = { "amdgpu-num-vgpr"="8" }
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: nsa_reassign
|
||||
# GCN: IMAGE_SAMPLE_C_L_V1_V8_gfx10
|
||||
---
|
||||
name: nsa_reassign
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
stack:
|
||||
- { id: 0, type: default, offset: 0, size: 4, alignment: 4 }
|
||||
registers:
|
||||
- { id: 0, class: vgpr_32, preferred-register: '$vgpr0' }
|
||||
- { id: 1, class: vgpr_32, preferred-register: '$vgpr1' }
|
||||
- { id: 2, class: vgpr_32, preferred-register: '$vgpr2' }
|
||||
- { id: 3, class: vgpr_32, preferred-register: '$vgpr3' }
|
||||
- { id: 4, class: vgpr_32, preferred-register: '$vgpr4' }
|
||||
- { id: 5, class: vgpr_32, preferred-register: '$vgpr5' }
|
||||
- { id: 6, class: vgpr_32, preferred-register: '$vgpr6' }
|
||||
- { id: 7, class: vgpr_32, preferred-register: '$vgpr7' }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "ImageResource")
|
||||
S_ENDPGM 0, implicit %7
|
||||
...
|
||||
|
||||
# GCN-LABEL: do_not_reassign_spill
|
||||
# GCN: IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10
|
||||
---
|
||||
name: do_not_reassign_spill
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
stack:
|
||||
- { id: 0, type: default, offset: 0, size: 4, alignment: 4 }
|
||||
registers:
|
||||
- { id: 0, class: vgpr_32, preferred-register: '$vgpr0' }
|
||||
- { id: 1, class: vgpr_32, preferred-register: '$vgpr1' }
|
||||
- { id: 2, class: vgpr_32, preferred-register: '$vgpr2' }
|
||||
- { id: 3, class: vgpr_32, preferred-register: '$vgpr3' }
|
||||
- { id: 4, class: vgpr_32, preferred-register: '$vgpr4' }
|
||||
- { id: 5, class: vgpr_32, preferred-register: '$vgpr5' }
|
||||
- { id: 6, class: vgpr_32, preferred-register: '$vgpr6' }
|
||||
- { id: 7, class: vgpr_32, preferred-register: '$vgpr7' }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%6 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
S_NOP 0, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6
|
||||
%7:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V5_nsa_gfx10 %0, %2, %4, %5, %6, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "ImageResource")
|
||||
S_ENDPGM 0, implicit %7
|
||||
...
|
38
llvm/test/CodeGen/AMDGPU/regbank-reassign-split.mir
Normal file
38
llvm/test/CodeGen/AMDGPU/regbank-reassign-split.mir
Normal file
@ -0,0 +1,38 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass greedy,amdgpu-regbanks-reassign,virtregrewriter -o - %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
--- |
|
||||
define amdgpu_kernel void @do_not_reassign_spill() #0 { ret void }
|
||||
|
||||
attributes #0 = { "amdgpu-num-vgpr"="8" }
|
||||
...
|
||||
|
||||
# GCN-LABEL: do_not_reassign_spill{{$}}
|
||||
# GCN: V_AND_B32_e32 killed $vgpr1, killed $vgpr5,
|
||||
---
|
||||
name: do_not_reassign_spill
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
stack:
|
||||
- { id: 0, type: default, offset: 0, size: 4, alignment: 4 }
|
||||
registers:
|
||||
- { id: 0, class: vgpr_32, preferred-register: '$vgpr0' }
|
||||
- { id: 1, class: vgpr_32, preferred-register: '$vgpr1' }
|
||||
- { id: 2, class: vgpr_32, preferred-register: '$vgpr2' }
|
||||
- { id: 3, class: vgpr_32, preferred-register: '$vgpr3' }
|
||||
- { id: 4, class: vgpr_32, preferred-register: '$vgpr4' }
|
||||
- { id: 5, class: vgpr_32, preferred-register: '$vgpr5' }
|
||||
- { id: 6, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%3 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%4 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
%5 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, align 4, addrspace 5)
|
||||
S_NOP 0, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5
|
||||
%6 = V_AND_B32_e32 %1, %5, implicit $exec
|
||||
S_ENDPGM 0, implicit %6
|
||||
...
|
Loading…
x
Reference in New Issue
Block a user