[AMDGPU] Fix kill flag on overlapping sgpr copy

As with VGPR copies, we cannot mark the source register as killed if it
overlaps with the destination register. Otherwise, the kill of the
source register would also count as a kill of the overlapping part of
the destination register.

Differential Revision: https://reviews.llvm.org/D120042
This commit is contained in:
Sebastian Neubauer 2022-02-17 13:33:07 +01:00
parent 0136a4401f
commit 1f0aadfa62
2 changed files with 52 additions and 1 deletions

View File

@ -930,7 +930,9 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
return;
}
expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RC, Forward);
const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, CanKillSuperReg, RC,
Forward);
return;
}

View File

@ -0,0 +1,49 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=postrapseudos -o - %s | FileCheck %s
# Don't set a kill flag for the source super register on the last instruction
# of an expanded copy whose source and destination overlap. That would also
# kill the part of the copied result that lives in the overlapping registers.
---
# Overlapping case: the source ($sgpr4-$sgpr11) and the destination
# ($sgpr0-$sgpr7) share $sgpr4-$sgpr7. The COPY below has a killed source, but
# none of the expected S_MOV_B64 instructions carries a `killed` flag on the
# source super register, since the later uses of $sgpr4-$sgpr7 must stay live.
name: overlapping_copy_kill_undef_reg_after_copy
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr30_sgpr31, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
; CHECK-LABEL: name: overlapping_copy_kill_undef_reg_after_copy
; CHECK: liveins: $sgpr30_sgpr31, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; CHECK-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $sgpr6_sgpr7, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
; CHECK-NEXT: renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
...
---
# Non-overlapping case: the source ($sgpr3-$sgpr5) and the destination
# ($sgpr0-$sgpr2) are disjoint. The COPY below has a killed source, and the
# last expected S_MOV_B32 keeps the `killed` flag on the source super register.
name: nonoverlapping_copy_kill
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr30_sgpr31, $sgpr3_sgpr4_sgpr5
; CHECK-LABEL: name: nonoverlapping_copy_kill
; CHECK: liveins: $sgpr30_sgpr31, $sgpr3_sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr0 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2
; CHECK-NEXT: $sgpr1 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5
; CHECK-NEXT: $sgpr2 = S_MOV_B32 $sgpr5, implicit killed $sgpr3_sgpr4_sgpr5
; CHECK-NEXT: renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
renamable $sgpr0_sgpr1_sgpr2 = COPY killed renamable $sgpr3_sgpr4_sgpr5
renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
...