From 1f0aadfa62a56ca5a71bec0911538fe360bfc28d Mon Sep 17 00:00:00 2001 From: Sebastian Neubauer Date: Thu, 17 Feb 2022 13:33:07 +0100 Subject: [PATCH] [AMDGPU] Fix kill flag on overlapping sgpr copy Same as on vgpr copies, we cannot kill the source register if it overlaps with the destination register. Otherwise, the kill of the source register will also count as a kill for the destination register. Differential Revision: https://reviews.llvm.org/D120042 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 +- .../CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir | 49 +++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 9899c36352b8..7e5c9e990d4b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -930,7 +930,9 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); return; } - expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RC, Forward); + const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg); + expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, CanKillSuperReg, RC, + Forward); return; } diff --git a/llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir b/llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir new file mode 100644 index 000000000000..b97a9237a0b9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir @@ -0,0 +1,49 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=postrapseudos -o - %s | FileCheck %s + +# Don't set a kill flag for the super register on the last instruction of +# an overlapping copy. The kill would also cover the overlapping part of the +# destination, killing values the copy has just written.
+ +--- +name: overlapping_copy_kill_undef_reg_after_copy +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr30_sgpr31, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 + + ; CHECK-LABEL: name: overlapping_copy_kill_undef_reg_after_copy + ; CHECK: liveins: $sgpr30_sgpr31, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; CHECK-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $sgpr6_sgpr7, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 + ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 + ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 + ; CHECK-NEXT: renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc + ; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 + renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc + S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + +... 
+ +--- +name: nonoverlapping_copy_kill +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr30_sgpr31, $sgpr3_sgpr4_sgpr5 + + ; CHECK-LABEL: name: nonoverlapping_copy_kill + ; CHECK: liveins: $sgpr30_sgpr31, $sgpr3_sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2 + ; CHECK-NEXT: $sgpr1 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5 + ; CHECK-NEXT: $sgpr2 = S_MOV_B32 $sgpr5, implicit killed $sgpr3_sgpr4_sgpr5 + ; CHECK-NEXT: renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc + ; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + renamable $sgpr0_sgpr1_sgpr2 = COPY killed renamable $sgpr3_sgpr4_sgpr5 + renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc + S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + +...