mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-21 15:02:51 +00:00
RegAlloc: Fix verifier error with undef identity copies
The code did not match the example in the comment, and was checking the undef flag on the copy dest instead of source. The existing tests were only hitting the > 2 operands case. llvm-svn: 361156
This commit is contained in:
parent
3e1821bf43
commit
7c8ec18964
@ -384,7 +384,7 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
|
||||
// give us additional liveness information: The target (super-)register
|
||||
// must not be valid before this point. Replace the COPY with a KILL
|
||||
// instruction to maintain this information.
|
||||
if (MI.getOperand(0).isUndef() || MI.getNumOperands() > 2) {
|
||||
if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {
|
||||
MI.setDesc(TII->get(TargetOpcode::KILL));
|
||||
LLVM_DEBUG(dbgs() << " replace by: " << MI);
|
||||
return;
|
||||
|
@ -0,0 +1,69 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -start-before=greedy -stop-after=virtregrewriter -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
# The undef source %4 of the copy is allocated to $vgpr3, making the COPY
|
||||
# an identity copy. The identity copy was deleted, so $vgpr3 was
|
||||
# considered undef. The code that replaces an undef identity copy with a
|
||||
|
||||
# KILL was incorrectly checking the dest operand rather than the source.
|
||||
|
||||
--- |
|
||||
define amdgpu_kernel void @undef_identity_copy() {
|
||||
ret void
|
||||
}
|
||||
|
||||
declare hidden float @bar(<4 x float>)
|
||||
declare hidden void @foo()
|
||||
|
||||
...
|
||||
---
|
||||
name: undef_identity_copy
|
||||
tracksRegLiveness: true
|
||||
frameInfo:
|
||||
maxAlignment: 4
|
||||
hasCalls: true
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: true
|
||||
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
||||
scratchWaveOffsetReg: '$sgpr95'
|
||||
frameOffsetReg: '$sgpr95'
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
body: |
|
||||
bb.0:
|
||||
; CHECK-LABEL: name: undef_identity_copy
|
||||
; CHECK: renamable $vgpr32_vgpr33_vgpr34_vgpr35 = FLAT_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1)
|
||||
; CHECK: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc
|
||||
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
|
||||
; CHECK: $sgpr4 = COPY $sgpr95
|
||||
; CHECK: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @foo, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4
|
||||
; CHECK: ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
|
||||
; CHECK: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @bar + 4, target-flags(amdgpu-rel32-hi) @bar + 4, implicit-def dead $scc
|
||||
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
|
||||
; CHECK: $sgpr4 = COPY $sgpr95
|
||||
; CHECK: $vgpr0 = COPY renamable $vgpr32
|
||||
; CHECK: $vgpr1 = COPY renamable $vgpr33
|
||||
; CHECK: $vgpr2 = COPY renamable $vgpr34
|
||||
; CHECK: $vgpr3 = KILL undef renamable $vgpr3
|
||||
; CHECK: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0
|
||||
; CHECK: ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
|
||||
; CHECK: FLAT_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%0:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1)
|
||||
%2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc
|
||||
ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
|
||||
$sgpr4 = COPY $sgpr95
|
||||
dead $sgpr30_sgpr31 = SI_CALL %2, @foo, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4
|
||||
ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
|
||||
%3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @bar + 4, target-flags(amdgpu-rel32-hi) @bar + 4, implicit-def dead $scc
|
||||
ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
|
||||
$sgpr4 = COPY $sgpr95
|
||||
$vgpr0 = COPY %0.sub0
|
||||
$vgpr1 = COPY %0.sub1
|
||||
$vgpr2 = COPY %0.sub2
|
||||
$vgpr3 = COPY undef %4:vgpr_32
|
||||
dead $sgpr30_sgpr31 = SI_CALL %3, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0
|
||||
%5:vgpr_32 = COPY $vgpr0
|
||||
ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
|
||||
FLAT_STORE_DWORD undef %6:vreg_64, %5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
Loading…
Reference in New Issue
Block a user