Unbundle KILL bundles in VirtRegRewriter

SplitKit forms invalid COPY subreg bundles without a leading
BUNDLE instruction. That manifests itself in post-RA scheduler
counting instruction and asserting on "Instruction count mismatch".

The bundle shall be undone by VirtRegRewriter::expandCopyBundle(),
but it does not because VirtRegRewriter::handleIdentityCopy() can
turn COPY bundle into a KILL bundle.

Process KILLs as well.

Differential Revision: https://reviews.llvm.org/D85484
This commit is contained in:
Stanislav Mekhanoshin 2020-08-07 12:56:09 -07:00
parent 34e38f92c7
commit ff3db7b010
2 changed files with 116 additions and 3 deletions

View File

@ -400,18 +400,18 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
/// after processing the last in the bundle. Does not update LiveIntervals
/// which we shouldn't need for this instruction anymore.
void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
if (!MI.isCopy())
if (!MI.isCopy() && !MI.isKill())
return;
if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
SmallVector<MachineInstr *, 2> MIs({&MI});
// Only do this when the complete bundle is made out of COPYs.
// Only do this when the complete bundle is made out of COPYs and KILLs.
MachineBasicBlock &MBB = *MI.getParent();
for (MachineBasicBlock::reverse_instr_iterator I =
std::next(MI.getReverseIterator()), E = MBB.instr_rend();
I != E && I->isBundledWithSucc(); ++I) {
if (!I->isCopy())
if (!I->isCopy() && !I->isKill())
return;
MIs.push_back(&*I);
}

View File

@ -0,0 +1,113 @@
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy -o - -verify-machineinstrs %s | FileCheck -check-prefixes=MIR,RA %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy,virtregrewriter,post-RA-sched -o - -verify-machineinstrs %s | FileCheck -check-prefixes=MIR,VR %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before=greedy -o - -verify-machineinstrs %s | FileCheck -check-prefix=ASM %s
---
# MIR-LABEL: name: splitkit_copy_bundle
# RA: undef %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %5.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 {
# RA-NEXT: internal %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %5.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27
# RA-NEXT: internal %4.sub28_sub29:sgpr_1024 = COPY %5.sub28_sub29
# RA-NEXT: }
# RA: undef %6.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 {
# RA-NEXT: internal %6.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27
# RA-NEXT: internal %6.sub28_sub29:sgpr_1024 = COPY %4.sub28_sub29
# RA-NEXT: }
# RA: undef %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %6.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 {
# RA-NEXT: internal %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %6.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27
# RA-NEXT: internal %4.sub28_sub29:sgpr_1024 = COPY %6.sub28_sub29
# RA-NEXT: }
# VR: renamable $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = KILL undef renamable $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
# VR-NEXT: renamable $sgpr96_sgpr97 = KILL undef renamable $sgpr96_sgpr97
# ASM-LABEL: {{^}}splitkit_copy_bundle:
# ASM: ; implicit-def: $sgpr34_sgpr35
# ASM-NEXT: ; implicit-def: $sgpr98_sgpr99
# ASM-NEXT: ; kill: def $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 killed $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
# ASM-NEXT: ; kill: def $sgpr96_sgpr97 killed $sgpr96_sgpr97
name: splitkit_copy_bundle
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
%0:sreg_64 = IMPLICIT_DEF
%1:sreg_64 = IMPLICIT_DEF
undef %2.sub1:sgpr_1024 = S_MOV_B32 -1
%2.sub0:sgpr_1024 = S_MOV_B32 -1
undef %3.sub0:sgpr_1024 = S_MOV_B32 0
bb.1:
%2.sub2:sgpr_1024 = COPY %2.sub0
%2.sub3:sgpr_1024 = COPY %2.sub1
%2.sub4:sgpr_1024 = COPY %2.sub0
%2.sub5:sgpr_1024 = COPY %2.sub1
%2.sub6:sgpr_1024 = COPY %2.sub0
%2.sub7:sgpr_1024 = COPY %2.sub1
%2.sub8:sgpr_1024 = COPY %2.sub0
%2.sub9:sgpr_1024 = COPY %2.sub1
%2.sub10:sgpr_1024 = COPY %2.sub0
%2.sub11:sgpr_1024 = COPY %2.sub1
%2.sub12:sgpr_1024 = COPY %2.sub0
%2.sub13:sgpr_1024 = COPY %2.sub1
%2.sub14:sgpr_1024 = COPY %2.sub0
%2.sub15:sgpr_1024 = COPY %2.sub1
%2.sub16:sgpr_1024 = COPY %2.sub0
%2.sub17:sgpr_1024 = COPY %2.sub1
%2.sub18:sgpr_1024 = COPY %2.sub0
%2.sub19:sgpr_1024 = COPY %2.sub1
%2.sub20:sgpr_1024 = COPY %2.sub0
%2.sub21:sgpr_1024 = COPY %2.sub1
%2.sub22:sgpr_1024 = COPY %2.sub0
%2.sub23:sgpr_1024 = COPY %2.sub1
%2.sub24:sgpr_1024 = COPY %2.sub0
%2.sub25:sgpr_1024 = COPY %2.sub1
%2.sub26:sgpr_1024 = COPY %2.sub0
%2.sub27:sgpr_1024 = COPY %2.sub1
%2.sub28:sgpr_1024 = COPY %2.sub0
%2.sub29:sgpr_1024 = COPY %2.sub1
%3.sub1:sgpr_1024 = COPY %3.sub0
%3.sub2:sgpr_1024 = COPY %3.sub0
%3.sub3:sgpr_1024 = COPY %3.sub0
%3.sub4:sgpr_1024 = COPY %3.sub0
%3.sub5:sgpr_1024 = COPY %3.sub0
%3.sub6:sgpr_1024 = COPY %3.sub0
%3.sub7:sgpr_1024 = COPY %3.sub0
%3.sub8:sgpr_1024 = COPY %3.sub0
%3.sub9:sgpr_1024 = COPY %3.sub0
%3.sub10:sgpr_1024 = COPY %3.sub0
%3.sub11:sgpr_1024 = COPY %3.sub0
%3.sub12:sgpr_1024 = COPY %3.sub0
%3.sub13:sgpr_1024 = COPY %3.sub0
%3.sub14:sgpr_1024 = COPY %3.sub0
%3.sub15:sgpr_1024 = COPY %3.sub0
%3.sub16:sgpr_1024 = COPY %3.sub0
%3.sub17:sgpr_1024 = COPY %3.sub0
%3.sub18:sgpr_1024 = COPY %3.sub0
%3.sub19:sgpr_1024 = COPY %3.sub0
%3.sub20:sgpr_1024 = COPY %3.sub0
%3.sub21:sgpr_1024 = COPY %3.sub0
%3.sub22:sgpr_1024 = COPY %3.sub0
%3.sub23:sgpr_1024 = COPY %3.sub0
%3.sub24:sgpr_1024 = COPY %3.sub0
%3.sub25:sgpr_1024 = COPY %3.sub0
%3.sub26:sgpr_1024 = COPY %3.sub0
%3.sub27:sgpr_1024 = COPY %3.sub0
%3.sub28:sgpr_1024 = COPY %3.sub0
%3.sub29:sgpr_1024 = COPY %3.sub0
%3.sub30:sgpr_1024 = COPY %3.sub0
%3.sub31:sgpr_1024 = COPY %3.sub0
bb.2:
S_NOP 0, implicit %0, implicit %1, csr_amdgpu_highregs
S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
S_BRANCH %bb.2
...