LiveIntervals: Fix handleMoveUp with subreg def moving across a def

If a subregister def was moved across another subregister def and
another use, the main range was not correctly updated. The end point
of the moved interval ended too early and missed the use from the
other lanes in the subreg def.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375300 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault
2019-10-18 23:24:25 +00:00
parent f6288975c0
commit 8672594561
3 changed files with 190 additions and 1 deletions

View File

@@ -1288,6 +1288,20 @@ private:
const SlotIndex SplitPos = NewIdxDef;
OldIdxVNI = OldIdxIn->valno;
SlotIndex NewDefEndPoint = std::next(NewIdxIn)->end;
LiveRange::iterator Prev = std::prev(OldIdxIn);
if (OldIdxIn != LR.begin() &&
SlotIndex::isEarlierInstr(NewIdx, Prev->end)) {
// If the segment before OldIdx read a value defined earlier than
// NewIdx, the moved instruction also reads and forwards that
// value. Extend the lifetime of the new def point.
// Extend to where the previous range started, unless there is
// another redef first.
NewDefEndPoint = std::min(OldIdxIn->start,
std::next(NewIdxOut)->start);
}
// Merge the OldIdxIn and OldIdxOut segments into OldIdxOut.
OldIdxOut->valno->def = OldIdxIn->start;
*OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end,
@@ -1305,7 +1319,8 @@ private:
// There is no gap between NewSegment and its predecessor.
*NewSegment = LiveRange::Segment(Next->start, SplitPos,
Next->valno);
*Next = LiveRange::Segment(SplitPos, Next->end, OldIdxVNI);
*Next = LiveRange::Segment(SplitPos, NewDefEndPoint, OldIdxVNI);
Next->valno->def = SplitPos;
} else {
// There is a gap between NewSegment and its predecessor

View File

@@ -0,0 +1,134 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -verify-misched -run-pass=machine-scheduler -o - %s | FileCheck %s
# Regression test for LiveIntervals handleMoveUp: a subregister def moved up
# across another subregister def and a use must keep a correct main range.
# Only the machine-scheduler pass is run, with -verify-machineinstrs and
# -verify-misched enabled.
---
name: handleMoveUp_incorrect_interval
tracksRegLiveness: true
liveins:
- { reg: '$sgpr4_sgpr5', virtual-reg: '%0' }
frameInfo:
maxAlignment: 1
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
scratchWaveOffsetReg: '$sgpr101'
frameOffsetReg: '$sgpr101'
stackPtrOffsetReg: '$sgpr101'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
workGroupIDX: { reg: '$sgpr6' }
privateSegmentWaveByteOffset: { reg: '$sgpr7' }
workItemIDX: { reg: '$vgpr0' }
body: |
; CHECK-LABEL: name: handleMoveUp_incorrect_interval
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $sgpr4_sgpr5
; CHECK: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 5329
; CHECK: undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec
; CHECK: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK: bb.1:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: INLINEASM &"", 1, 851978, def dead %11
; CHECK: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; CHECK: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3)
; CHECK: INLINEASM &"def $0 $1", 1, 851978, def %15, 851978, def %16
; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec
; CHECK: [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
; CHECK: [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec
; CHECK: INLINEASM &"def $0 $1", 1, 851978, def %21, 851978, def %22
; CHECK: [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
; CHECK: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: %5.sub1:vreg_64 = COPY [[V_MOV_B32_e32_]]
; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[DEF2]], implicit $exec
; CHECK: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_U32_e64 64, [[V_ADD_U32_e32_]], implicit $exec
; CHECK: [[DEF]].sub1:vreg_64 = COPY [[V_MOV_B32_e32_]]
; CHECK: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: INLINEASM &"", 1, 851978, def dead [[V_MOV_B32_e32_2]], 851978, def dead [[V_MOV_B32_e32_3]], 851977, [[DS_READ_B64_gfx9_]].sub0, 2147483657, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193, [[V_MOV_B32_e32_3]](tied-def 5), 851977, %15, 851977, %16, 851977, [[DS_READ_B32_gfx9_1]], 851977, [[DS_READ_B32_gfx9_]], 851977, [[DS_READ_B32_gfx9_3]], 851977, [[DS_READ_B32_gfx9_2]]
; CHECK: DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3)
; CHECK: DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3)
; CHECK: DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3)
; CHECK: undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; CHECK: [[V_MUL_LO_U32_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_ADD_U32_e32_]], [[S_MOV_B32_]], implicit $exec
; CHECK: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, [[V_ADD_U32_e32_]], [[V_CMP_GT_U32_e64_]], implicit $exec
; CHECK: [[V_SUB_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 [[V_MUL_LO_U32_]], [[DEF1]], implicit $exec
; CHECK: [[V_MUL_LO_U32_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_CNDMASK_B32_e64_]], [[S_MOV_B32_]], implicit $exec
; CHECK: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_SUB_U32_e32_]], [[DEF]].sub0, implicit $exec
; CHECK: [[V_SUB_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 [[V_MUL_LO_U32_1]], [[V_MUL_LO_U32_]], implicit $exec
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; CHECK: [[DEF]].sub0:vreg_64 = V_ADD_U32_e32 [[V_SUB_U32_e32_1]], [[V_ADD_U32_e32_1]], implicit $exec
; CHECK: undef %38.sub0:vreg_64, %39:sreg_64_xexec = V_ADD_I32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[DEF]].sub0, 0, implicit $exec
; CHECK: undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 [[COPY1]], [[DEF]].sub1, %39, 0, implicit $exec
; CHECK: undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec
; CHECK: %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec
; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0, 0 :: (load 4, addrspace 1)
; CHECK: INLINEASM &"", 1
; CHECK: [[DS_READ_B32_gfx9_4:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3)
; CHECK: GLOBAL_STORE_DWORD undef %46:vreg_64, [[DS_READ_B32_gfx9_4]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; CHECK: %31.sub0:vreg_64 = COPY [[S_LOAD_DWORD_IMM]], implicit $exec
; CHECK: DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store 8, addrspace 3)
; CHECK: S_BRANCH %bb.1
bb.0:
liveins: $sgpr4_sgpr5
%0:sgpr_64(p4) = COPY $sgpr4_sgpr5
%1:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
%3:sgpr_64 = S_LOAD_DWORDX2_IMM %0(p4), 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
%4:sreg_32_xm0 = S_MOV_B32 5329
undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
%6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%7:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec
%8:vreg_64 = IMPLICIT_DEF
%9:vgpr_32 = IMPLICIT_DEF
%10:vgpr_32 = IMPLICIT_DEF
bb.1:
INLINEASM &"", 1, 851978, def %11:vgpr_32
GLOBAL_STORE_DWORD undef %12:vreg_64, %1, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
%13:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3)
INLINEASM &"def $0 $1", 1, 851978, def %15:vgpr_32, 851978, def %16:vgpr_32
%17:vgpr_32 = DS_READ_B32_gfx9 %6, 0, 0, implicit $exec
%18:vgpr_32 = DS_READ_B32_gfx9 %7, 0, 0, implicit $exec
%19:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec
INLINEASM &"def $0 $1", 1, 851978, def %21:vgpr_32, 851978, def %22:vgpr_32
%23:vgpr_32 = DS_READ_B32_gfx9 %7, 0, 0, implicit $exec
%24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%5.sub1:vreg_64 = COPY %6
%25:vgpr_32 = V_ADD_U32_e32 1, %10, implicit $exec
%26:sreg_64_xexec = V_CMP_GT_U32_e64 64, %25, implicit $exec
%27:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
INLINEASM &"", 1, 851978, def dead %24, 851978, def dead %27, 851977, %13.sub0, 2147483657, %24(tied-def 3), 2147549193, %27(tied-def 5), 851977, %15, 851977, %16, 851977, %18, 851977, %17, 851977, %23, 851977, %19
DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3)
DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3)
DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3)
undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%33:vgpr_32 = V_MUL_LO_U32 %25, %4, implicit $exec
%10:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %25, %26, implicit $exec
%34:vgpr_32 = V_SUB_U32_e32 %33, %9, implicit $exec
%9:vgpr_32 = V_MUL_LO_U32 %10, %4, implicit $exec
%35:vgpr_32 = V_ADD_U32_e32 %34, %8.sub0, implicit $exec
%36:vgpr_32 = V_SUB_U32_e32 %9, %33, implicit $exec
%37:vgpr_32 = COPY %3.sub1
undef %8.sub0:vreg_64 = V_ADD_U32_e32 %36, %35, implicit $exec
%8.sub1:vreg_64 = COPY %6
undef %38.sub0:vreg_64, %39:sreg_64_xexec = V_ADD_I32_e64 %3.sub0, %8.sub0, 0, implicit $exec
undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 %37, %8.sub1, %39, 0, implicit $exec
undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec
%42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec
%43:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0, 0 :: (load 4, addrspace 1)
INLINEASM &"", 1
%44:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3)
GLOBAL_STORE_DWORD undef %46:vreg_64, %44, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
%31.sub0:vreg_64 = COPY %43, implicit $exec
DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store 8, addrspace 3)
S_BRANCH %bb.1
...

View File

@@ -421,6 +421,46 @@ TEST(LiveIntervalTest, DeadSubRegMoveUp) {
});
}
TEST(LiveIntervalTest, TestMoveSubRegDefAcrossUseDef) {
  // Covers handleMove for a subregister def of %1 that crosses both a use of
  // %1.sub0 and another subregister def of %1 (see the MIR below).
  liveIntervalTest(R"MIR(
%1:vreg_64 = IMPLICIT_DEF
bb.1:
%2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
%3:vgpr_32 = V_ADD_U32_e32 %2, %1.sub0, implicit $exec
undef %1.sub0:vreg_64 = V_ADD_U32_e32 %2, %2, implicit $exec
%1.sub1:vreg_64 = COPY %2
S_NOP 0, implicit %1.sub1
S_BRANCH %bb.1
)MIR", [](MachineFunction &MF, LiveIntervals &LIS) {
    // Mirror the scheduler, which drops the undef flag from subregister defs
    // before it moves anything. Operand 0 of the instruction fetched here is
    // that def.
    getMI(MF, 2, 1).getOperand(0).setIsUndef(false);
    // NOTE(review): presumably moves instruction 3 of block 1 (the %1.sub1
    // COPY) up to position 1 -- confirm against testHandleMove.
    testHandleMove(MF, LIS, 3, 1, 1);
  });
}
TEST(LiveIntervalTest, TestMoveSubRegDefAcrossUseDefMulti) {
  // Three-lane (vreg_96) variant: a subregister def of %1 crosses a use of
  // %1.sub0 and more than one other subregister def of %1 (see the MIR below).
  liveIntervalTest(R"MIR(
%1:vreg_96 = IMPLICIT_DEF
bb.1:
%2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
%3:vgpr_32 = V_ADD_U32_e32 %2, %1.sub0, implicit $exec
undef %1.sub0:vreg_96 = V_ADD_U32_e32 %2, %2, implicit $exec
%1.sub1:vreg_96 = COPY %2
%1.sub2:vreg_96 = COPY %2
S_NOP 0, implicit %1.sub1, implicit %1.sub2
S_BRANCH %bb.1
)MIR", [](MachineFunction &MF, LiveIntervals &LIS) {
    // Mirror the scheduler, which drops the undef flag from subregister defs
    // before it moves anything. Operand 0 of the instruction fetched here is
    // that def.
    getMI(MF, 2, 1).getOperand(0).setIsUndef(false);
    // NOTE(review): presumably moves instruction 4 of block 1 (the %1.sub2
    // COPY) up to position 1 -- confirm against testHandleMove.
    testHandleMove(MF, LIS, 4, 1, 1);
  });
}
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
initLLVM();