RegisterScavenging: Followup to r305625

This does some improvements/cleanup to the recently introduced
scavengeRegisterBackwards() functionality:

- Rewrite the findSurvivorBackwards algorithm to use the existing
  LiveRegUnits::accumulateBackward() code. This also avoids the Available
  and Candidates bitsets and just needs 1 LiveRegUnits instance
  (= 1 bitset).
- Pick registers in allocation order instead of register number order.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305817 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matthias Braun 2017-06-20 18:43:14 +00:00
parent 73854fd751
commit 0cc137e051
7 changed files with 83 additions and 86 deletions

View File

@ -372,60 +372,62 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
/// clobbered for the longest time.
/// Returns the register and the earliest position we know it to be free or
/// the position MBB.end() if no register is available.
static std::pair<unsigned, MachineBasicBlock::iterator>
findSurvivorBackwards(const TargetRegisterInfo &TRI,
static std::pair<MCPhysReg, MachineBasicBlock::iterator>
findSurvivorBackwards(const MachineRegisterInfo &MRI,
MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
BitVector &Available, BitVector &Candidates) {
const LiveRegUnits &LiveOut, ArrayRef<MCPhysReg> AllocationOrder) {
bool FoundTo = false;
unsigned Survivor = 0;
MCPhysReg Survivor = 0;
MachineBasicBlock::iterator Pos;
MachineBasicBlock &MBB = *From->getParent();
unsigned InstrLimit = 25;
unsigned InstrCountDown = InstrLimit;
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
LiveRegUnits Used(TRI);
for (MachineBasicBlock::iterator I = From;; --I) {
const MachineInstr &MI = *I;
// Remove any candidates touched by instruction.
bool FoundVReg = false;
for (const MachineOperand &MO : MI.operands()) {
if (MO.isRegMask()) {
Candidates.clearBitsNotInMask(MO.getRegMask());
continue;
}
if (!MO.isReg() || MO.isUndef() || MO.isDebug())
continue;
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
FoundVReg = true;
} else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
Candidates.reset(*AI);
}
}
Used.accumulateBackward(MI);
if (I == To) {
// If one of the available registers survived this long take it.
Available &= Candidates;
int Reg = Available.find_first();
if (Reg != -1)
return std::make_pair(Reg, MBB.end());
// See if one of the registers in RC wasn't used so far.
for (MCPhysReg Reg : AllocationOrder) {
if (!MRI.isReserved(Reg) && Used.available(Reg) &&
LiveOut.available(Reg))
return std::make_pair(Reg, MBB.end());
}
// Otherwise we will continue up to InstrLimit instructions to find
// the register which is not defined/used for the longest time.
FoundTo = true;
Pos = To;
}
if (FoundTo) {
if (Survivor == 0 || !Candidates.test(Survivor)) {
int Reg = Candidates.find_first();
if (Reg == -1)
if (Survivor == 0 || !Used.available(Survivor)) {
MCPhysReg AvilableReg = 0;
for (MCPhysReg Reg : AllocationOrder) {
if (!MRI.isReserved(Reg) && Used.available(Reg)) {
AvilableReg = Reg;
break;
}
}
if (AvilableReg == 0)
break;
Survivor = Reg;
Survivor = AvilableReg;
}
if (--InstrCountDown == 0)
break;
// Keep searching when we find a vreg since the spilled register will
// be useful for this other vreg as well later.
bool FoundVReg = false;
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
FoundVReg = true;
break;
}
}
if (FoundVReg) {
// Keep searching when we find a vreg since the spilled register will
// be useful for this other vreg as well later.
InstrCountDown = InstrLimit;
Pos = I;
}
@ -568,18 +570,13 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
bool RestoreAfter, int SPAdj) {
const MachineBasicBlock &MBB = *To->getParent();
const MachineFunction &MF = *MBB.getParent();
// Consider all allocatable registers in the register class initially
BitVector Candidates = TRI->getAllocatableSet(MF, &RC);
// Try to find a register that's unused if there is one, as then we won't
// have to spill.
BitVector Available = getRegsAvailable(&RC);
// Find the register whose use is furthest away.
MachineBasicBlock::iterator UseMI;
std::pair<unsigned, MachineBasicBlock::iterator> P =
findSurvivorBackwards(*TRI, MBBI, To, Available, Candidates);
unsigned Reg = P.first;
ArrayRef<MCPhysReg> AllocationOrder = RC.getRawAllocationOrder(MF);
std::pair<MCPhysReg, MachineBasicBlock::iterator> P =
findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder);
MCPhysReg Reg = P.first;
MachineBasicBlock::iterator SpillBefore = P.second;
assert(Reg != 0 && "No register left to scavenge!");
// Found an available register?

View File

@ -5,7 +5,7 @@
; CHECK: str [[REG:x[0-9]+]], [sp, #8]
; CHECK: add [[REG]], sp, #248
; CHECK: str xzr, [{{\s*}}[[REG]], #32760]
; CHECK: ldr x30, [sp, #8]
; CHECK: ldr [[REG]], [sp, #8]
target triple = "arm64-apple-ios"
@ptr8 = external global i8*

View File

@ -6,9 +6,9 @@
; Materialize into a mov. Make sure there isn't an unnecessary copy.
; GCN-LABEL: {{^}}func_mov_fi_i32:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN: s_sub_u32 vcc_hi, s5, s4
; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], vcc_hi, 6
; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, [[SCALED]]
; GCN: s_sub_u32 s6, s5, s4
; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
; GCN-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]]
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
define void @func_mov_fi_i32() #0 {
@ -22,9 +22,9 @@ define void @func_mov_fi_i32() #0 {
; GCN-LABEL: {{^}}func_add_constant_to_fi_i32:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN: s_sub_u32 vcc_hi, s5, s4
; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], vcc_hi, 6
; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, [[SCALED]]
; GCN: s_sub_u32 s6, s5, s4
; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
; GCN-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]]
; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
@ -39,9 +39,9 @@ define void @func_add_constant_to_fi_i32() #0 {
; into.
; GCN-LABEL: {{^}}func_other_fi_user_i32:
; GCN: s_sub_u32 vcc_hi, s5, s4
; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], vcc_hi, 6
; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, [[SCALED]]
; GCN: s_sub_u32 s6, s5, s4
; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
; GCN-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]]
; GCN-NEXT: v_mul_lo_i32 v0, v0, 9
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
@ -71,8 +71,8 @@ define void @func_load_private_arg_i32_ptr(i32* %ptr) #0 {
; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr:
; GCN: s_waitcnt
; GCN-NEXT: s_sub_u32 vcc_hi, s5, s4
; GCN-NEXT: v_lshr_b32_e64 v0, vcc_hi, 6
; GCN-NEXT: s_sub_u32 s6, s5, s4
; GCN-NEXT: v_lshr_b32_e64 v0, s6, 6
; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0
; GCN-NOT: v_mov
; GCN: ds_write_b32 v0, v0
@ -99,8 +99,8 @@ define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 }* byval %arg0) #
}
; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block:
; GCN: s_sub_u32 vcc_hi, s5, s4
; GCN: v_lshr_b32_e64 v1, vcc_hi, 6
; GCN: s_sub_u32 s6, s5, s4
; GCN: v_lshr_b32_e64 v1, s6, 6
; GCN: s_and_saveexec_b64
; GCN: v_add_i32_e32 v0, vcc, 4, v1
@ -123,10 +123,10 @@ ret:
; Added offset can't be used with VOP3 add
; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32:
; GCN: s_sub_u32 vcc_hi, s5, s4
; GCN-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], vcc_hi, 6
; GCN-DAG: s_movk_i32 vcc_hi, 0x204
; GCN: v_add_i32_e32 v0, vcc, vcc_hi, [[SCALED]]
; GCN: s_sub_u32 s6, s5, s4
; GCN-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
; GCN-DAG: s_movk_i32 s6, 0x204
; GCN: v_add_i32_e64 v0, s[6:7], s6, [[SCALED]]
; GCN: v_mul_lo_i32 v0, v0, 9
; GCN: ds_write_b32 v0, v0
define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {

View File

@ -119,10 +119,10 @@ endif: ; preds = %else, %if
; GCN: ; clobber m0
; TOSMEM: s_mov_b32 vcc_hi, m0
; TOSMEM: s_mov_b32 s2, m0
; TOSMEM: s_add_u32 m0, s3, 0x100
; TOSMEM-NEXT: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill
; TOSMEM: s_mov_b32 m0, vcc_hi
; TOSMEM: s_mov_b32 m0, s2
; TOSMEM: s_mov_b64 exec,
; TOSMEM: s_cbranch_execz
@ -170,10 +170,10 @@ endif:
; TOSMEM: s_mov_b32 m0, -1
; TOSMEM: s_mov_b32 vcc_hi, m0
; TOSMEM: s_mov_b32 s0, m0
; TOSMEM: s_add_u32 m0, s3, 0x100
; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload
; TOSMEM: s_mov_b32 m0, vcc_hi
; TOSMEM: s_mov_b32 m0, s0
; TOSMEM: s_waitcnt lgkmcnt(0)
; TOSMEM: ds_write_b64

View File

@ -8,15 +8,15 @@ entry:
; Note that part of what is being checked here is proper register reuse.
; CHECK: mfcr [[T1:r[0-9]+]] ; cr2
; CHECK: lis [[T2:r[0-9]+]], 1
; CHECK: addi r3, r1, 72
; CHECK: rotlwi [[T1]], [[T1]], 8
; CHECK: ori [[T2]], [[T2]], 34540
; CHECK: stwx [[T1]], r1, [[T2]]
; CHECK: lis [[T3:r[0-9]+]], 1
; CHECK: mfcr [[T4:r[0-9]+]] ; cr3
; CHECK: ori [[T3]], [[T3]], 34536
; CHECK: lis [[T3:r[0-9]+]], 1
; CHECK: rotlwi [[T4]], [[T4]], 12
; CHECK: ori [[T3]], [[T3]], 34536
; CHECK: stwx [[T4]], r1, [[T3]]
; CHECK: addi r3, r1, 72
%x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1]

View File

@ -23,9 +23,9 @@ entry:
; CHECK-REG: blr
; CHECK-FISL: @foo1
; CHECK-FISL: lis 0, -1
; CHECK-FISL: ori 0, 0, 65384
; CHECK-FISL: stxsdx 1, 1, 0
; CHECK-FISL: lis 3, -1
; CHECK-FISL: ori 3, 3, 65384
; CHECK-FISL: stxsdx 1, 1, 3
; CHECK-FISL: blr
; CHECK-P9-REG: @foo1
@ -54,8 +54,8 @@ entry:
; CHECK-FISL: @foo2
; CHECK-FISL: xsadddp [[R1:[0-9]+]], 1, 1
; CHECK-FISL: stxsdx [[R1]], [[R1]], 0
; CHECK-FISL: lxsdx [[R1]], [[R1]], 0
; CHECK-FISL: stxsdx [[R1]], [[R1]], 3
; CHECK-FISL: lxsdx [[R1]], [[R1]], 3
; CHECK-FISL: blr
; CHECK-P9-REG: @foo2

View File

@ -235,9 +235,9 @@ entry:
; CHECK-FISL-LABEL: @test14
; CHECK-FISL: xxlor 0, 34, 35
; CHECK-FISL: xxlnor 34, 34, 35
; CHECK-FISL: lis 0, -1
; CHECK-FISL: ori 0, 0, 65520
; CHECK-FISL: stxvd2x 0, 1, 0
; CHECK-FISL: lis 3, -1
; CHECK-FISL: ori 3, 3, 65520
; CHECK-FISL: stxvd2x 0, 1, 3
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test14
@ -260,9 +260,9 @@ entry:
; CHECK-FISL: xxlor 36, 0, 0
; CHECK-FISL: xxlnor 0, 34, 35
; CHECK-FISL: xxlor 34, 0, 0
; CHECK-FISL: lis 0, -1
; CHECK-FISL: ori 0, 0, 65520
; CHECK-FISL: stxvd2x 36, 1, 0
; CHECK-FISL: lis 3, -1
; CHECK-FISL: ori 3, 3, 65520
; CHECK-FISL: stxvd2x 36, 1, 3
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test15
@ -285,9 +285,9 @@ entry:
; CHECK-FISL: xxlor 36, 0, 0
; CHECK-FISL: xxlnor 0, 34, 35
; CHECK-FISL: xxlor 34, 0, 0
; CHECK-FISL: lis 0, -1
; CHECK-FISL: ori 0, 0, 65520
; CHECK-FISL: stxvd2x 36, 1, 0
; CHECK-FISL: lis 3, -1
; CHECK-FISL: ori 3, 3, 65520
; CHECK-FISL: stxvd2x 36, 1, 3
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test16
@ -330,9 +330,9 @@ entry:
; CHECK-FISL: xxlor 36, 0, 0
; CHECK-FISL: xxlandc 0, 34, 35
; CHECK-FISL: xxlor 34, 0, 0
; CHECK-FISL: lis 0, -1
; CHECK-FISL: ori 0, 0, 65520
; CHECK-FISL: stxvd2x 36, 1, 0
; CHECK-FISL: lis 3, -1
; CHECK-FISL: ori 3, 3, 65520
; CHECK-FISL: stxvd2x 36, 1, 3
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test18
@ -355,9 +355,9 @@ entry:
; CHECK-FISL: xxlor 36, 0, 0
; CHECK-FISL: xxlandc 0, 34, 35
; CHECK-FISL: xxlor 34, 0, 0
; CHECK-FISL: lis 0, -1
; CHECK-FISL: ori 0, 0, 65520
; CHECK-FISL: stxvd2x 36, 1, 0
; CHECK-FISL: lis 3, -1
; CHECK-FISL: ori 3, 3, 65520
; CHECK-FISL: stxvd2x 36, 1, 3
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test19