AArch64: improve redundant copy elimination.

Mostly, this fixes the bug that if the CBZ guaranteed Xn but Wn was used, we
didn't sort out the use-def chain properly.

I've also made it check more than just the last instruction for a compatible
CBZ (so it can cope without fallthroughs). I'd have liked to do that
separately, but it's helps writing the test.

Finally, I removed some custom loops in favour of MachineInstr helpers and
refactored the control flow to flatten it and avoid possibly quadratic
iterations in blocks with many copies. NFC for these, just a general tidy-up.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@261154 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover 2016-02-17 21:16:53 +00:00
parent 68c9756745
commit 1753e553bf
2 changed files with 71 additions and 46 deletions

View File

@ -63,6 +63,20 @@ char AArch64RedundantCopyElimination::ID = 0;
INITIALIZE_PASS(AArch64RedundantCopyElimination, "aarch64-copyelim",
"AArch64 redundant copy elimination pass", false, false)
static bool guaranteesZeroRegInBlock(MachineInstr *MI, MachineBasicBlock *MBB) {
unsigned Opc = MI->getOpcode();
// Check if the current basic block is the target block to which the
// CBZ/CBNZ instruction jumps when its Wt/Xt is zero.
if ((Opc == AArch64::CBZW || Opc == AArch64::CBZX) &&
MBB == MI->getOperand(1).getMBB())
return true;
else if ((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) &&
MBB != MI->getOperand(1).getMBB())
return true;
return false;
}
bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
// Check if the current basic block has a single predecessor.
if (MBB->pred_size() != 1)
@ -73,18 +87,16 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
if (CompBr == PredMBB->end() || PredMBB->succ_size() != 2)
return false;
unsigned LastOpc = CompBr->getOpcode();
// Check if the current basic block is the target block to which the cbz/cbnz
// instruction jumps when its Wt/Xt is zero.
if (LastOpc == AArch64::CBZW || LastOpc == AArch64::CBZX) {
if (MBB != CompBr->getOperand(1).getMBB())
return false;
} else if (LastOpc == AArch64::CBNZW || LastOpc == AArch64::CBNZX) {
if (MBB == CompBr->getOperand(1).getMBB())
return false;
} else {
++CompBr;
do {
--CompBr;
if (guaranteesZeroRegInBlock(CompBr, MBB))
break;
} while (CompBr != PredMBB->begin() && CompBr->isTerminator());
// We've not found a CBZ/CBNZ, time to bail out.
if (!guaranteesZeroRegInBlock(CompBr, MBB))
return false;
}
unsigned TargetReg = CompBr->getOperand(0).getReg();
if (!TargetReg)
@ -98,6 +110,8 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
TargetRegs.insert(*AI);
bool Changed = false;
MachineBasicBlock::iterator LastChange = MBB->begin();
unsigned SmallestDef = TargetReg;
// Remove redundant Copy instructions unless TargetReg is modified.
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
MachineInstr *MI = &*I;
@ -111,48 +125,40 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) &&
!MRI->isReserved(DefReg) &&
(TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) {
CompBr->clearRegisterKills(DefReg, TRI);
if (MBB->isLiveIn(DefReg))
// Clear any kills of TargetReg between CompBr and MI.
for (MachineInstr &MMI :
make_range(MBB->begin()->getIterator(), MI->getIterator()))
MMI.clearRegisterKills(DefReg, TRI);
else
MBB->addLiveIn(DefReg);
DEBUG(dbgs() << "Remove redundant Copy : ");
DEBUG((MI)->print(dbgs()));
MI->eraseFromParent();
Changed = true;
LastChange = I;
NumCopiesRemoved++;
SmallestDef =
TRI->isSubRegister(SmallestDef, DefReg) ? DefReg : SmallestDef;
continue;
}
}
for (const MachineOperand &MO : MI->operands()) {
// FIXME: It is possible to use the register mask to check if all
// registers in TargetRegs are not clobbered. For now, we treat it like
// a basic block boundary.
if (MO.isRegMask())
return Changed;
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
"Expect physical register");
// Stop if the TargetReg is modified.
if (MO.isDef() && TargetRegs.count(Reg))
return Changed;
}
if (MI->modifiesRegister(TargetReg, TRI))
break;
}
return Changed;
if (!Changed)
return false;
// Otherwise, we have to fixup the use-def chain, starting with the
// CBZ/CBNZ. Conservatively mark as much as we can live.
CompBr->clearRegisterKills(SmallestDef, TRI);
// Clear any kills of TargetReg between CompBr and MI.
if (std::any_of(TargetRegs.begin(), TargetRegs.end(),
[&](unsigned Reg) { return MBB->isLiveIn(Reg); })) {
for (MachineInstr &MMI :
make_range(MBB->begin()->getIterator(), LastChange->getIterator()))
MMI.clearRegisterKills(SmallestDef, TRI);
} else
MBB->addLiveIn(TargetReg);
return true;
}
bool AArch64RedundantCopyElimination::runOnMachineFunction(

View File

@ -1,6 +1,6 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: f_XX
; CHECK-LABEL: f_XX:
; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
; CHECK: [[BB]]:
; CHECK-NOT: mov x[[REG]], xzr
@ -18,7 +18,7 @@ if.end: ; preds = %entry, %if.then
ret i64 %a.0
}
; CHECK-LABEL: f_WW
; CHECK-LABEL: f_WW:
; CHECK: cbz w[[REG:[0-9]+]], [[BB:.LBB.*]]
; CHECK: [[BB]]:
; CHECK-NOT: mov w[[REG]], wzr
@ -36,7 +36,7 @@ if.end: ; preds = %entry, %if.then
ret i32 %a.0
}
; CHECK-LABEL: f_XW
; CHECK-LABEL: f_XW:
; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
; CHECK: [[BB]]:
; CHECK-NOT: mov w[[REG]], wzr
@ -54,7 +54,7 @@ if.end: ; preds = %entry, %if.then
ret i32 %a.0
}
; CHECK-LABEL: f_WX
; CHECK-LABEL: f_WX:
; CHECK: cbz w[[REG:[0-9]+]], [[BB:.LBB.*]]
; CHECK: [[BB]]:
; CHECK: mov x[[REG]], xzr
@ -73,3 +73,22 @@ if.end: ; preds = %entry, %if.then
%a.0 = phi i64 [ %0, %if.then ], [ 0, %entry ]
ret i64 %a.0
}
; CHECK-LABEL: test_superreg:
; CHECK: cbz x[[REG:[0-9]+]], [[BB:.LBB.*]]
; CHECK: [[BB]]:
; CHECK: str x[[REG]], [x1]
; CHECK-NOT: mov w[[REG]], wzr
; Because we returned w0 but x0 was marked live-in to the block, we didn't
; remove the <kill> on the str leading to a verification failure.
define i32 @test_superreg(i64 %in, i64* %dest) {
%tst = icmp eq i64 %in, 0
br i1 %tst, label %true, label %false
false:
ret i32 42
true:
store volatile i64 %in, i64* %dest
ret i32 0
}