[X86]: Improve Liveness checking for X86FixupBWInsts.cpp

Differential Revision: http://reviews.llvm.org/D21085


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272797 91177308-0d34-0410-b5e6-96231b3b80d8
Kevin B. Smith 2016-06-15 16:03:06 +00:00
parent 30aa48372a
commit 8800861c19
2 changed files with 134 additions and 39 deletions
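Before the diff itself, roughly what the change amounts to: the pass still walks each block backward (liveness seeded from the live-outs) collecting replacement candidates, but any candidate the backward liveness check rejects is now retried on a forward walk seeded from the live-ins, since one direction can be overly conservative (see the PR28142 FIXME in the patch). The sketch below is only an illustration of that shape under those assumptions, not the committed implementation; the function name replaceWithBidirectionalLiveness and the Transform callback are hypothetical stand-ins for the pass's processBasicBlock()/tryReplaceInstr(), while the LivePhysRegs calls are the ones the patch uses.

// Sketch only: assumes an LLVM (circa 3.9) build, a LivePhysRegs already
// initialized for the target, and a hypothetical Transform callback standing
// in for tryReplaceInstr(). Not the committed code.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

static void replaceWithBidirectionalLiveness(
    MachineBasicBlock &MBB, LivePhysRegs &LiveRegs,
    function_ref<MachineInstr *(MachineInstr *, const LivePhysRegs &)>
        Transform) {
  // Backward pass: seed liveness from the block's live-outs so that, when an
  // instruction is visited, LiveRegs describes what is live just after it.
  // Record each instruction with its replacement (nullptr when the transform
  // was rejected or the instruction was not a candidate).
  SmallVector<std::pair<MachineInstr *, MachineInstr *>, 8> Replacements;
  LiveRegs.clear();
  LiveRegs.addLiveOuts(MBB);
  for (auto I = MBB.rbegin(); I != MBB.rend(); ++I) {
    MachineInstr *MI = &*I;
    Replacements.push_back(std::make_pair(MI, Transform(MI, LiveRegs)));
    LiveRegs.stepBackward(*MI);
  }

  // Forward retry: recompute liveness from the live-ins and give rejected
  // entries a second chance, since one direction can be more conservative
  // than the other.
  LiveRegs.clear();
  LiveRegs.addLiveIns(MBB);
  for (auto I = MBB.begin(); I != MBB.end(); ++I) {
    MachineInstr *MI = &*I;
    SmallVector<std::pair<unsigned, const MachineOperand *>, 4> Clobbers;
    LiveRegs.stepForward(*MI, Clobbers);
    for (auto &R : Replacements)
      if (R.first == MI && !R.second)
        R.second = Transform(MI, LiveRegs);
  }

  // Splice in whatever succeeded in either direction.
  for (auto &R : Replacements)
    if (R.second) {
      MBB.insert(R.first, R.second);
      MBB.erase(R.first);
    }
}

Keeping rejected candidates in the same list and only splicing non-null replacements at the end is what lets the forward walk rescue the cases the backward walk was too conservative about.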


@@ -95,6 +95,12 @@ class FixupBWInstPass : public MachineFunctionPass {
   /// nullptr.
   MachineInstr *tryReplaceCopy(MachineInstr *MI) const;
 
+  // Change the MachineInstr \p MI into an equivalent 32 bit instruction if
+  // possible. Return the replacement instruction if OK, return nullptr
+  // otherwise. Set WasCandidate to true or false depending on whether the
+  // MI was a candidate for this sort of transformation.
+  MachineInstr *tryReplaceInstr(MachineInstr *MI, MachineBasicBlock &MBB,
+                                bool &WasCandidate) const;
 
 public:
   static char ID;
@@ -267,6 +273,54 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
   return MIB;
 }
 
+MachineInstr *FixupBWInstPass::tryReplaceInstr(
+    MachineInstr *MI, MachineBasicBlock &MBB,
+    bool &WasCandidate) const {
+  MachineInstr *NewMI = nullptr;
+  WasCandidate = false;
+
+  // See if this is an instruction of the type we are currently looking for.
+  switch (MI->getOpcode()) {
+
+  case X86::MOV8rm:
+    // Only replace 8 bit loads with the zero extending versions if
+    // in an inner most loop and not optimizing for size. This takes
+    // an extra byte to encode, and provides limited performance upside.
+    if (MachineLoop *ML = MLI->getLoopFor(&MBB)) {
+      if (ML->begin() == ML->end() && !OptForSize) {
+        NewMI = tryReplaceLoad(X86::MOVZX32rm8, MI);
+        WasCandidate = true;
+      }
+    }
+    break;
+
+  case X86::MOV16rm:
+    // Always try to replace 16 bit load with 32 bit zero extending.
+    // Code size is the same, and there is sometimes a perf advantage
+    // from eliminating a false dependence on the upper portion of
+    // the register.
+    NewMI = tryReplaceLoad(X86::MOVZX32rm16, MI);
+    WasCandidate = true;
+    break;
+
+  case X86::MOV8rr:
+  case X86::MOV16rr:
+    // Always try to replace 8/16 bit copies with a 32 bit copy.
+    // Code size is either less (16) or equal (8), and there is sometimes a
+    // perf advantage from eliminating a false dependence on the upper portion
+    // of the register.
+    NewMI = tryReplaceCopy(MI);
+    WasCandidate = true;
+    break;
+
+  default:
+    // nothing to do here.
+    break;
+  }
+
+  return NewMI;
+}
+
 void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
                                         MachineBasicBlock &MBB) {
@@ -288,57 +342,61 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
   // We run after PEI, so we need to AddPristinesAndCSRs.
   LiveRegs.addLiveOuts(MBB);
 
+  bool CandidateDidntGetTransformed = false;
+  bool WasCandidate = false;
+
   for (auto I = MBB.rbegin(); I != MBB.rend(); ++I) {
-    MachineInstr *NewMI = nullptr;
     MachineInstr *MI = &*I;
 
+    MachineInstr *NewMI = tryReplaceInstr(MI, MBB, WasCandidate);
-    // See if this is an instruction of the type we are currently looking for.
-    switch (MI->getOpcode()) {
-
-    case X86::MOV8rm:
-      // Only replace 8 bit loads with the zero extending versions if
-      // in an inner most loop and not optimizing for size. This takes
-      // an extra byte to encode, and provides limited performance upside.
-      if (MachineLoop *ML = MLI->getLoopFor(&MBB)) {
-        if (ML->begin() == ML->end() && !OptForSize)
-          NewMI = tryReplaceLoad(X86::MOVZX32rm8, MI);
-      }
-      break;
-
-    case X86::MOV16rm:
-      // Always try to replace 16 bit load with 32 bit zero extending.
-      // Code size is the same, and there is sometimes a perf advantage
-      // from eliminating a false dependence on the upper portion of
-      // the register.
-      NewMI = tryReplaceLoad(X86::MOVZX32rm16, MI);
-      break;
-
-    case X86::MOV8rr:
-    case X86::MOV16rr:
-      // Always try to replace 8/16 bit copies with a 32 bit copy.
-      // Code size is either less (16) or equal (8), and there is sometimes a
-      // perf advantage from eliminating a false dependence on the upper portion
-      // of the register.
-      NewMI = tryReplaceCopy(MI);
-      break;
-
-    default:
-      // nothing to do here.
-      break;
-    }
-
-    if (NewMI)
+    // Add this to replacements if it was a candidate, even if NewMI is
+    // nullptr. We will revisit that in a bit.
+    if (WasCandidate) {
       MIReplacements.push_back(std::make_pair(MI, NewMI));
+      if (!NewMI)
+        CandidateDidntGetTransformed = true;
+    }
 
     // We're done with this instruction, update liveness for the next one.
     LiveRegs.stepBackward(*MI);
   }
 
+  if (CandidateDidntGetTransformed) {
+    // If there was a candidate that didn't get transformed then let's try
+    // doing the register liveness going forward. Sometimes one direction
+    // is overly conservative compared to the other.
+    //
+    // FIXME - Register liveness should be investigated further. This really
+    // shouldn't be necessary. See PR28142.
+    LiveRegs.clear();
+    LiveRegs.addLiveIns(MBB);
+    auto NextCandidateIter = MIReplacements.begin();
+
+    for (auto I = MBB.begin(); I != MBB.end(); ++I) {
+      MachineInstr *MI = &*I;
+      SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers;
+      LiveRegs.stepForward(*MI, Clobbers);
+      // Only check and create a new instruction if this instruction is
+      // known to be a candidate that didn't get transformed.
+      if (NextCandidateIter->first == MI) {
+        if (NextCandidateIter->second == nullptr) {
+          MachineInstr *NewMI = tryReplaceInstr(MI, MBB, WasCandidate);
+          NextCandidateIter->second = NewMI;
+        }
+        ++NextCandidateIter;
+      }
+    }
+  }
+
   while (!MIReplacements.empty()) {
     MachineInstr *MI = MIReplacements.back().first;
     MachineInstr *NewMI = MIReplacements.back().second;
     MIReplacements.pop_back();
 
-    MBB.insert(MI, NewMI);
-    MBB.erase(MI);
+    if (NewMI) {
+      MBB.insert(MI, NewMI);
+      MBB.erase(MI);
+    }
   }
 }


@@ -0,0 +1,37 @@
+# RUN: llc -run-pass x86-fixup-bw-insts -mtriple=x86_64-- -o /dev/null %s 2>&1 | FileCheck %s
+# Verify that the forwards liveness checking code in fixup-bw-inst works.
+--- |
+  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+  define i8 @foo(i8 %p1) {
+  entry:
+    %t1 = or i8 %p1, 0
+    br label %false
+
+  false:
+    ret i8 %t1
+  }
+
+...
+---
+name: foo
+allVRegsAllocated: true
+isSSA: false
+tracksRegLiveness: true
+liveins:
+  - { reg: '%edi' }
+body: |
+  bb.0.entry:
+    liveins: %edi
+    successors: %bb.1.false
+
+    %al = MOV8rr %dil, implicit %edi
+    ; CHECK: %eax = MOV32rr undef %edi, implicit %dil
+
+  bb.1.false:
+    liveins: %al, %ax, %eax, %rax
+
+    RETQ %al
+...