[X86] Use LivePhysRegs in X86FixupBWInsts.

Kill-flags, which computeRegisterLiveness uses, are not reliable.
LivePhysRegs is.

Differential Revision: http://reviews.llvm.org/D19472

llvm-svn: 267495
This commit is contained in:
Ahmed Bougacha 2016-04-26 00:00:48 +00:00
parent 504105c02c
commit b6c12fe106
4 changed files with 38 additions and 27 deletions

View File

@ -49,7 +49,7 @@
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@ -81,7 +81,7 @@ class FixupBWInstPass : public MachineFunctionPass {
/// \brief Loop over all of the instructions in the basic block
/// replacing applicable byte or word instructions with better
/// alternatives.
void processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB) const;
void processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
/// \brief This sets the \p SuperDestReg to the 32 bit super reg
/// of the original destination register of the MachineInstr
@ -128,6 +128,9 @@ private:
/// Machine loop info used for guiding some heuristics.
MachineLoopInfo *MLI;
/// Register Liveness information after the current instruction.
LivePhysRegs LiveRegs;
};
char FixupBWInstPass::ID = 0;
}
@ -142,6 +145,7 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
OptForSize = MF.getFunction()->optForSize();
MLI = &getAnalysis<MachineLoopInfo>();
LiveRegs.init(&TII->getRegisterInfo());
DEBUG(dbgs() << "Start X86FixupBWInsts\n";);
@ -181,11 +185,7 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
if (getX86SubSuperRegister(SuperDestReg, OrigDestSize) != OrigDestReg)
return false;
MachineBasicBlock::LivenessQueryResult LQR =
OrigMI->getParent()->computeRegisterLiveness(&TII->getRegisterInfo(),
SuperDestReg, OrigMI);
if (LQR != MachineBasicBlock::LQR_Dead)
if (LiveRegs.contains(SuperDestReg))
return false;
if (OrigDestSize == 8) {
@ -194,9 +194,7 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
// whether the super-register is dead.
unsigned UpperByteReg = getX86SubSuperRegister(SuperDestReg, 8, true);
LQR = OrigMI->getParent()->computeRegisterLiveness(&TII->getRegisterInfo(),
UpperByteReg, OrigMI);
if (LQR != MachineBasicBlock::LQR_Dead)
if (LiveRegs.contains(UpperByteReg))
return false;
}
@ -229,7 +227,7 @@ MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode,
}
void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock &MBB) {
// This algorithm doesn't delete the instructions it is replacing
// right away. By leaving the existing instructions in place, the
@ -243,9 +241,14 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
// from making it seem as if the larger register might be live.
SmallVector<std::pair<MachineInstr *, MachineInstr *>, 8> MIReplacements;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
// Start computing liveness for this block. We iterate from the end to be able
// to update this for each instruction.
LiveRegs.clear();
LiveRegs.addLiveOuts(&MBB);
for (auto I = MBB.rbegin(); I != MBB.rend(); ++I) {
MachineInstr *NewMI = nullptr;
MachineInstr *MI = I;
MachineInstr *MI = &*I;
// See if this is an instruction of the type we are currently looking for.
switch (MI->getOpcode()) {
@ -275,6 +278,9 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
if (NewMI)
MIReplacements.push_back(std::make_pair(MI, NewMI));
// We're done with this instruction, update liveness for the next one.
LiveRegs.stepBackward(*MI);
}
while (!MIReplacements.empty()) {

View File

@ -1,6 +1,6 @@
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fixup-byte-word-insts=1 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWON %s
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fixup-byte-word-insts=0 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWOFF %s
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWON %s
%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
%struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }
@ -185,7 +185,8 @@ define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struc
; BWON: movzbl
; BWOFF: movb
; CHECK: movb
; CHECK: movb
; BWON: movzbl
; BWOFF: movb
; CHECK: movb
; CHECK: ret
define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
@ -340,8 +341,9 @@ block4: ; preds = %4, %.lr.ph
; Make sure that we merge the consecutive load/store sequence below and use a
; word (16 bit) instead of a byte copy.
; CHECK-LABEL: MergeLoadStoreBaseIndexOffset:
; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK: movw [[REG]], (%{{.*}})
; BWON: movzwl (%{{.*}},%{{.*}}), %e[[REG:[a-z]+]]
; BWOFF: movw (%{{.*}},%{{.*}}), %[[REG:[a-z]+]]
; CHECK: movw %[[REG]], (%{{.*}})
define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
br label %1
@ -372,8 +374,9 @@ define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
; word (16 bit) instead of a byte copy even if there are intermediate sign
; extensions.
; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetSext:
; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK: movw [[REG]], (%{{.*}})
; BWON: movzwl (%{{.*}},%{{.*}}), %e[[REG:[a-z]+]]
; BWOFF: movw (%{{.*}},%{{.*}}), %[[REG:[a-z]+]]
; CHECK: movw %[[REG]], (%{{.*}})
define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
br label %1

View File

@ -11,8 +11,6 @@ target triple = "x86_64-apple-macosx10.8.0"
; This has byte loads interspersed with byte stores, in a single
; basic-block loop. The upper portion should be dead, so the movb loads
; should have been changed into movzbl instead.
; TODO: The second movb load doesn't get fixed due to register liveness
; not being accurate enough.
; CHECK-LABEL: foo1
; load:
; BWON: movzbl
@ -20,7 +18,8 @@ target triple = "x86_64-apple-macosx10.8.0"
; store:
; CHECK: movb
; load:
; CHECK: movb
; BWON: movzbl
; BWOFF: movb
; store:
; CHECK: movb
; CHECK: ret
@ -59,8 +58,6 @@ a4: ; preds = %4, %.lr.ph
; This has word loads interspersed with word stores.
; The upper portion should be dead, so the movw loads should have
; been changed into movzwl instead.
; TODO: The second movw load doesn't get fixed due to register liveness
; not being accurate enough.
; CHECK-LABEL: foo2
; load:
; BWON: movzwl
@ -68,7 +65,8 @@ a4: ; preds = %4, %.lr.ph
; store:
; CHECK: movw
; load:
; CHECK: movw
; BWON: movzwl
; BWOFF: movw
; store:
; CHECK: movw
; CHECK: ret

View File

@ -6,7 +6,10 @@
; RUN: FileCheck -check-prefix=CHECK -check-prefix=BWOFF %s
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -fixup-byte-word-insts=1 | \
; RUN: FileCheck -check-prefix=CHECK -check-prefix=BWON %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck -check-prefix=DARWIN %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -fixup-byte-word-insts=0 | \
; RUN: FileCheck -check-prefix=DARWIN -check-prefix=DARWIN-BWOFF %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -fixup-byte-word-insts=1 | \
; RUN: FileCheck -check-prefix=DARWIN -check-prefix=DARWIN-BWON %s
@x = common global i32 0, align 4
@ -84,7 +87,8 @@ entry:
; Except on Darwin, for legacy reasons.
; DARWIN-LABEL: unsigned_i16:
; DARWIN: movw
; DARWIN-BWOFF: movw
; DARWIN-BWON: movzwl
; DARWIN-NEXT: addw
; DARWIN-NEXT: movzwl
; DARWIN-NEXT: ret