Add a quick pass to optimize sign / zero extension instructions. On targets where the pre-extension value is available in a sub-register of the extension's result, replace uses of the pre-extension value with the result plus an extract_subreg.

For now, this pass is fairly conservative: it only performs the replacement when both the pre- and post-extension values are used in the same block. It will miss cases where the post-extension value is live but not used.
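As a rough sketch in MachineInstr-style pseudocode (the virtual register names and the SomeUse consumer are made up for illustration; sub-register index 4 is X86's 32-bit sub-register index, as in the X86 hook changed below), before the pass both values stay live:

  %dst = MOVSX64rr32 %src
  ... = SomeUse %src          ; pre-extension value still used, keeping %src alive

and after the pass the use reads the low 32 bits back out of the extension result instead:

  %dst = MOVSX64rr32 %src
  %new = EXTRACT_SUBREG %dst, 4
  ... = SomeUse %new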

llvm-svn: 93278
Evan Cheng 2010-01-13 00:30:23 +00:00
parent 1738d7d11a
commit 76db3bb18e
9 changed files with 203 additions and 28 deletions


@ -170,6 +170,10 @@ namespace llvm {
/// instructions.
FunctionPass *createMachineSinkingPass();
/// createOptimizeExtsPass - This pass performs sign / zero extension
/// optimization by increasing uses of extended values.
FunctionPass *createOptimizeExtsPass();
/// createStackSlotColoringPass - This pass performs stack slot coloring.
FunctionPass *createStackSlotColoringPass(bool);


@ -149,16 +149,15 @@ public:
return false;
}
/// isCoalescableInstr - Return true if the instruction is "coalescable". That
/// is, it's like a copy where it's legal for the source to overlap the
/// destination. e.g. X86::MOVSX64rr32.
virtual bool isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
if (isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
isCopy = true;
return true;
}
/// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
/// extension instruction. That is, it's like a copy where it's legal for the
/// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
/// true, then it's expected the pre-extension value is available as a subreg
/// of the result register. This also returns the sub-register index in
/// SubIdx.
virtual bool isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const {
return false;
}
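For example (illustrative register numbers), given %reg1026 = MOVSX64rr32 %reg1025 on x86-64, an implementation would return true with SrcReg = %reg1025, DstReg = %reg1026, and SubIdx = 4, meaning the pre-extension value can be recovered as EXTRACT_SUBREG %reg1026, 4.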


@ -324,6 +328,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
/* allowDoubleDefs= */ true);
if (OptLevel != CodeGenOpt::None) {
PM.add(createOptimizeExtsPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
if (!DisableMachineSink)


@ -0,0 +1,149 @@
//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ext-opt"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
cl::desc("Aggressive extension optimization"));
STATISTIC(NumReuse, "Number of extension results reused");
namespace {
class OptimizeExts : public MachineFunctionPass {
const TargetMachine *TM;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
MachineDominatorTree *DT; // Machine dominator tree
public:
static char ID; // Pass identification
OptimizeExts() : MachineFunctionPass(&ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
}
};
}
char OptimizeExts::ID = 0;
static RegisterPass<OptimizeExts>
X("opt-exts", "Optimize sign / zero extensions");
FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); }
bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) {
TM = &MF.getTarget();
TII = TM->getInstrInfo();
MRI = &MF.getRegInfo();
DT = &getAnalysis<MachineDominatorTree>();
bool Changed = false;
SmallPtrSet<MachineInstr*, 8> LocalMIs;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end(); MII != ME;
++MII) {
MachineInstr *MI = &*MII;
LocalMIs.insert(MI);
unsigned SrcReg, DstReg, SubIdx;
if (TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) {
if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
TargetRegisterInfo::isPhysicalRegister(SrcReg))
continue;
MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
if (++UI == MRI->use_end())
// No other uses.
continue;
// Ok, the source has other uses. See if we can replace the other uses
// with use of the result of the extension.
SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
UI = MRI->use_begin(DstReg);
for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
++UI)
ReachedBBs.insert(UI->getParent());
bool ExtendLife = true;
SmallVector<MachineOperand*, 8> Uses;
SmallVector<MachineOperand*, 8> ExtendedUses;
UI = MRI->use_begin(SrcReg);
for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
++UI) {
MachineOperand &UseMO = UI.getOperand();
MachineInstr *UseMI = &*UI;
if (UseMI == MI)
continue;
MachineBasicBlock *UseMBB = UseMI->getParent();
if (UseMBB == MBB) {
// Local uses that come after the extension.
if (!LocalMIs.count(UseMI))
Uses.push_back(&UseMO);
} else if (ReachedBBs.count(UseMBB))
// Non-local uses where the result of extension is used. Always
// replace these.
Uses.push_back(&UseMO);
else if (Aggressive && DT->dominates(MBB, UseMBB))
// We may want to extend live range of the extension result in order
// to replace these uses.
ExtendedUses.push_back(&UseMO);
else {
// Both will be live out of the def MBB anyway. Don't extend live
// range of the extension result.
ExtendLife = false;
break;
}
}
if (ExtendLife && !ExtendedUses.empty())
// Ok, we'll extend the liveness of the extension result.
std::copy(ExtendedUses.begin(), ExtendedUses.end(),
std::back_inserter(Uses));
// Now replace all uses.
if (!Uses.empty()) {
const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
MachineOperand *UseMO = Uses[i];
MachineInstr *UseMI = UseMO->getParent();
MachineBasicBlock *UseMBB = UseMI->getParent();
unsigned NewVR = MRI->createVirtualRegister(RC);
BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
TII->get(TargetInstrInfo::EXTRACT_SUBREG), NewVR)
.addReg(DstReg).addImm(SubIdx);
UseMO->setReg(NewVR);
++NumReuse;
Changed = true;
}
}
}
}
}
return Changed;
}
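Note the off-by-default -aggressive-ext-opt flag: with it, the pass is also willing to extend the live range of the extension result in order to replace uses in blocks that are dominated by the defining block but where the extension result is not already live; without it, those uses are left using the pre-extension value.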


@ -713,9 +713,9 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
}
bool
X86InstrInfo::isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const {
switch (MI.getOpcode()) {
default: break;
case X86::MOVSX16rr8:
@ -733,10 +733,8 @@ X86InstrInfo::isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
// Be conservative.
return false;
isCopy = false;
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
DstSubIdx = 0;
switch (MI.getOpcode()) {
default:
llvm_unreachable(0);
@ -747,22 +745,23 @@ X86InstrInfo::isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
case X86::MOVZX32rr8:
case X86::MOVSX64rr8:
case X86::MOVZX64rr8:
SrcSubIdx = 1;
SubIdx = 1;
break;
case X86::MOVSX32rr16:
case X86::MOVZX32rr16:
case X86::MOVSX64rr16:
case X86::MOVZX64rr16:
SrcSubIdx = 3;
SubIdx = 3;
break;
case X86::MOVSX64rr32:
case X86::MOVZX64rr32:
SrcSubIdx = 4;
SubIdx = 4;
break;
}
return true;
}
}
return isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
return false;
}
/// isFrameOperand - Return true and the FrameIndex if the specified
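(For reference, the SubIdx values returned above are X86's sub-register indices: 1 for the 8-bit, 3 for the 16-bit, and 4 for the 32-bit sub-register of the destination register.)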


@ -448,13 +448,15 @@ public:
unsigned &SrcReg, unsigned &DstReg,
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
/// isCoalescableInstr - Return true if the instruction is "coalescable". That
/// is, it's like a copy where it's legal for the source to overlap the
/// destination. e.g. X86::MOVSX64rr32.
virtual bool isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
/// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
/// extension instruction. That is, it's like a copy where it's legal for the
/// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
/// true, then it's expected the pre-extension value is available as a subreg
/// of the result register. This also returns the sub-register index in
/// SubIdx.
virtual bool isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
/// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination


@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 58
; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 57
; PR2568
@g_3 = external global i16 ; <i16*> [#uses=1]


@ -0,0 +1,17 @@
; RUN: llc < %s -march=x86-64 | FileCheck %s
; rdar://7529457
define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
; CHECK: t:
; CHECK: movslq %e{{.*}}, %rax
; CHECK: movq %rax
; CHECK: movl %eax
%C = add i64 %A, %B
%D = trunc i64 %C to i32
volatile store i32 %D, i32* %P
%E = shl i64 %C, 32
%F = ashr i64 %E, 32
volatile store i64 %F, i64 *%P2
volatile store i32 %D, i32* %P
ret i64 undef
}
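The last CHECK line is the interesting one: the final store of the truncated value %D is done with movl %eax, reusing the low 32 bits of the sign-extended result already sitting in %rax, rather than keeping a separate copy of the pre-extension value live.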


@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 6
; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 9
type { [62 x %struct.Bitvec*] } ; type %0
type { i8* } ; type %1