mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-15 23:57:48 +00:00
Add a quick pass to optimize sign / zero extension instructions. For targets where the pre-extension values are available in the subreg of the result of the extension, replace the uses of the pre-extension value with the result + extract_subreg.
For now, this pass is fairly conservative. It only performs the replacement when both the pre- and post-extension values are used in the block. It will miss cases where the post-extension values are live, but not used. llvm-svn: 93278
This commit is contained in:
parent
1738d7d11a
commit
76db3bb18e
@ -170,6 +170,10 @@ namespace llvm {
|
||||
/// instructions.
|
||||
FunctionPass *createMachineSinkingPass();
|
||||
|
||||
/// createOptimizeExtsPass - This pass performs sign / zero extension
|
||||
/// optimization by increasing uses of extended values.
|
||||
FunctionPass *createOptimizeExtsPass();
|
||||
|
||||
/// createStackSlotColoringPass - This pass performs stack slot coloring.
|
||||
FunctionPass *createStackSlotColoringPass(bool);
|
||||
|
||||
|
@ -149,16 +149,15 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/// isCoalescableInstr - Return true if the instruction is "coalescable". That
|
||||
/// is, it's like a copy where it's legal for the source to overlap the
|
||||
/// destination. e.g. X86::MOVSX64rr32.
|
||||
virtual bool isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
|
||||
unsigned &SrcReg, unsigned &DstReg,
|
||||
unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
|
||||
if (isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
|
||||
isCopy = true;
|
||||
return true;
|
||||
}
|
||||
/// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
|
||||
/// extension instruction. That is, it's like a copy where it's legal for the
|
||||
/// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
|
||||
/// true, then it's expected the pre-extension value is available as a subreg
|
||||
/// of the result register. This also returns the sub-register index in
|
||||
/// SubIdx.
|
||||
virtual bool isCoalescableExtInstr(const MachineInstr &MI,
|
||||
unsigned &SrcReg, unsigned &DstReg,
|
||||
unsigned &SubIdx) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -62,6 +62,10 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
|
||||
cl::desc("Verify generated machine code"),
|
||||
cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
|
||||
|
||||
#if 1
|
||||
static cl::opt<bool> XX("xx", cl::Hidden);
|
||||
#endif
|
||||
|
||||
// Enable or disable FastISel. Both options are needed, because
|
||||
// FastISel is enabled by default with -fast, and we wish to be
|
||||
// able to enable or disable fast-isel independently from -O0.
|
||||
@ -324,6 +328,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
|
||||
/* allowDoubleDefs= */ true);
|
||||
|
||||
if (OptLevel != CodeGenOpt::None) {
|
||||
PM.add(createOptimizeExtsPass());
|
||||
if (!DisableMachineLICM)
|
||||
PM.add(createMachineLICMPass());
|
||||
if (!DisableMachineSink)
|
||||
|
149
lib/CodeGen/OptimizeExts.cpp
Normal file
149
lib/CodeGen/OptimizeExts.cpp
Normal file
@ -0,0 +1,149 @@
|
||||
//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs -----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "ext-opt"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
|
||||
cl::desc("Aggressive extension optimization"));
|
||||
|
||||
STATISTIC(NumReuse, "Number of extension results reused");
|
||||
|
||||
namespace {
|
||||
class OptimizeExts : public MachineFunctionPass {
|
||||
const TargetMachine *TM;
|
||||
const TargetInstrInfo *TII;
|
||||
MachineRegisterInfo *MRI;
|
||||
MachineDominatorTree *DT; // Machine dominator tree
|
||||
|
||||
public:
|
||||
static char ID; // Pass identification
|
||||
OptimizeExts() : MachineFunctionPass(&ID) {}
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesCFG();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
AU.addPreserved<MachineDominatorTree>();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
char OptimizeExts::ID = 0;
|
||||
static RegisterPass<OptimizeExts>
|
||||
X("opt-exts", "Optimize sign / zero extensions");
|
||||
|
||||
/// createOptimizeExtsPass - Factory for the sign / zero extension
/// optimization pass. Returns a newly allocated OptimizeExts instance as a
/// FunctionPass pointer.
FunctionPass *llvm::createOptimizeExtsPass() {
  return new OptimizeExts();
}
|
||||
|
||||
/// runOnMachineFunction - Walk every instruction of \p MF. For each
/// coalescable extension instruction "DstReg = ext SrcReg" on virtual
/// registers, try to rewrite the *other* uses of SrcReg so that they read
/// "EXTRACT_SUBREG DstReg, SubIdx" instead.
///
/// A use of SrcReg is rewritten when it is:
///  - in the same block as the extension and not yet visited in that block
///    (i.e. it comes after the extension), or
///  - in another block that also contains a non-PHI use of DstReg, or
///  - (only with -aggressive-ext-opt) in a block dominated by the extension's
///    block, provided every remaining use could be classified.
///
/// PHI instructions are never rewritten, and blocks in which DstReg feeds a
/// PHI are left alone.
///
/// \returns true if at least one use was rewritten.
bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) {
  TM = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT = &getAnalysis<MachineDominatorTree>();

  bool Changed = false;

  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    // LocalMIs is only ever queried for instructions of the block currently
    // being scanned, so there is no reason to carry entries across blocks.
    LocalMIs.clear();

    for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end();
         MII != ME; ++MII) {
      MachineInstr *MI = &*MII;
      LocalMIs.insert(MI);

      unsigned SrcReg, DstReg, SubIdx;
      if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
        continue;

      if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
          TargetRegisterInfo::isPhysicalRegister(SrcReg))
        continue;

      MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
      if (++UI == MRI->use_end())
        // No other uses.
        continue;

      // Ok, the source has other uses. See if we can replace the other uses
      // with use of the result of the extension.

      // ReachedBBs: blocks holding an ordinary (non-PHI) use of DstReg.
      // PHIBBs:     blocks in which DstReg is an incoming value of a PHI. The
      //             PHI reads the value on an incoming edge, so such a use
      //             does not show that DstReg is available inside the block.
      SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
      SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
      UI = MRI->use_begin(DstReg);
      for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
           ++UI) {
        if (UI->getOpcode() == TargetInstrInfo::PHI)
          PHIBBs.insert(UI->getParent());
        else
          ReachedBBs.insert(UI->getParent());
      }

      bool ExtendLife = true;
      // Uses that will be rewritten unconditionally.
      SmallVector<MachineOperand*, 8> Uses;
      // Uses that are rewritten only if we decide to extend the live range of
      // the extension result.
      SmallVector<MachineOperand*, 8> ExtendedUses;

      UI = MRI->use_begin(SrcReg);
      for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
           ++UI) {
        MachineOperand &UseMO = UI.getOperand();
        MachineInstr *UseMI = &*UI;
        if (UseMI == MI)
          continue;

        if (UseMI->getOpcode() == TargetInstrInfo::PHI) {
          // The replacement EXTRACT_SUBREG is inserted immediately before
          // the user; doing that for a PHI would put a non-PHI instruction
          // ahead of a PHI. SrcReg stays live up to this PHI regardless, so
          // do not extend the live range of the extension result either.
          ExtendLife = false;
          continue;
        }

        MachineBasicBlock *UseMBB = UseMI->getParent();
        if (UseMBB == MBB) {
          // Local uses that come after the extension.
          if (!LocalMIs.count(UseMI))
            Uses.push_back(&UseMO);
        } else if (ReachedBBs.count(UseMBB)) {
          // Non-local uses where the result of extension is used. Always
          // replace these.
          Uses.push_back(&UseMO);
        } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
          // We may want to extend live range of the extension result in order
          // to replace these uses.
          ExtendedUses.push_back(&UseMO);
        } else {
          // Both will be live out of the def MBB anyway. Don't extend live
          // range of the extension result.
          ExtendLife = false;
          break;
        }
      }

      if (ExtendLife && !ExtendedUses.empty())
        // Ok, we'll extend the liveness of the extension result.
        std::copy(ExtendedUses.begin(), ExtendedUses.end(),
                  std::back_inserter(Uses));

      // Now replace all uses.
      if (Uses.empty())
        continue;

      const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
      for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
        MachineOperand *UseMO = Uses[i];
        MachineInstr *UseMI = UseMO->getParent();
        MachineBasicBlock *UseMBB = UseMI->getParent();

        // Leave blocks alone where the extension result feeds a PHI; adding
        // further uses of DstReg there would stretch the live range of a PHI
        // input.
        if (PHIBBs.count(UseMBB))
          continue;

        // Materialize the pre-extension value as a sub-register of the
        // extension result right before the user, then redirect the operand.
        unsigned NewVR = MRI->createVirtualRegister(RC);
        BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
                TII->get(TargetInstrInfo::EXTRACT_SUBREG), NewVR)
          .addReg(DstReg).addImm(SubIdx);
        UseMO->setReg(NewVR);
        ++NumReuse;
        Changed = true;
      }
    }
  }

  return Changed;
}
|
@ -713,9 +713,9 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
|
||||
}
|
||||
|
||||
bool
|
||||
X86InstrInfo::isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
|
||||
unsigned &SrcReg, unsigned &DstReg,
|
||||
unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
|
||||
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
|
||||
unsigned &SrcReg, unsigned &DstReg,
|
||||
unsigned &SubIdx) const {
|
||||
switch (MI.getOpcode()) {
|
||||
default: break;
|
||||
case X86::MOVSX16rr8:
|
||||
@ -733,10 +733,8 @@ X86InstrInfo::isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
|
||||
if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
|
||||
// Be conservative.
|
||||
return false;
|
||||
isCopy = false;
|
||||
SrcReg = MI.getOperand(1).getReg();
|
||||
DstReg = MI.getOperand(0).getReg();
|
||||
DstSubIdx = 0;
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
llvm_unreachable(0);
|
||||
@ -747,22 +745,23 @@ X86InstrInfo::isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
|
||||
case X86::MOVZX32rr8:
|
||||
case X86::MOVSX64rr8:
|
||||
case X86::MOVZX64rr8:
|
||||
SrcSubIdx = 1;
|
||||
SubIdx = 1;
|
||||
break;
|
||||
case X86::MOVSX32rr16:
|
||||
case X86::MOVZX32rr16:
|
||||
case X86::MOVSX64rr16:
|
||||
case X86::MOVZX64rr16:
|
||||
SrcSubIdx = 3;
|
||||
SubIdx = 3;
|
||||
break;
|
||||
case X86::MOVSX64rr32:
|
||||
case X86::MOVZX64rr32:
|
||||
SrcSubIdx = 4;
|
||||
SubIdx = 4;
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
|
||||
return false;
|
||||
}
|
||||
|
||||
/// isFrameOperand - Return true and the FrameIndex if the specified
|
||||
|
@ -448,13 +448,15 @@ public:
|
||||
unsigned &SrcReg, unsigned &DstReg,
|
||||
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
|
||||
|
||||
/// isCoalescableInstr - Return true if the instruction is "coalescable". That
|
||||
/// is, it's like a copy where it's legal for the source to overlap the
|
||||
/// destination. e.g. X86::MOVSX64rr32.
|
||||
virtual bool isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
|
||||
unsigned &SrcReg, unsigned &DstReg,
|
||||
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
|
||||
|
||||
/// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
|
||||
/// extension instruction. That is, it's like a copy where it's legal for the
|
||||
/// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
|
||||
/// true, then it's expected the pre-extension value is available as a subreg
|
||||
/// of the result register. This also returns the sub-register index in
|
||||
/// SubIdx.
|
||||
virtual bool isCoalescableExtInstr(const MachineInstr &MI,
|
||||
unsigned &SrcReg, unsigned &DstReg,
|
||||
unsigned &SubIdx) const;
|
||||
|
||||
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
|
||||
/// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 58
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 57
|
||||
; PR2568
|
||||
|
||||
@g_3 = external global i16 ; <i16*> [#uses=1]
|
||||
|
17
test/CodeGen/X86/sext-subreg.ll
Normal file
17
test/CodeGen/X86/sext-subreg.ll
Normal file
@ -0,0 +1,17 @@
|
||||
; RUN: llc < %s -march=x86-64 | FileCheck %s
|
||||
; rdar://7529457
|
||||
|
||||
define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
|
||||
; CHECK: t:
|
||||
; CHECK: movslq %e{{.*}}, %rax
|
||||
; CHECK: movq %rax
|
||||
; CHECK: movl %eax
|
||||
%C = add i64 %A, %B
|
||||
%D = trunc i64 %C to i32
|
||||
volatile store i32 %D, i32* %P
|
||||
%E = shl i64 %C, 32
|
||||
%F = ashr i64 %E, 32
|
||||
volatile store i64 %F, i64 *%P2
|
||||
volatile store i32 %D, i32* %P
|
||||
ret i64 undef
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
|
||||
; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 6
|
||||
; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 9
|
||||
|
||||
type { [62 x %struct.Bitvec*] } ; type %0
|
||||
type { i8* } ; type %1
|
||||
|
Loading…
Reference in New Issue
Block a user