Teach the spiller to commute instructions in order to fold a reload. This hits 410 times on 444.namd and 122 times on 252.eon.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52266 91177308-0d34-0410-b5e6-96231b3b80d8
commit 87bb991aa8 (parent 6100119620)
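The commit message states the idea tersely: when a value is reloaded from a stack slot, combined by a commutable two-address operation with a register that dies at that point, and stored straight back to the same slot, commuting the operation lets the dying register become the destination, so the reload can be folded into the operation as a memory operand and disappear. Below is a minimal standalone sketch of that rewrite; the Instr struct, Kind enum, and commuteToFoldReload function are simplified stand-ins of my own, not LLVM types or APIs. In the actual patch that follows, the same decision is gated by TID.isCommutable(), TII->CommuteChangesDestination(), and TII->foldMemoryOperand().

// Standalone model of the reload-commute-fold rewrite (illustrative only).
#include <cstddef>
#include <cstdio>
#include <vector>

enum Kind { Load, Store, Op };

struct Instr {
  Kind kind;
  int dst;          // defined register (Load, Op) or stored register (Store)
  int src;          // second source register (Op only), -1 otherwise
  int slot;         // stack slot index (Load, Store, folded Op), -1 otherwise
  bool srcKilled;   // the second source register dies at this Op
  bool commutable;  // op(dst, src) == op(src, dst)
  bool foldsLoad;   // the op can take a memory operand in place of a register
};

// Look for:  rD = load slot;  rD = op rD, rS<kill>;  store rD, slot
// If the op is commutable, rS is killed, and the op can fold a memory
// operand, rewrite to:  rS = op rS, [slot];  store rS, slot
// which eliminates the reload.
static bool commuteToFoldReload(std::vector<Instr> &block, std::size_t i) {
  if (i + 2 >= block.size())
    return false;
  const Instr reload = block[i], op = block[i + 1], store = block[i + 2];
  if (reload.kind != Load || op.kind != Op || store.kind != Store)
    return false;
  if (op.dst != reload.dst || store.dst != op.dst || store.slot != reload.slot)
    return false;
  if (!op.commutable || !op.srcKilled || !op.foldsLoad)
    return false;

  int newReg = op.src;  // the killed source register takes over as destination
  Instr folded = {Op, newReg, -1, reload.slot, false, true, true};      // rS = op rS, [slot]
  Instr spill  = {Store, newReg, -1, reload.slot, false, false, false}; // store rS, slot
  block[i] = folded;
  block[i + 1] = spill;
  block.erase(block.begin() + i + 2);  // three instructions become two
  return true;
}

int main() {
  // r1 = load fi#1;  r1 = add r1, r2<kill>;  store r1, fi#1
  std::vector<Instr> block = {
      {Load, 1, -1, 1, false, false, false},
      {Op, 1, 2, -1, true, true, true},
      {Store, 1, -1, 1, false, false, false},
  };
  if (commuteToFoldReload(block, 0))
    std::printf("folded the reload; %zu instructions remain\n", block.size());
  return 0;
}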
@@ -36,16 +36,17 @@
 #include <algorithm>
 using namespace llvm;
 
-STATISTIC(NumSpills, "Number of register spills");
-STATISTIC(NumPSpills,"Number of physical register spills");
-STATISTIC(NumReMats, "Number of re-materialization");
-STATISTIC(NumDRM   , "Number of re-materializable defs elided");
-STATISTIC(NumStores, "Number of stores added");
-STATISTIC(NumLoads , "Number of loads added");
-STATISTIC(NumReused, "Number of values reused");
-STATISTIC(NumDSE   , "Number of dead stores elided");
-STATISTIC(NumDCE   , "Number of copies elided");
-STATISTIC(NumDSS   , "Number of dead spill slots removed");
+STATISTIC(NumSpills  , "Number of register spills");
+STATISTIC(NumPSpills ,"Number of physical register spills");
+STATISTIC(NumReMats  , "Number of re-materialization");
+STATISTIC(NumDRM     , "Number of re-materializable defs elided");
+STATISTIC(NumStores  , "Number of stores added");
+STATISTIC(NumLoads   , "Number of loads added");
+STATISTIC(NumReused  , "Number of values reused");
+STATISTIC(NumDSE     , "Number of dead stores elided");
+STATISTIC(NumDCE     , "Number of copies elided");
+STATISTIC(NumDSS     , "Number of dead spill slots removed");
+STATISTIC(NumCommutes, "Number of instructions commuted");
 
 namespace {
   enum SpillerName { simple, local };
@@ -356,6 +357,13 @@ namespace {
                              AvailableSpills &Spills, BitVector &RegKills,
                              std::vector<MachineOperand*> &KillOps,
                              VirtRegMap &VRM);
+    bool CommuteToFoldReload(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator &MII,
+                             unsigned VirtReg, unsigned SrcReg, int SS,
+                             BitVector &RegKills,
+                             std::vector<MachineOperand*> &KillOps,
+                             const TargetRegisterInfo *TRI,
+                             VirtRegMap &VRM);
     void SpillRegToStackSlot(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator &MII,
                              int Idx, unsigned PhysReg, int StackSlot,
@@ -874,12 +882,12 @@ namespace {
 /// This enables unfolding optimization for a subsequent instruction which will
 /// also eliminate the newly introduced store instruction.
 bool LocalSpiller::PrepForUnfoldOpti(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator &MII,
                                      std::vector<MachineInstr*> &MaybeDeadStores,
                                      AvailableSpills &Spills,
                                      BitVector &RegKills,
                                      std::vector<MachineOperand*> &KillOps,
                                      VirtRegMap &VRM) {
   MachineFunction &MF = *MBB.getParent();
   MachineInstr &MI = *MII;
   unsigned UnfoldedOpc = 0;
@@ -971,6 +979,97 @@ bool LocalSpiller::PrepForUnfoldOpti(MachineBasicBlock &MBB,
   return false;
 }
 
+/// CommuteToFoldReload -
+/// Look for
+/// r1 = load fi#1
+/// r1 = op r1, r2<kill>
+/// store r1, fi#1
+///
+/// If op is commutable and r2 is killed, then we can xform these to
+/// r2 = op r2, fi#1
+/// store r2, fi#1
+bool LocalSpiller::CommuteToFoldReload(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator &MII,
+                                       unsigned VirtReg, unsigned SrcReg, int SS,
+                                       BitVector &RegKills,
+                                       std::vector<MachineOperand*> &KillOps,
+                                       const TargetRegisterInfo *TRI,
+                                       VirtRegMap &VRM) {
+  if (MII == MBB.begin() || !MII->killsRegister(SrcReg))
+    return false;
+
+  MachineFunction &MF = *MBB.getParent();
+  MachineInstr &MI = *MII;
+  MachineBasicBlock::iterator DefMII = prior(MII);
+  MachineInstr *DefMI = DefMII;
+  const TargetInstrDesc &TID = DefMI->getDesc();
+  unsigned NewDstIdx;
+  if (DefMII != MBB.begin() &&
+      TID.isCommutable() &&
+      TII->CommuteChangesDestination(DefMI, NewDstIdx)) {
+    MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+    unsigned NewReg = NewDstMO.getReg();
+    if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
+      return false;
+    MachineInstr *ReloadMI = prior(DefMII);
+    int FrameIdx;
+    unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
+    if (DestReg != SrcReg || FrameIdx != SS)
+      return false;
+    int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
+    if (UseIdx == -1)
+      return false;
+    int DefIdx = TID.getOperandConstraint(UseIdx, TOI::TIED_TO);
+    if (DefIdx == -1)
+      return false;
+    assert(DefMI->getOperand(DefIdx).isRegister() &&
+           DefMI->getOperand(DefIdx).getReg() == SrcReg);
+
+    // Now commute def instruction.
+    MachineInstr *CommutedMI = TII->commuteInstruction(DefMI);
+    if (!CommutedMI)
+      return false;
+    SmallVector<unsigned, 2> Ops;
+    Ops.push_back(NewDstIdx);
+    MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS);
+    if (!FoldedMI) {
+      if (CommutedMI == DefMI)
+        TII->commuteInstruction(CommutedMI);
+      else
+        MBB.erase(CommutedMI);
+      return false;
+    }
+
+    VRM.addSpillSlotUse(SS, FoldedMI);
+    VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+    // Insert new def MI and spill MI.
+    const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg);
+    TII->storeRegToStackSlot(MBB, MI, NewReg, true, SS, RC);
+    MII = prior(MII);
+    MachineInstr *StoreMI = MII;
+    VRM.addSpillSlotUse(SS, StoreMI);
+    VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+    MII = MBB.insert(MII, FoldedMI);  // Update MII to backtrack.
+
+    // Delete all 3 old instructions.
+    InvalidateKills(MI, RegKills, KillOps);
+    VRM.RemoveMachineInstrFromMaps(&MI);
+    MBB.erase(&MI);
+    if (CommutedMI != DefMI)
+      MBB.erase(CommutedMI);
+    InvalidateKills(*DefMI, RegKills, KillOps);
+    VRM.RemoveMachineInstrFromMaps(DefMI);
+    MBB.erase(DefMI);
+    InvalidateKills(*ReloadMI, RegKills, KillOps);
+    VRM.RemoveMachineInstrFromMaps(ReloadMI);
+    MBB.erase(ReloadMI);
+    ++NumCommutes;
+    return true;
+  }
+
+  return false;
+}
+
 /// findSuperReg - Find the SubReg's super-register of given register class
 /// where its SubIdx sub-register is SubReg.
 static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg,
@@ -1587,15 +1686,23 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
         if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
           assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
                  "Src hasn't been allocated yet?");
+
+          if (CommuteToFoldReload(MBB, MII, VirtReg, SrcReg, StackSlot,
+                                  RegKills, KillOps, TRI, VRM)) {
+            NextMII = next(MII);
+            BackTracked = true;
+            goto ProcessNextInst;
+          }
+
           // Okay, this is certainly a store of SrcReg to [StackSlot].  Mark
           // this as a potentially dead store in case there is a subsequent
           // store into the stack slot without a read from it.
           MaybeDeadStores[StackSlot] = &MI;
 
           // If the stack slot value was previously available in some other
-          // register, change it now.  Otherwise, make the register available,
-          // in PhysReg.
-          Spills.addAvailable(StackSlot, &MI, SrcReg, false/*don't clobber*/);
+          // register, change it now.  Otherwise, make the register
+          // available in PhysReg.
+          Spills.addAvailable(StackSlot, &MI, SrcReg, false/*!clobber*/);
         }
       }
     }
test/CodeGen/X86/2008-06-13-SpillerCommuting.ll (new file, 42 lines)
@@ -0,0 +1,42 @@
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic -stats |& grep {spiller - Number of instructions commuted}
+
+%struct.CABAC_context_element = type { i8, i8 }
+%struct.MB_Info_CABAC = type { i8, i8, [2 x i8], i8, i8, i8, i16, i16, [4 x i8], [8 x %struct.MotionVector] }
+%struct.MotionVector = type { i16, i16 }
+%struct.RBSP2 = type { i32, i32, i16, i8, i16, i16, <1 x i64>, i32, i32, i32*, i32*, i32*, i32*, i32, i32, i32, i32, i32, i8, i16, i8, %struct.MB_Info_CABAC*, %struct.MB_Info_CABAC*, [2 x %struct.MB_Info_CABAC], [12 x i8], [460 x %struct.CABAC_context_element], [10 x i8], [10 x i8], [10 x i16], [4 x [120 x i32]], [15 x [36 x i8]], [6 x [8 x i8]], i16* }
+%struct.Slice_Info = type { i32, i8, %struct.seq_parameter_set_rbsp_t*, %struct.seq_parameter_set_rbsp_t, i32, i16*, i8, i8, i8, i8, i16, i32 }
+%struct.seq_parameter_set_rbsp_t = type { i32, i32, i32 }
+@_ZL21CABAC_CTX_state_table = external constant [64 x i16]  ; <[64 x i16]*> [#uses=1]
+@_ZL15rLPS_table_64x4 = external constant [64 x [4 x i8]]  ; <[64 x [4 x i8]]*> [#uses=1]
+
+define i32 @_ZN5RBSP220residual_block_cabacEP10Slice_InfoP13MB_Info_CABACS3_hjhhbPtPs(%struct.RBSP2* %this, %struct.Slice_Info* %slice, %struct.MB_Info_CABAC* %up, %struct.MB_Info_CABAC* %left, i8 zeroext %maxNumCoeff, i32 %blk_i, i8 zeroext %iCbCr, i8 zeroext %ctxBlockCat, i8 zeroext %intra_flag, i16* %mask, i16* %res) nounwind {
+entry:
+	%tmp43.i1590 = getelementptr %struct.RBSP2* %this, i32 0, i32 0  ; <i32*> [#uses=1]
+	br label %bb803
+
+bb803:  ; preds = %_ZN5RBSP211decode_1bitEP21CABAC_context_element.exit1581, %entry
+	%numCoeff.11749 = phi i32 [ 0, %entry ], [ %numCoeff.11749.tmp868, %_ZN5RBSP211decode_1bitEP21CABAC_context_element.exit1581 ]  ; <i32> [#uses=1]
+	%tmp28.i1503 = load i8* null, align 1  ; <i8> [#uses=1]
+	%tmp30.i1504 = getelementptr %struct.RBSP2* %this, i32 0, i32 25, i32 0, i32 0  ; <i8*> [#uses=2]
+	%tmp31.i1505 = load i8* %tmp30.i1504, align 1  ; <i8> [#uses=1]
+	%tmp3233.i1506 = zext i8 %tmp31.i1505 to i32  ; <i32> [#uses=2]
+	%tmp35.i1507 = getelementptr [64 x i16]* @_ZL21CABAC_CTX_state_table, i32 0, i32 %tmp3233.i1506  ; <i16*> [#uses=1]
+	%tmp36.i1508 = load i16* %tmp35.i1507, align 2  ; <i16> [#uses=1]
+	%tmp363738.i1509 = zext i16 %tmp36.i1508 to i32  ; <i32> [#uses=1]
+	%tmp51.i1514 = getelementptr [64 x [4 x i8]]* @_ZL15rLPS_table_64x4, i32 0, i32 %tmp3233.i1506, i32 0  ; <i8*> [#uses=1]
+	%tmp52.i1515 = load i8* %tmp51.i1514, align 1  ; <i8> [#uses=1]
+	%tmp5758.i1516 = zext i8 %tmp52.i1515 to i32  ; <i32> [#uses=1]
+	%tmp60.i1517 = sub i32 0, %tmp5758.i1516  ; <i32> [#uses=1]
+	store i32 %tmp60.i1517, i32* %tmp43.i1590, align 16
+	br i1 false, label %_ZN5RBSP211decode_1bitEP21CABAC_context_element.exit1581, label %bb.i1537
+
+bb.i1537:  ; preds = %bb803
+	unreachable
+
+_ZN5RBSP211decode_1bitEP21CABAC_context_element.exit1581:  ; preds = %bb803
+	%tmp328329.i1580 = trunc i32 %tmp363738.i1509 to i8  ; <i8> [#uses=1]
+	store i8 %tmp328329.i1580, i8* %tmp30.i1504, align 1
+	%toBool865 = icmp eq i8 %tmp28.i1503, 0  ; <i1> [#uses=1]
+	%numCoeff.11749.tmp868 = select i1 %toBool865, i32 %numCoeff.11749, i32 0  ; <i32> [#uses=1]
+	br label %bb803
+}