Re-commit: [MachineLICM] Add functions to MachineLICM to hoist invariant stores

This patch adds functions to allow MachineLICM to hoist invariant stores.
Currently, MachineLICM does not hoist any store instructions, however
when storing the same value to a constant spot on the stack, the store
instruction should be considered invariant and be hoisted. The function
isInvariantStore iterates each operand of the store instruction and checks
that each register operand satisfies isCallerPreservedPhysReg. The store
may be fed by a copy, which is hoisted by isCopyFeedingInvariantStore.
This patch also adds the PowerPC changes needed to consider the stack
register as caller preserved.

Differential Revision: https://reviews.llvm.org/D40196

llvm-svn: 328326
This commit is contained in:
Zaara Syeda 2018-03-23 15:28:15 +00:00
parent 6e87df0b0e
commit 72c84455d5
10 changed files with 275 additions and 49 deletions

View File

@ -444,6 +444,13 @@ public:
return false;
}
/// Returns the original SrcReg unless it is the target of a copy-like
/// operation, in which case we chain backwards through all such operations
/// to the ultimate source register. If a physical register is encountered,
/// we stop the search.
virtual unsigned lookThruCopyLike(unsigned SrcReg,
const MachineRegisterInfo *MRI) const;
/// Return a null-terminated list of all of the callee-saved registers on
/// this target. The register should be in the order of desired callee-save
/// stack frame offset. The first register is closest to the incoming stack

View File

@ -71,6 +71,10 @@ SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
cl::desc("MachineLICM should sink instructions into "
"loops to avoid register spills"),
cl::init(false), cl::Hidden);
static cl::opt<bool>
HoistConstStores("hoist-const-stores",
cl::desc("Hoist invariant stores"),
cl::init(true), cl::Hidden);
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
@ -82,6 +86,8 @@ STATISTIC(NumCSEed,
"Number of hoisted machine instructions CSEed");
STATISTIC(NumPostRAHoisted,
"Number of machine instructions hoisted out of loops post regalloc");
STATISTIC(NumStoreConst,
"Number of stores of const phys reg hoisted out of loops");
namespace {
@ -726,6 +732,8 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
MachineInstr *MI = &*MII;
if (!Hoist(MI, Preheader))
UpdateRegPressure(MI);
// If we have hoisted an instruction that may store, it can only be a
// constant store.
MII = NextMII;
}
@ -889,13 +897,79 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
return false;
}
// This function iterates through all the operands of the input store MI and
// checks that each register operand statisfies isCallerPreservedPhysReg.
// This means, the value being stored and the address where it is being stored
// is constant throughout the body of the function (not including prologue and
// epilogue). When called with an MI that isn't a store, it returns false.
static bool isInvariantStore(const MachineInstr &MI,
const TargetRegisterInfo *TRI,
const MachineRegisterInfo *MRI) {
if (!MI.mayStore() || MI.hasUnmodeledSideEffects() ||
(MI.getNumOperands() == 0))
return false;
// Check that all register operands are caller-preserved physical registers.
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg()) {
unsigned Reg = MO.getReg();
// If operand is a virtual register, check if it comes from a copy of a
// physical register.
if (TargetRegisterInfo::isVirtualRegister(Reg))
Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
if (TargetRegisterInfo::isVirtualRegister(Reg))
return false;
if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF()))
return false;
}
}
return true;
}
// Return true if the input MI is a copy instruction that feeds an invariant
// store instruction. This means that the src of the copy has to satisfy
// isCallerPreservedPhysReg and atleast one of it's users should satisfy
// isInvariantStore.
static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
const MachineRegisterInfo *MRI,
const TargetRegisterInfo *TRI) {
// FIXME: If targets would like to look through instructions that aren't
// pure copies, this can be updated to a query.
if (!MI.isCopy())
return false;
const MachineFunction *MF = MI.getMF();
// Check that we are copying a constant physical register.
unsigned CopySrcReg = MI.getOperand(1).getReg();
if (TargetRegisterInfo::isVirtualRegister(CopySrcReg))
return false;
if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF))
return false;
unsigned CopyDstReg = MI.getOperand(0).getReg();
// Check if any of the uses of the copy are invariant stores.
assert (TargetRegisterInfo::isVirtualRegister(CopyDstReg) &&
"copy dst is not a virtual reg");
for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
return true;
}
return false;
}
/// Returns true if the instruction may be a suitable candidate for LICM.
/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
// Check if it's safe to move the instruction.
bool DontMoveAcrossStore = true;
if (!I.isSafeToMove(AA, DontMoveAcrossStore))
if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
!(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
return false;
}
// If it is load then check if it is guaranteed to execute by making sure that
// it dominates all exiting blocks. If it doesn't, then there is a path out of
@ -1115,6 +1189,9 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
// - When hoisting the last use of a value in the loop, that value no longer
// needs to be live in the loop. This lowers register pressure in the loop.
if (HoistConstStores && isCopyFeedingInvariantStore(MI, MRI, TRI))
return true;
bool CheapInstr = IsCheapInstruction(MI);
bool CreatesCopy = HasLoopPHIUse(&MI);
@ -1351,6 +1428,11 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
if (!MI) return false;
}
// If we have hoisted an instruction that may store, it can only be a constant
// store.
if (MI->mayStore())
NumStoreConst++;
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
DEBUG({

View File

@ -472,6 +472,29 @@ unsigned TargetRegisterInfo::getRegSizeInBits(unsigned Reg,
return getRegSizeInBits(*RC);
}
unsigned
TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg,
const MachineRegisterInfo *MRI) const {
while (true) {
const MachineInstr *MI = MRI->getVRegDef(SrcReg);
if (!MI->isCopyLike())
return SrcReg;
unsigned CopySrcReg;
if (MI->isCopy())
CopySrcReg = MI->getOperand(1).getReg();
else {
assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike");
CopySrcReg = MI->getOperand(2).getReg();
}
if (!isVirtualRegister(CopySrcReg))
return CopySrcReg;
SrcReg = CopySrcReg;
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,

View File

@ -2178,28 +2178,6 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return false;
}
unsigned PPCInstrInfo::lookThruCopyLike(unsigned SrcReg,
const MachineRegisterInfo *MRI) {
while (true) {
MachineInstr *MI = MRI->getVRegDef(SrcReg);
if (!MI->isCopyLike())
return SrcReg;
unsigned CopySrcReg;
if (MI->isCopy())
CopySrcReg = MI->getOperand(1).getReg();
else {
assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike");
CopySrcReg = MI->getOperand(2).getReg();
}
if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
return CopySrcReg;
SrcReg = CopySrcReg;
}
}
// Essentially a compile-time implementation of a compare->isel sequence.
// It takes two constants to compare, along with the true/false registers
// and the comparison type (as a subreg to a CR field) and returns one
@ -2265,6 +2243,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
ConstOp = ~0U;
MachineInstr *DefMI = nullptr;
MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
const TargetRegisterInfo *TRI = &getRegisterInfo();
// If we'ere in SSA, get the defs through the MRI. Otherwise, only look
// within the basic block to see if the register is defined using an LI/LI8.
if (MRI->isSSA()) {
@ -2274,7 +2253,7 @@ MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI,
unsigned Reg = MI.getOperand(i).getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
unsigned TrueReg = lookThruCopyLike(Reg, MRI);
unsigned TrueReg = TRI->lookThruCopyLike(Reg, MRI);
if (TargetRegisterInfo::isVirtualRegister(TrueReg)) {
DefMI = MRI->getVRegDef(TrueReg);
if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) {

View File

@ -359,13 +359,6 @@ public:
MachineInstr **KilledDef = nullptr) const;
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
// This is used to find the "true" source register for n
// Machine instruction. Returns the original SrcReg unless it is the target
// of a copy-like operation, in which case we chain backwards through all
// such operations to the ultimate source register. If a
// physical register is encountered, we stop the search.
static unsigned lookThruCopyLike(unsigned SrcReg,
const MachineRegisterInfo *MRI);
bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III) const;
};

View File

@ -220,7 +220,7 @@ bool PPCMIPeephole::simplifyCode(void) {
bool Simplified = false;
MachineInstr* ToErase = nullptr;
std::map<MachineInstr *, bool> TOCSaves;
const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
NumFunctionsEnteredInMIPeephole++;
if (ConvertRegReg) {
// Fixed-point conversion of reg/reg instructions fed by load-immediate
@ -297,9 +297,9 @@ bool PPCMIPeephole::simplifyCode(void) {
// We have to look through chains of COPY and SUBREG_TO_REG
// to find the real source values for comparison.
unsigned TrueReg1 =
TII->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
unsigned TrueReg2 =
TII->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
if (TrueReg1 == TrueReg2
&& TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
@ -314,7 +314,7 @@ bool PPCMIPeephole::simplifyCode(void) {
if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
return false;
unsigned DefReg =
TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
@ -341,9 +341,9 @@ bool PPCMIPeephole::simplifyCode(void) {
if (DefOpc == PPC::XXPERMDI) {
unsigned FeedImmed = DefMI->getOperand(3).getImm();
unsigned FeedReg1 =
TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
unsigned FeedReg2 =
TII->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
DEBUG(dbgs()
@ -402,7 +402,7 @@ bool PPCMIPeephole::simplifyCode(void) {
unsigned MyOpcode = MI.getOpcode();
unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
unsigned TrueReg =
TII->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
break;
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
@ -465,7 +465,7 @@ bool PPCMIPeephole::simplifyCode(void) {
case PPC::XVCVDPSP: {
// If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
unsigned TrueReg =
TII->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
break;
MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
@ -474,9 +474,9 @@ bool PPCMIPeephole::simplifyCode(void) {
// values.
if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
unsigned DefsReg1 =
TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
unsigned DefsReg2 =
TII->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
!TargetRegisterInfo::isVirtualRegister(DefsReg2))
break;

View File

@ -65,6 +65,12 @@ static cl::opt<bool>
EnableGPRToVecSpills("ppc-enable-gpr-to-vsr-spills", cl::Hidden, cl::init(false),
cl::desc("Enable spills from gpr to vsr rather than stack"));
static cl::opt<bool>
StackPtrConst("ppc-stack-ptr-caller-preserved",
cl::desc("Consider R1 caller preserved so stack saves of "
"caller preserved registers can be LICM candidates"),
cl::init(true), cl::Hidden);
PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
: PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
TM.isPPC64() ? 0 : 1,
@ -304,15 +310,26 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
const MachineFunction &MF) const {
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
if (TM.isELFv2ABI() && PhysReg == PPC::X2) {
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (!TM.isPPC64())
return false;
if (!Subtarget.isSVR4ABI())
return false;
if (PhysReg == PPC::X2)
// X2 is guaranteed to be preserved within a function if it is reserved.
// The reason it's reserved is that it's the TOC pointer (and the function
// uses the TOC). In functions where it isn't reserved (i.e. leaf functions
// with no TOC access), we can't claim that it is preserved.
return (getReservedRegs(MF).test(PPC::X2));
} else {
return false;
}
if (StackPtrConst && (PhysReg == PPC::X1) && !MFI.hasVarSizedObjects()
&& !MFI.hasOpaqueSPAdjustment())
// The value of the stack pointer does not change within a function after
// the prologue and before the epilogue if there are no dynamic allocations
// and no inline asm which clobbers X1.
return true;
return false;
}
unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,

View File

@ -75,9 +75,7 @@ entry:
; CHECK: bctrl
; CHECK: ld 2, 40(1)
; CHECK: addis [[REG1:[0-9]+]], 2, .LC0@toc@ha
; CHECK: std 2, 40(1)
; CHECK: ld {{[0-9]+}}, .LC0@toc@l([[REG1]])
; CHECK: {{mr|ld}} 2,
; CHECK: mtctr
; CHECK: bctrl

View File

@ -0,0 +1,129 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -hoist-const-stores -ppc-stack-ptr-caller-preserved < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -hoist-const-stores -ppc-stack-ptr-caller-preserved < %s | FileCheck %s -check-prefix=CHECKBE
; Test hoist out of single loop
define signext i32 @test1(i32 signext %lim, i32 (i32)* nocapture %Func) {
entry:
; CHECK-LABEL: test1
; CHECK: for.body.preheader
; CHECK: std 2, 24(1)
; CHECK: for.body
; CHECK-NOT: std 2, 24(1)
; CHECKBE-LABEL: test1
; CHECKBE: for.body.preheader
; CHECKBE: std 2, 40(1)
; CHECKBE: for.body
; CHECKBE-NOT: std 2, 40(1)
%cmp6 = icmp sgt i32 %lim, 0
br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry
br label %for.body
for.cond.cleanup: ; preds = %for.body, %entry
%Sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
ret i32 %Sum.0.lcssa
for.body: ; preds = %for.body.preheader, %for.body
%i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%Sum.07 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
%call = tail call signext i32 %Func(i32 signext %i.08)
%add = add nsw i32 %call, %Sum.07
%inc = add nuw nsw i32 %i.08, 1
%exitcond = icmp eq i32 %inc, %lim
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
; Test hoist of nested loop goes to outter loop preheader
define signext i32 @test2(i32 signext %lim, i32 (i32)* nocapture %Func) {
entry:
; CHECK-LABEL: test2
; CHECK: for.body4.lr.ph.preheader
; CHECK: std 2, 24(1)
; CHECK: for.body4.lr.ph
; CHECK-NOT: std 2, 24(1)
; CHECKBE-LABEL: test2
; CHECKBE: for.body4.lr.ph.preheader
; CHECKBE: std 2, 40(1)
; CHECKBE: for.body4.lr.ph
; CHECKBE-NOT: std 2, 40(1)
%cmp20 = icmp sgt i32 %lim, 0
br i1 %cmp20, label %for.body4.lr.ph.preheader, label %for.cond.cleanup
for.body4.lr.ph.preheader: ; preds = %entry
br label %for.body4.lr.ph
for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry
%Sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.cond.cleanup3 ]
ret i32 %Sum.0.lcssa
for.body4.lr.ph: ; preds = %for.body4.lr.ph.preheader, %for.cond.cleanup3
%j.022 = phi i32 [ %inc6, %for.cond.cleanup3 ], [ 0, %for.body4.lr.ph.preheader ]
%Sum.021 = phi i32 [ %add, %for.cond.cleanup3 ], [ 0, %for.body4.lr.ph.preheader ]
br label %for.body4
for.cond.cleanup3: ; preds = %for.body4
%inc6 = add nuw nsw i32 %j.022, 1
%exitcond24 = icmp eq i32 %inc6, %lim
br i1 %exitcond24, label %for.cond.cleanup, label %for.body4.lr.ph
for.body4: ; preds = %for.body4, %for.body4.lr.ph
%i.019 = phi i32 [ %j.022, %for.body4.lr.ph ], [ %inc, %for.body4 ]
%Sum.118 = phi i32 [ %Sum.021, %for.body4.lr.ph ], [ %add, %for.body4 ]
%call = tail call signext i32 %Func(i32 signext %i.019)
%add = add nsw i32 %call, %Sum.118
%inc = add nuw nsw i32 %i.019, 1
%exitcond = icmp eq i32 %inc, %lim
br i1 %exitcond, label %for.cond.cleanup3, label %for.body4
}
; Test hoist out of if statement with low branch probability
; FIXME: we shouldn't hoist in such cases as it could increase the number
; of stores after hoisting.
define signext i32 @test3(i32 signext %lim, i32 (i32)* nocapture %Func) {
entry:
; CHECK-LABEL: test3
; CHECK: %for.body.lr.ph
; CHECK: std 2, 24(1)
; CHECK: %for.body
; CHECK-NOT: std 2, 24(1)
; CHECKBE-LABEL: test3
; CHECKBE: %for.body.lr.ph
; CHECKBE: std 2, 40(1)
; CHECKBE: %for.body
; CHECKBE-NOT: std 2, 40(1)
%cmp13 = icmp sgt i32 %lim, 0
br i1 %cmp13, label %for.body.lr.ph, label %for.cond.cleanup
for.body.lr.ph: ; preds = %entry
%sub = add nsw i32 %lim, -1
br label %for.body
for.cond.cleanup: ; preds = %if.end, %entry
%Sum.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %if.end ]
ret i32 %Sum.0.lcssa
for.body: ; preds = %if.end, %for.body.lr.ph
%i.015 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ]
%Sum.014 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %if.end ]
%cmp1 = icmp eq i32 %i.015, %sub
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %for.body
%call = tail call signext i32 %Func(i32 signext %sub)
%add = add nsw i32 %call, %Sum.014
br label %if.end
if.end: ; preds = %if.then, %for.body
%Sum.1 = phi i32 [ %add, %if.then ], [ %Sum.014, %for.body ]
%call2 = tail call signext i32 @func(i32 signext %i.015)
%add3 = add nsw i32 %call2, %Sum.1
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, %lim
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
declare signext i32 @func(i32 signext)

View File

@ -15,7 +15,6 @@ entry:
; INVFUNCDESC-DAG: ld [[REG3:[0-9]+]], 0(3)
; INVFUNCDESC: %for.body
; INVFUNCDESC: std 2, 40(1)
; INVFUNCDESC-DAG: mtctr [[REG3]]
; INVFUNCDESC-DAG: mr 11, [[REG2]]
; INVFUNCDESC-DAG: mr 2, [[REG1]]
@ -24,7 +23,6 @@ entry:
; NONINVFUNCDESC-LABEL: @bar
; NONINVFUNCDESC: %for.body
; NONINVFUNCDESC: std 2, 40(1)
; NONINVFUNCDESC-DAG: ld 3, 0(30)
; NONINVFUNCDESC-DAG: ld 11, 16(30)
; NONINVFUNCDESC-DAG: ld 2, 8(30)