mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-05 01:56:16 +00:00
Re-enable register pressure aware machine licm with fixes. Hoist() may have
erased the instruction during LICM so UpdateRegPressureAfter() should not reference it afterwards. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116845 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9cb4c7f878
commit
2312842de0
@ -24,6 +24,7 @@ class InstrItineraryData;
|
||||
class LiveVariables;
|
||||
class MCAsmInfo;
|
||||
class MachineMemOperand;
|
||||
class MachineRegisterInfo;
|
||||
class MDNode;
|
||||
class MCInst;
|
||||
class SDNode;
|
||||
@ -625,6 +626,19 @@ public:
|
||||
int getOperandLatency(const InstrItineraryData *ItinData,
|
||||
SDNode *DefNode, unsigned DefIdx,
|
||||
SDNode *UseNode, unsigned UseIdx) const;
|
||||
|
||||
/// hasHighOperandLatency - Compute operand latency between a def of 'Reg'
|
||||
/// and a use in the current loop, return true if the target considered
|
||||
/// it 'high'. This is used by optimization passes such as machine LICM to
|
||||
/// determine whether it makes sense to hoist an instruction out even in
|
||||
/// high register pressure situation.
|
||||
virtual
|
||||
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
|
||||
const MachineRegisterInfo *MRI,
|
||||
const MachineInstr *DefMI, unsigned DefIdx,
|
||||
const MachineInstr *UseMI, unsigned UseIdx) const {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
/// TargetInstrInfoImpl - This is the default implementation of
|
||||
|
@ -43,11 +43,6 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool>
|
||||
TrackRegPressure("rp-aware-machine-licm",
|
||||
cl::desc("Register pressure aware machine LICM"),
|
||||
cl::init(false), cl::Hidden);
|
||||
|
||||
STATISTIC(NumHoisted,
|
||||
"Number of machine instructions hoisted out of loops");
|
||||
STATISTIC(NumLowRP,
|
||||
@ -128,6 +123,7 @@ namespace {
|
||||
RegSeen.clear();
|
||||
RegPressure.clear();
|
||||
RegLimit.clear();
|
||||
BackTrace.clear();
|
||||
for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
|
||||
CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
|
||||
CI->second.clear();
|
||||
@ -175,9 +171,10 @@ namespace {
|
||||
///
|
||||
bool IsLoopInvariantInst(MachineInstr &I);
|
||||
|
||||
/// ComputeOperandLatency - Compute operand latency between a def of 'Reg'
|
||||
/// and an use in the current loop.
|
||||
int ComputeOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg);
|
||||
/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
|
||||
/// and a use in the current loop, return true if the target considered
|
||||
/// it 'high'.
|
||||
bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg);
|
||||
|
||||
/// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check
|
||||
/// if hoisting an instruction of the given cost matrix can cause high
|
||||
@ -203,8 +200,9 @@ namespace {
|
||||
|
||||
/// UpdateRegPressureBefore / UpdateRegPressureAfter - Update estimate of
|
||||
/// register pressure before and after executing a specific instruction.
|
||||
void UpdateRegPressureBefore(const MachineInstr *MI);
|
||||
void UpdateRegPressureAfter(const MachineInstr *MI);
|
||||
void UpdateRegPressureBefore(const MachineInstr *MI,
|
||||
SmallVector<unsigned, 4> &Defs);
|
||||
void UpdateRegPressureAfter(SmallVector<unsigned, 4> &Defs);
|
||||
|
||||
/// isLoadFromConstantMemory - Return true if the given instruction is a
|
||||
/// load from constant memory.
|
||||
@ -560,28 +558,26 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
|
||||
if (!Preheader)
|
||||
return;
|
||||
|
||||
if (TrackRegPressure) {
|
||||
if (IsHeader) {
|
||||
// Compute registers which are liveout of preheader.
|
||||
RegSeen.clear();
|
||||
BackTrace.clear();
|
||||
InitRegPressure(Preheader);
|
||||
}
|
||||
|
||||
// Remember livein register pressure.
|
||||
BackTrace.push_back(RegPressure);
|
||||
if (IsHeader) {
|
||||
// Compute registers which are liveout of preheader.
|
||||
RegSeen.clear();
|
||||
BackTrace.clear();
|
||||
InitRegPressure(Preheader);
|
||||
}
|
||||
|
||||
// Remember livein register pressure.
|
||||
BackTrace.push_back(RegPressure);
|
||||
|
||||
SmallVector<unsigned, 4> Defs;
|
||||
for (MachineBasicBlock::iterator
|
||||
MII = BB->begin(), E = BB->end(); MII != E; ) {
|
||||
MachineBasicBlock::iterator NextMII = MII; ++NextMII;
|
||||
MachineInstr *MI = &*MII;
|
||||
|
||||
if (TrackRegPressure)
|
||||
UpdateRegPressureBefore(MI);
|
||||
assert(Defs.empty());
|
||||
UpdateRegPressureBefore(MI, Defs);
|
||||
Hoist(MI, Preheader);
|
||||
if (TrackRegPressure)
|
||||
UpdateRegPressureAfter(MI);
|
||||
UpdateRegPressureAfter(Defs);
|
||||
|
||||
MII = NextMII;
|
||||
}
|
||||
@ -595,8 +591,7 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
|
||||
HoistRegion(Children[I]);
|
||||
}
|
||||
|
||||
if (TrackRegPressure)
|
||||
BackTrace.pop_back();
|
||||
BackTrace.pop_back();
|
||||
}
|
||||
|
||||
/// InitRegPressure - Find all virtual register references that are liveout of
|
||||
@ -635,12 +630,13 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
|
||||
|
||||
/// UpdateRegPressureBefore / UpdateRegPressureAfter - Update estimate of
|
||||
/// register pressure before and after executing a specific instruction.
|
||||
void MachineLICM::UpdateRegPressureBefore(const MachineInstr *MI) {
|
||||
void MachineLICM::UpdateRegPressureBefore(const MachineInstr *MI,
|
||||
SmallVector<unsigned, 4> &Defs) {
|
||||
bool NoImpact = MI->isImplicitDef() || MI->isPHI();
|
||||
|
||||
for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
|
||||
const MachineOperand &MO = MI->getOperand(i);
|
||||
if (!MO.isReg() || MO.isImplicit() || !MO.isUse())
|
||||
if (!MO.isReg() || MO.isImplicit())
|
||||
continue;
|
||||
unsigned Reg = MO.getReg();
|
||||
if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
|
||||
@ -650,33 +646,26 @@ void MachineLICM::UpdateRegPressureBefore(const MachineInstr *MI) {
|
||||
if (NoImpact)
|
||||
continue;
|
||||
|
||||
if (!isNew && MO.isKill()) {
|
||||
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
|
||||
EVT VT = *RC->vt_begin();
|
||||
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
|
||||
unsigned RCCost = TLI->getRepRegClassCostFor(VT);
|
||||
if (MO.isDef())
|
||||
Defs.push_back(Reg);
|
||||
else {
|
||||
if (!isNew && MO.isKill()) {
|
||||
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
|
||||
EVT VT = *RC->vt_begin();
|
||||
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
|
||||
unsigned RCCost = TLI->getRepRegClassCostFor(VT);
|
||||
|
||||
assert(RCCost <= RegPressure[RCId]);
|
||||
RegPressure[RCId] -= RCCost;
|
||||
assert(RCCost <= RegPressure[RCId]);
|
||||
RegPressure[RCId] -= RCCost;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MachineLICM::UpdateRegPressureAfter(const MachineInstr *MI) {
|
||||
bool NoImpact = MI->isImplicitDef() || MI->isPHI();
|
||||
|
||||
for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
|
||||
const MachineOperand &MO = MI->getOperand(i);
|
||||
if (!MO.isReg() || MO.isImplicit() || !MO.isDef())
|
||||
continue;
|
||||
unsigned Reg = MO.getReg();
|
||||
if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
|
||||
continue;
|
||||
|
||||
void MachineLICM::UpdateRegPressureAfter(SmallVector<unsigned, 4> &Defs) {
|
||||
while (!Defs.empty()) {
|
||||
unsigned Reg = Defs.pop_back_val();
|
||||
RegSeen.insert(Reg);
|
||||
if (NoImpact)
|
||||
continue;
|
||||
|
||||
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
|
||||
EVT VT = *RC->vt_begin();
|
||||
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
|
||||
@ -792,15 +781,14 @@ bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) {
|
||||
}
|
||||
}
|
||||
|
||||
/// ComputeOperandLatency - Compute operand latency between a def of 'Reg'
|
||||
/// and an use in the current loop.
|
||||
int MachineLICM::ComputeOperandLatency(MachineInstr &MI,
|
||||
unsigned DefIdx, unsigned Reg) {
|
||||
/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
|
||||
/// and a use in the current loop, return true if the target considered
|
||||
/// it 'high'.
|
||||
bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
|
||||
unsigned DefIdx, unsigned Reg) {
|
||||
if (MRI->use_nodbg_empty(Reg))
|
||||
// No use? Return arbitrary large number!
|
||||
return 300;
|
||||
return false;
|
||||
|
||||
int Latency = -1;
|
||||
for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
|
||||
E = MRI->use_nodbg_end(); I != E; ++I) {
|
||||
MachineInstr *UseMI = &*I;
|
||||
@ -814,18 +802,15 @@ int MachineLICM::ComputeOperandLatency(MachineInstr &MI,
|
||||
if (MOReg != Reg)
|
||||
continue;
|
||||
|
||||
int UseCycle = TII->getOperandLatency(InstrItins, &MI, DefIdx, UseMI, i);
|
||||
Latency = std::max(Latency, UseCycle);
|
||||
if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
|
||||
return true;
|
||||
}
|
||||
|
||||
if (Latency != -1)
|
||||
break;
|
||||
// Only look at the first in loop use.
|
||||
break;
|
||||
}
|
||||
|
||||
if (Latency == -1)
|
||||
Latency = InstrItins->getOperandCycle(MI.getDesc().getSchedClass(), DefIdx);
|
||||
|
||||
return Latency;
|
||||
return false;
|
||||
}
|
||||
|
||||
/// IncreaseHighRegPressure - Visit BBs from preheader to current BB, check
|
||||
@ -859,19 +844,19 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
|
||||
if (MI.isImplicitDef())
|
||||
return true;
|
||||
|
||||
// FIXME: For now, only hoist re-materilizable instructions. LICM will
|
||||
// increase register pressure. We want to make sure it doesn't increase
|
||||
// spilling.
|
||||
// If the instruction is cheap, only hoist if it is re-materializable. LICM
|
||||
// will increase register pressure. It's probably not worth it if the
|
||||
// instruction is cheap.
|
||||
// Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
|
||||
// these tend to help performance in low register pressure situation. The
|
||||
// trade off is it may cause spill in high pressure situation. It will end up
|
||||
// adding a store in the loop preheader. But the reload is no more expensive.
|
||||
// The side benefit is these loads are frequently CSE'ed.
|
||||
if (!TrackRegPressure || MI.getDesc().isAsCheapAsAMove()) {
|
||||
if (!TII->isTriviallyReMaterializable(&MI, AA) &&
|
||||
!isLoadFromConstantMemory(&MI))
|
||||
if (MI.getDesc().isAsCheapAsAMove()) {
|
||||
if (!TII->isTriviallyReMaterializable(&MI, AA))
|
||||
return false;
|
||||
} else {
|
||||
// Estimate register pressure to determine whether to LICM the instruction.
|
||||
// In low register pressure situation, we can be more aggressive about
|
||||
// hoisting. Also, favors hoisting long latency instructions even in
|
||||
// moderately high pressure situation.
|
||||
@ -884,13 +869,9 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
|
||||
if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
|
||||
continue;
|
||||
if (MO.isDef()) {
|
||||
if (InstrItins && !InstrItins->isEmpty()) {
|
||||
int Cycle = ComputeOperandLatency(MI, i, Reg);
|
||||
if (Cycle > 3) {
|
||||
// FIXME: Target specific high latency limit?
|
||||
++NumHighLatency;
|
||||
return true;
|
||||
}
|
||||
if (HasHighOperandLatency(MI, i, Reg)) {
|
||||
++NumHighLatency;
|
||||
return true;
|
||||
}
|
||||
|
||||
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
|
||||
|
@ -1925,3 +1925,23 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
||||
return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
|
||||
UseTID, UseIdx, UseAlign);
|
||||
}
|
||||
|
||||
bool ARMBaseInstrInfo::
|
||||
hasHighOperandLatency(const InstrItineraryData *ItinData,
|
||||
const MachineRegisterInfo *MRI,
|
||||
const MachineInstr *DefMI, unsigned DefIdx,
|
||||
const MachineInstr *UseMI, unsigned UseIdx) const {
|
||||
unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
|
||||
unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
|
||||
if (Subtarget.isCortexA8() &&
|
||||
(DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
|
||||
// CortexA8 VFP instructions are not pipelined.
|
||||
return true;
|
||||
|
||||
// Hoist VFP / NEON instructions with 4 or higher latency.
|
||||
int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
|
||||
if (Latency <= 3)
|
||||
return false;
|
||||
return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
|
||||
UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
|
||||
}
|
||||
|
@ -377,6 +377,11 @@ private:
|
||||
unsigned DefIdx, unsigned DefAlign,
|
||||
const TargetInstrDesc &UseTID,
|
||||
unsigned UseIdx, unsigned UseAlign) const;
|
||||
|
||||
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
|
||||
const MachineRegisterInfo *MRI,
|
||||
const MachineInstr *DefMI, unsigned DefIdx,
|
||||
const MachineInstr *UseMI, unsigned UseIdx) const;
|
||||
};
|
||||
|
||||
static inline
|
||||
|
@ -3152,6 +3152,41 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
|
||||
NopInst.setOpcode(X86::NOOP);
|
||||
}
|
||||
|
||||
bool X86InstrInfo::
|
||||
hasHighOperandLatency(const InstrItineraryData *ItinData,
|
||||
const MachineRegisterInfo *MRI,
|
||||
const MachineInstr *DefMI, unsigned DefIdx,
|
||||
const MachineInstr *UseMI, unsigned UseIdx) const {
|
||||
switch (DefMI->getOpcode()) {
|
||||
default: return false;
|
||||
case X86::DIVSDrm:
|
||||
case X86::DIVSDrm_Int:
|
||||
case X86::DIVSDrr:
|
||||
case X86::DIVSDrr_Int:
|
||||
case X86::DIVSSrm:
|
||||
case X86::DIVSSrm_Int:
|
||||
case X86::DIVSSrr:
|
||||
case X86::DIVSSrr_Int:
|
||||
case X86::SQRTPDm:
|
||||
case X86::SQRTPDm_Int:
|
||||
case X86::SQRTPDr:
|
||||
case X86::SQRTPDr_Int:
|
||||
case X86::SQRTPSm:
|
||||
case X86::SQRTPSm_Int:
|
||||
case X86::SQRTPSr:
|
||||
case X86::SQRTPSr_Int:
|
||||
case X86::SQRTSDm:
|
||||
case X86::SQRTSDm_Int:
|
||||
case X86::SQRTSDr:
|
||||
case X86::SQRTSDr_Int:
|
||||
case X86::SQRTSSm:
|
||||
case X86::SQRTSSm_Int:
|
||||
case X86::SQRTSSr:
|
||||
case X86::SQRTSSr_Int:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// CGBR - Create Global Base Reg pass. This initializes the PIC
|
||||
/// global base register for x86-32.
|
||||
|
@ -864,6 +864,11 @@ public:
|
||||
unsigned OpNum,
|
||||
const SmallVectorImpl<MachineOperand> &MOs,
|
||||
unsigned Size, unsigned Alignment) const;
|
||||
|
||||
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
|
||||
const MachineRegisterInfo *MRI,
|
||||
const MachineInstr *DefMI, unsigned DefIdx,
|
||||
const MachineInstr *UseMI, unsigned UseIdx) const;
|
||||
|
||||
private:
|
||||
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
|
||||
|
@ -1,65 +0,0 @@
|
||||
; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -o /dev/null -stats -info-output-file - | grep "Number of re-materialization"
|
||||
|
||||
define i32 @main(i32 %argc, i8** nocapture %argv, double %d1, double %d2) nounwind {
|
||||
entry:
|
||||
br i1 undef, label %smvp.exit, label %bb.i3
|
||||
|
||||
bb.i3: ; preds = %bb.i3, %bb134
|
||||
br i1 undef, label %smvp.exit, label %bb.i3
|
||||
|
||||
smvp.exit: ; preds = %bb.i3
|
||||
%0 = fmul double %d1, 2.400000e-03 ; <double> [#uses=2]
|
||||
br i1 undef, label %bb138.preheader, label %bb159
|
||||
|
||||
bb138.preheader: ; preds = %smvp.exit
|
||||
br label %bb138
|
||||
|
||||
bb138: ; preds = %bb138, %bb138.preheader
|
||||
br i1 undef, label %bb138, label %bb145.loopexit
|
||||
|
||||
bb142: ; preds = %bb.nph218.bb.nph218.split_crit_edge, %phi0.exit
|
||||
%1 = fmul double %d1, -1.200000e-03 ; <double> [#uses=1]
|
||||
%2 = fadd double %d2, %1 ; <double> [#uses=1]
|
||||
%3 = fmul double %2, %d2 ; <double> [#uses=1]
|
||||
%4 = fsub double 0.000000e+00, %3 ; <double> [#uses=1]
|
||||
br i1 %14, label %phi1.exit, label %bb.i35
|
||||
|
||||
bb.i35: ; preds = %bb142
|
||||
%5 = call double @sin(double %15) nounwind readonly ; <double> [#uses=1]
|
||||
%6 = fmul double %5, 0x4031740AFA84AD8A ; <double> [#uses=1]
|
||||
%7 = fsub double 1.000000e+00, undef ; <double> [#uses=1]
|
||||
%8 = fdiv double %7, 6.000000e-01 ; <double> [#uses=1]
|
||||
br label %phi1.exit
|
||||
|
||||
phi1.exit: ; preds = %bb.i35, %bb142
|
||||
%.pn = phi double [ %6, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1]
|
||||
%9 = phi double [ %8, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; <double> [#uses=1]
|
||||
%10 = fmul double %.pn, %9 ; <double> [#uses=1]
|
||||
br i1 %14, label %phi0.exit, label %bb.i
|
||||
|
||||
bb.i: ; preds = %phi1.exit
|
||||
unreachable
|
||||
|
||||
phi0.exit: ; preds = %phi1.exit
|
||||
%11 = fsub double %4, %10 ; <double> [#uses=1]
|
||||
%12 = fadd double 0.000000e+00, %11 ; <double> [#uses=1]
|
||||
store double %12, double* undef, align 4
|
||||
br label %bb142
|
||||
|
||||
bb145.loopexit: ; preds = %bb138
|
||||
br i1 undef, label %bb.nph218.bb.nph218.split_crit_edge, label %bb159
|
||||
|
||||
bb.nph218.bb.nph218.split_crit_edge: ; preds = %bb145.loopexit
|
||||
%13 = fmul double %0, 0x401921FB54442D18 ; <double> [#uses=1]
|
||||
%14 = fcmp ugt double %0, 6.000000e-01 ; <i1> [#uses=2]
|
||||
%15 = fdiv double %13, 6.000000e-01 ; <double> [#uses=1]
|
||||
br label %bb142
|
||||
|
||||
bb159: ; preds = %bb145.loopexit, %smvp.exit, %bb134
|
||||
unreachable
|
||||
|
||||
bb166: ; preds = %bb127
|
||||
unreachable
|
||||
}
|
||||
|
||||
declare double @sin(double) nounwind readonly
|
@ -2,17 +2,16 @@
|
||||
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s
|
||||
; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375.
|
||||
; Eventually this should become the default and be moved into machine-licm.ll.
|
||||
; FIXME: the vdup should be hoisted out of the loop, 8248029.
|
||||
|
||||
define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
|
||||
entry:
|
||||
; CHECK: t2:
|
||||
; CHECK: mov.w r3, #1065353216
|
||||
; CHECK: vdup.32 q{{.*}}, r3
|
||||
br i1 undef, label %bb1, label %bb2
|
||||
|
||||
bb1:
|
||||
; CHECK-NEXT: %bb1
|
||||
; CHECK: vdup.32 q{{.*}}, r3
|
||||
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
|
||||
%tmp1 = shl i32 %indvar, 2
|
||||
%gep1 = getelementptr i8* %ptr1, i32 %tmp1
|
||||
|
@ -1,6 +1,9 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills}
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats |& FileCheck %s
|
||||
; Now this test spills one register. But a reload in the loop is cheaper than
|
||||
; the divsd so it's a win.
|
||||
|
||||
define fastcc void @fourn(double* %data, i32 %isign) nounwind {
|
||||
; CHECK: fourn
|
||||
entry:
|
||||
br label %bb
|
||||
|
||||
@ -11,6 +14,11 @@ bb: ; preds = %bb, %entry
|
||||
%1 = icmp sgt i32 %0, 2 ; <i1> [#uses=1]
|
||||
br i1 %1, label %bb30.loopexit, label %bb
|
||||
|
||||
; CHECK: %bb30.loopexit
|
||||
; CHECK: divsd %xmm0
|
||||
; CHECK: movsd %xmm0, 16(%esp)
|
||||
; CHECK: .align
|
||||
; CHECK-NEXT: %bb3
|
||||
bb3: ; preds = %bb30.loopexit, %bb25, %bb3
|
||||
%2 = load i32* null, align 4 ; <i32> [#uses=1]
|
||||
%3 = mul i32 %2, 0 ; <i32> [#uses=1]
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm}
|
||||
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {7 machine-licm}
|
||||
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
|
||||
; rdar://6627786
|
||||
; rdar://7792037
|
||||
|
Loading…
x
Reference in New Issue
Block a user