mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-23 12:40:17 +00:00
Improve instruction scheduling for the PPC POWER7
Aside from a few minor latency corrections, the major change here is a new hazard recognizer which focuses on better dispatch-group formation on the POWER7. As with the PPC970's hazard recognizer, the most important thing it does is avoid load-after-store hazards within the same dispatch group. It uses the POWER7's special dispatch-group-terminating nop instruction (instead of inserting multiple regular nop instructions). This new hazard recognizer makes use of the scheduling dependency graph itself, built using AA information, to robustly detect the possibility of load-after-store hazards. significant test-suite performance changes (the error bars are 99.5% confidence intervals based on 5 test-suite runs both with and without the change -- speedups are negative): speedups: MultiSource/Benchmarks/FreeBench/pcompress2/pcompress2 -0.55171% +/- 0.333168% MultiSource/Benchmarks/TSVC/CrossingThresholds-dbl/CrossingThresholds-dbl -17.5576% +/- 14.598% MultiSource/Benchmarks/TSVC/Reductions-dbl/Reductions-dbl -29.5708% +/- 7.09058% MultiSource/Benchmarks/TSVC/Reductions-flt/Reductions-flt -34.9471% +/- 11.4391% SingleSource/Benchmarks/BenchmarkGame/puzzle -25.1347% +/- 11.0104% SingleSource/Benchmarks/Misc/flops-8 -17.7297% +/- 9.79061% SingleSource/Benchmarks/Shootout-C++/ary3 -35.5018% +/- 23.9458% SingleSource/Regression/C/uint64_to_float -56.3165% +/- 25.4234% SingleSource/UnitTests/Vectorizer/gcc-loops -18.5309% +/- 6.8496% regressions: MultiSource/Benchmarks/ASCI_Purple/SMG2000/smg2000 18.351% +/- 12.156% SingleSource/Benchmarks/Shootout-C++/methcall 27.3086% +/- 14.4733% git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197099 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f15758b1d3
commit
f0c1388dd1
@ -15,12 +15,221 @@
|
||||
#include "PPCHazardRecognizers.h"
|
||||
#include "PPC.h"
|
||||
#include "PPCInstrInfo.h"
|
||||
#include "PPCTargetMachine.h"
|
||||
#include "llvm/CodeGen/ScheduleDAG.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
using namespace llvm;
|
||||
|
||||
/// Return true if placing \p SU in the current dispatch group would create a
/// hazard: either the branch-after-counter-set case checked by isBCTRAfterSet,
/// or a load that has a memory-ordering dependency on a store which is already
/// a member of the current group.
bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
  // FIXME: Move this.
  if (isBCTRAfterSet(SU))
    return true;

  // Only instructions with a known descriptor that may load are of interest.
  const MCInstrDesc *Desc = DAG->getInstrDesc(SU);
  if (!Desc || !Desc->mayLoad())
    return false;

  // SU is a load. Look for a predecessor that is a store, is linked to SU by
  // a memory or barrier dependency, and sits in the current dispatch group.
  const unsigned NumPreds = static_cast<unsigned>(SU->Preds.size());
  for (unsigned P = 0; P < NumPreds; ++P) {
    SUnit *PredSU = SU->Preds[P].getSUnit();

    const MCInstrDesc *PredDesc = DAG->getInstrDesc(PredSU);
    if (!PredDesc || !PredDesc->mayStore())
      continue;

    const bool IsOrderingDep =
        SU->Preds[P].isNormalMemory() || SU->Preds[P].isBarrier();
    if (!IsOrderingDep)
      continue;

    for (unsigned G = 0, GE = CurGroup.size(); G != GE; ++G) {
      if (CurGroup[G] == PredSU)
        return true;
    }
  }

  return false;
}
|
||||
|
||||
/// Return true if \p SU is a branch that has a non-control dependency on an
/// instruction of scheduling class IIC_SprMTSPR (a counter-register write)
/// which is already a member of the current dispatch group.
bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
  // Only instructions with a known descriptor that are branches qualify.
  const MCInstrDesc *Desc = DAG->getInstrDesc(SU);
  if (!Desc || !Desc->isBranch())
    return false;

  // SU is a branch. Look for a predecessor in this dispatch group that sets
  // the counter register and on which SU has a data dependence.
  const unsigned NumPreds = static_cast<unsigned>(SU->Preds.size());
  for (unsigned P = 0; P < NumPreds; ++P) {
    SUnit *PredSU = SU->Preds[P].getSUnit();

    const MCInstrDesc *PredDesc = DAG->getInstrDesc(PredSU);
    if (!PredDesc || PredDesc->getSchedClass() != PPC::Sched::IIC_SprMTSPR)
      continue;

    // Pure control dependencies do not count.
    if (SU->Preds[P].isCtrl())
      continue;

    for (unsigned G = 0, GE = CurGroup.size(); G != GE; ++G) {
      if (CurGroup[G] == PredSU)
        return true;
    }
  }

  return false;
}
|
||||
|
||||
// FIXME: Remove this forward declaration once it is no longer needed:
|
||||
namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } }
|
||||
|
||||
// FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific.
|
||||
|
||||
bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
|
||||
unsigned &NSlots) {
|
||||
// FIXME: Indirectly, this information is contained in the itinerary, and
|
||||
// we should derive it from there instead of separately specifying it
|
||||
// here.
|
||||
unsigned IIC = MCID->getSchedClass();
|
||||
switch (IIC) {
|
||||
default:
|
||||
NSlots = 1;
|
||||
break;
|
||||
case PPC::Sched::IIC_IntDivW:
|
||||
case PPC::Sched::IIC_IntDivD:
|
||||
case PPC::Sched::IIC_LdStLoadUpd:
|
||||
case PPC::Sched::IIC_LdStLDU:
|
||||
case PPC::Sched::IIC_LdStLFDU:
|
||||
case PPC::Sched::IIC_LdStLFDUX:
|
||||
case PPC::Sched::IIC_LdStLHA:
|
||||
case PPC::Sched::IIC_LdStLHAU:
|
||||
case PPC::Sched::IIC_LdStLWA:
|
||||
case PPC::Sched::IIC_LdStSTDU:
|
||||
case PPC::Sched::IIC_LdStSTFDU:
|
||||
NSlots = 2;
|
||||
break;
|
||||
case PPC::Sched::IIC_LdStLoadUpdX:
|
||||
case PPC::Sched::IIC_LdStLDUX:
|
||||
case PPC::Sched::IIC_LdStLHAUX:
|
||||
case PPC::Sched::IIC_LdStLWARX:
|
||||
case PPC::Sched::IIC_LdStLDARX:
|
||||
case PPC::Sched::IIC_LdStSTDUX:
|
||||
case PPC::Sched::IIC_LdStSTDCX:
|
||||
case PPC::Sched::IIC_LdStSTWCX:
|
||||
case PPC::Sched::IIC_BrMCRX: // mtcr
|
||||
// FIXME: Add sync/isync (here and in the itinerary).
|
||||
NSlots = 4;
|
||||
break;
|
||||
}
|
||||
|
||||
// FIXME: record-form instructions need a different itinerary class.
|
||||
if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1)
|
||||
NSlots = 2;
|
||||
|
||||
switch (IIC) {
|
||||
default:
|
||||
// All multi-slot instructions must come first.
|
||||
return NSlots > 1;
|
||||
case PPC::Sched::IIC_SprMFCR:
|
||||
case PPC::Sched::IIC_SprMFCRF:
|
||||
case PPC::Sched::IIC_SprMTSPR:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/// Report a NoopHazard when \p SU, considered with no stalls, would hit a
/// dispatch-group hazard (see isLoadAfterStore); otherwise defer to the
/// scoreboard recognizer.
ScheduleHazardRecognizer::HazardType
PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  const bool GroupHazard = (Stalls == 0) && isLoadAfterStore(SU);
  return GroupHazard ? NoopHazard
                     : ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
}
|
||||
|
||||
/// Ask the scheduler to prefer a different candidate when \p SU must be first
/// in its dispatch group but the current group already has occupied slots.
/// In every other case the scoreboard recognizer decides.
bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
  if (const MCInstrDesc *Desc = DAG->getInstrDesc(SU)) {
    unsigned SlotsNeeded; // Set by mustComeFirst; not needed here.
    if (mustComeFirst(Desc, SlotsNeeded) && CurSlots != 0)
      return true;
  }

  return ScoreboardHazardRecognizer::ShouldPreferAnother(SU);
}
|
||||
|
||||
/// Return the number of nops to emit before \p SU so that it does not land in
/// the same dispatch group as the instruction it conflicts with.
unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
  // Without a group hazard (or with the group already at six slots) there is
  // nothing group-specific to do.
  if (!isLoadAfterStore(SU) || CurSlots >= 6)
    return ScoreboardHazardRecognizer::PreEmitNoops(SU);

  const unsigned CPU =
      DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();

  // If we're using a special group-terminating nop, then we need only one.
  const bool HasGroupTerminatingNop =
      CPU == PPC::DIR_PWR6 || CPU == PPC::DIR_PWR7;
  if (HasGroupTerminatingNop)
    return 1;

  // We only need to fill out a maximum of 5 slots here: The 6th slot could
  // only be a second branch, and otherwise the next instruction will start a
  // new group.
  return 5 - CurSlots;
}
|
||||
|
||||
/// Update the dispatch-group bookkeeping for the emission of \p SU and then
/// forward to the scoreboard recognizer.
void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
  if (const MCInstrDesc *Desc = DAG->getInstrDesc(SU)) {
    // The group is complete once five slots are used, or when a branch
    // arrives after one branch has already been recorded.
    const bool GroupComplete =
        CurSlots == 5 || (Desc->isBranch() && CurBranches == 1);

    if (!GroupComplete) {
      DEBUG(dbgs() << "**** Adding to dispatch group: SU(" <<
            SU->NodeNum << "): ");
      DEBUG(DAG->dumpNode(SU));

      unsigned SlotsNeeded;
      const bool StartsGroup = mustComeFirst(Desc, SlotsNeeded);

      // If this instruction must come first, but does not, then it starts a
      // new group.
      if (StartsGroup && CurSlots != 0) {
        CurGroup.clear();
        CurSlots = CurBranches = 0;
      }

      CurGroup.push_back(SU);
      CurSlots += SlotsNeeded;

      if (Desc->isBranch())
        ++CurBranches;
    } else {
      // The tracking state is reset; SU itself is not recorded in the
      // (now empty) group.
      CurGroup.clear();
      CurSlots = CurBranches = 0;
    }
  }

  ScoreboardHazardRecognizer::EmitInstruction(SU);
}
|
||||
|
||||
void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() {
|
||||
return ScoreboardHazardRecognizer::AdvanceCycle();
|
||||
}
|
||||
|
||||
/// This recognizer does not implement bottom-up scheduling, so reaching this
/// hook is treated as unreachable.
void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() {
  llvm_unreachable("Bottom-up scheduling not supported");
}
|
||||
|
||||
void PPCDispatchGroupSBHazardRecognizer::Reset() {
|
||||
CurGroup.clear();
|
||||
CurSlots = CurBranches = 0;
|
||||
return ScoreboardHazardRecognizer::Reset();
|
||||
}
|
||||
|
||||
void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
|
||||
unsigned Directive =
|
||||
DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
|
||||
// If the group has now filled all of its slots, or if we're using a special
|
||||
// group-terminating nop, the group is complete.
|
||||
if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
|
||||
CurSlots == 6) {
|
||||
CurGroup.clear();
|
||||
CurSlots = CurBranches = 0;
|
||||
} else {
|
||||
CurGroup.push_back(0);
|
||||
++CurSlots;
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC 970 Hazard Recognizer
|
||||
//
|
||||
|
@ -21,6 +21,32 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based
|
||||
/// hazard recognizer for PPC ooo processors with dispatch-group hazards.
|
||||
class PPCDispatchGroupSBHazardRecognizer : public ScoreboardHazardRecognizer {
|
||||
const ScheduleDAG *DAG;
|
||||
SmallVector<SUnit *, 7> CurGroup;
|
||||
unsigned CurSlots, CurBranches;
|
||||
|
||||
bool isLoadAfterStore(SUnit *SU);
|
||||
bool isBCTRAfterSet(SUnit *SU);
|
||||
bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots);
|
||||
public:
|
||||
PPCDispatchGroupSBHazardRecognizer(const InstrItineraryData *ItinData,
|
||||
const ScheduleDAG *DAG_) :
|
||||
ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_),
|
||||
CurSlots(0), CurBranches(0) {}
|
||||
|
||||
virtual HazardType getHazardType(SUnit *SU, int Stalls);
|
||||
virtual bool ShouldPreferAnother(SUnit* SU);
|
||||
virtual unsigned PreEmitNoops(SUnit *SU);
|
||||
virtual void EmitInstruction(SUnit *SU);
|
||||
virtual void AdvanceCycle();
|
||||
virtual void RecedeCycle();
|
||||
virtual void Reset();
|
||||
virtual void EmitNoop();
|
||||
};
|
||||
|
||||
/// PPCHazardRecognizer970 - This class defines a finite state automata that
|
||||
/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
|
||||
/// promotes good dispatch group formation and implements noop insertion to
|
||||
|
@ -258,6 +258,15 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
|
||||
let Addr = 0;
|
||||
}
|
||||
|
||||
// A DForm_4 variant that takes no operands from the instruction: fields A and
// B are both pinned to the register number R and field C is pinned to zero.
class DForm_4_fixedreg_zero<bits<6> opcode, bits<5> R, dag OOL, dag IOL,
                            string asmstr, InstrItinClass itin,
                            list<dag> pattern>
  : DForm_4<opcode, OOL, IOL, asmstr, itin, pattern> {
  // Fixed zero in field C.
  let C = 0;

  // Both register fields name the same fixed register.
  let B = R;
  let A = R;
}
|
||||
|
||||
class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
|
||||
dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin, list<dag> pattern>
|
||||
|
@ -74,6 +74,9 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
|
||||
const ScheduleDAG *DAG) const {
|
||||
unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
|
||||
|
||||
if (Directive == PPC::DIR_PWR7)
|
||||
return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
|
||||
|
||||
// Most subtargets use a PPC970 recognizer.
|
||||
if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
|
||||
Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
|
||||
@ -85,6 +88,56 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
|
||||
return new ScoreboardHazardRecognizer(II, DAG);
|
||||
}
|
||||
|
||||
|
||||
int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
||||
const MachineInstr *DefMI, unsigned DefIdx,
|
||||
const MachineInstr *UseMI,
|
||||
unsigned UseIdx) const {
|
||||
int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
|
||||
UseMI, UseIdx);
|
||||
|
||||
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
|
||||
unsigned Reg = DefMO.getReg();
|
||||
|
||||
const TargetRegisterInfo *TRI = &getRegisterInfo();
|
||||
bool IsRegCR;
|
||||
if (TRI->isVirtualRegister(Reg)) {
|
||||
const MachineRegisterInfo *MRI =
|
||||
&DefMI->getParent()->getParent()->getRegInfo();
|
||||
IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
|
||||
MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
|
||||
} else {
|
||||
IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
|
||||
PPC::CRBITRCRegClass.contains(Reg);
|
||||
}
|
||||
|
||||
if (UseMI->isBranch() && IsRegCR) {
|
||||
if (Latency < 0)
|
||||
Latency = getInstrLatency(ItinData, DefMI);
|
||||
|
||||
// On some cores, there is an additional delay between writing to a condition
|
||||
// register, and using it from a branch.
|
||||
unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
|
||||
switch (Directive) {
|
||||
default: break;
|
||||
case PPC::DIR_7400:
|
||||
case PPC::DIR_750:
|
||||
case PPC::DIR_970:
|
||||
case PPC::DIR_E5500:
|
||||
case PPC::DIR_PWR4:
|
||||
case PPC::DIR_PWR5:
|
||||
case PPC::DIR_PWR5X:
|
||||
case PPC::DIR_PWR6:
|
||||
case PPC::DIR_PWR6X:
|
||||
case PPC::DIR_PWR7:
|
||||
Latency += 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return Latency;
|
||||
}
|
||||
|
||||
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
|
||||
bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
|
||||
unsigned &SrcReg, unsigned &DstReg,
|
||||
@ -218,10 +271,19 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
|
||||
|
||||
void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI) const {
|
||||
DebugLoc DL;
|
||||
BuildMI(MBB, MI, DL, get(PPC::NOP));
|
||||
}
|
||||
// This function is used for scheduling, and the nop wanted here is the type
|
||||
// that terminates dispatch groups on the POWER cores.
|
||||
unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
|
||||
unsigned Opcode;
|
||||
switch (Directive) {
|
||||
default: Opcode = PPC::NOP; break;
|
||||
case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
|
||||
case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
|
||||
}
|
||||
|
||||
DebugLoc DL;
|
||||
BuildMI(MBB, MI, DL, get(Opcode));
|
||||
}
|
||||
|
||||
// Branch analysis.
|
||||
// Note: If the condition register is set to CTR or CTR8 then this is a
|
||||
|
@ -95,6 +95,18 @@ public:
|
||||
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
|
||||
const ScheduleDAG *DAG) const;
|
||||
|
||||
virtual
|
||||
int getOperandLatency(const InstrItineraryData *ItinData,
|
||||
const MachineInstr *DefMI, unsigned DefIdx,
|
||||
const MachineInstr *UseMI, unsigned UseIdx) const;
|
||||
virtual
|
||||
int getOperandLatency(const InstrItineraryData *ItinData,
|
||||
SDNode *DefNode, unsigned DefIdx,
|
||||
SDNode *UseNode, unsigned UseIdx) const {
|
||||
return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx,
|
||||
UseNode, UseIdx);
|
||||
}
|
||||
|
||||
bool isCoalescableExtInstr(const MachineInstr &MI,
|
||||
unsigned &SrcReg, unsigned &DstReg,
|
||||
unsigned &SubIdx) const;
|
||||
|
@ -1616,8 +1616,17 @@ def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
|
||||
def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
|
||||
"xoris $dst, $src1, $src2", IIC_IntSimple,
|
||||
[(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
|
||||
|
||||
// The plain nop.
def NOP : DForm_4_zero<24, (outs), (ins), "nop", IIC_IntSimple, []>;

// The POWER6 and POWER7 have special group-terminating nops. They are emitted
// by the code generator only (isCodeGenOnly).
let isCodeGenOnly = 1 in
def NOP_GT_PWR6 : DForm_4_fixedreg_zero<24, 1, (outs), (ins),
                                        "ori 1, 1, 0", IIC_IntSimple, []>;

let isCodeGenOnly = 1 in
def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins),
                                        "ori 2, 2, 0", IIC_IntSimple, []>;
|
||||
|
||||
let isCompare = 1, neverHasSideEffects = 1 in {
|
||||
def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm),
|
||||
"cmpwi $crD, $rA, $imm", IIC_IntCompare>;
|
||||
|
@ -93,6 +93,7 @@ def P7Itineraries : ProcessorItineraries<
|
||||
P7_DU3, P7_DU4], 0>,
|
||||
InstrStage<1, [P7_FX1, P7_FX2]>],
|
||||
[1, 1, 1]>,
|
||||
// FIXME: Add record-form itinerary data.
|
||||
InstrItinData<IIC_IntDivW , [InstrStage<1, [P7_DU1], 0>,
|
||||
InstrStage<1, [P7_DU2], 0>,
|
||||
InstrStage<36, [P7_FX1, P7_FX2]>],
|
||||
@ -290,7 +291,10 @@ def P7Itineraries : ProcessorItineraries<
|
||||
InstrStage<1, [P7_DU4], 0>,
|
||||
InstrStage<1, [P7_LS1, P7_LS2]>],
|
||||
[1, 1, 1]>,
|
||||
InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU4], 0>,
|
||||
InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU1], 0>,
|
||||
InstrStage<1, [P7_DU2], 0>,
|
||||
InstrStage<1, [P7_DU3], 0>,
|
||||
InstrStage<1, [P7_DU4], 0>,
|
||||
InstrStage<1, [P7_CRU]>,
|
||||
InstrStage<1, [P7_FX1, P7_FX2]>],
|
||||
[3, 1]>, // mtcr
|
||||
@ -300,6 +304,9 @@ def P7Itineraries : ProcessorItineraries<
|
||||
InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P7_DU1], 0>,
|
||||
InstrStage<1, [P7_CRU]>],
|
||||
[3, 1]>,
|
||||
InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P7_DU1], 0>,
|
||||
InstrStage<1, [P7_FX1]>],
|
||||
[4, 1]>, // mtctr
|
||||
InstrItinData<IIC_FPGeneral , [InstrStage<1, [P7_DU1, P7_DU2,
|
||||
P7_DU3, P7_DU4], 0>,
|
||||
InstrStage<1, [P7_VS1, P7_VS2]>],
|
||||
|
31
test/CodeGen/PowerPC/pwr7-gt-nop.ll
Normal file
31
test/CodeGen/PowerPC/pwr7-gt-nop.ll
Normal file
@ -0,0 +1,31 @@
|
||||
; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; Three load/store pairs where each load may alias the store before it; every
; later load is expected to be separated from the preceding store by the
; group-terminating nop "ori 2, 2, 0".
define void @foo(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c, float* nocapture %d) #0 {

; CHECK-LABEL: @foo

entry:
  ; a[0] = b[0]
  %vb = load float* %b, align 4
  store float %vb, float* %a, align 4

  ; b[0] = c[0]
  %vc = load float* %c, align 4
  store float %vc, float* %b, align 4

  ; d[0] = a[0]
  %va = load float* %a, align 4
  store float %va, float* %d, align 4

  ret void

; CHECK: lfs [[REG1:[0-9]+]], 0(4)
; CHECK: stfs [[REG1]], 0(3)
; CHECK: ori 2, 2, 0
; CHECK: lfs [[REG2:[0-9]+]], 0(5)
; CHECK: stfs [[REG2]], 0(4)
; CHECK: ori 2, 2, 0
; CHECK: lfs [[REG3:[0-9]+]], 0(3)
; CHECK: stfs [[REG3]], 0(6)
; CHECK: blr
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
Loading…
Reference in New Issue
Block a user