mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-20 02:28:23 +00:00
For each instruction itinerary class, specify the number of micro-ops each
instruction in the class would be decoded to. Or zero if the number of uOPs must be determined dynamically. This will be used to determine the cost-effectiveness of predicating a micro-coded instruction. llvm-svn: 113513
This commit is contained in:
parent
e1fb5b5738
commit
b79a03a3ca
@ -591,6 +591,11 @@ public:
|
||||
MachineInstr *CmpInstr) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// getNumMicroOps - Return the number of u-operations the given machine
|
||||
/// instruction will be decoded to on the target cpu.
|
||||
virtual unsigned getNumMicroOps(const MachineInstr *MI,
|
||||
const InstrItineraryData &ItinData) const;
|
||||
};
|
||||
|
||||
/// TargetInstrInfoImpl - This is the default implementation of
|
||||
|
@ -95,6 +95,7 @@ struct InstrStage {
|
||||
/// operands are read and written.
|
||||
///
|
||||
struct InstrItinerary {
|
||||
unsigned NumMicroOps; ///< # of micro-ops, 0 means it's variable
|
||||
unsigned FirstStage; ///< Index of first stage in itinerary
|
||||
unsigned LastStage; ///< Index of last + 1 stage in itinerary
|
||||
unsigned FirstOperandCycle; ///< Index of first operand rd/wr
|
||||
|
@ -66,7 +66,14 @@ class InstrStage<int cycles, list<FuncUnit> units,
|
||||
// across all chip sets. Thus a new chip set can be added without modifying
|
||||
// instruction information.
|
||||
//
|
||||
class InstrItinClass;
|
||||
// Instruction itinerary class. NumMicroOps is the number of micro-operations
// that each instruction in the class is decoded to. A value of zero means the
// instruction can decode into a variable number of micro-ops, so the count
// must be determined dynamically. The template argument defaults to 1.
//
class InstrItinClass<int ops = 1> {
  int NumMicroOps = ops;
}
|
||||
def NoItinerary : InstrItinClass;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1412,3 +1412,66 @@ ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned
|
||||
ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
|
||||
const InstrItineraryData &ItinData) const {
|
||||
if (ItinData.isEmpty())
|
||||
return 1;
|
||||
|
||||
const TargetInstrDesc &Desc = MI->getDesc();
|
||||
unsigned Class = Desc.getSchedClass();
|
||||
unsigned UOps = ItinData.Itineratries[Class].NumMicroOps;
|
||||
if (UOps)
|
||||
return UOps;
|
||||
|
||||
unsigned Opc = MI->getOpcode();
|
||||
switch (Opc) {
|
||||
default:
|
||||
llvm_unreachable("Unexpected multi-uops instruction!");
|
||||
break;
|
||||
case ARM::VSTMQ:
|
||||
return 2;
|
||||
|
||||
// The number of uOps for load / store multiple are determined by the number
|
||||
// registers.
|
||||
// On Cortex-A8, each odd / even pair of register loads / stores
|
||||
// (e.g. r5 + r6) can be completed on the same cycle. The minimum is
|
||||
// 2. For VFP / NEON load / store multiple, the formula is
|
||||
// (#reg / 2) + (#reg % 2) + 1.
|
||||
// On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2).
|
||||
case ARM::VLDMD:
|
||||
case ARM::VLDMS:
|
||||
case ARM::VLDMD_UPD:
|
||||
case ARM::VLDMS_UPD:
|
||||
case ARM::VSTMD:
|
||||
case ARM::VSTMS:
|
||||
case ARM::VSTMD_UPD:
|
||||
case ARM::VSTMS_UPD: {
|
||||
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
|
||||
return (NumRegs / 2) + (NumRegs % 2) + 1;
|
||||
}
|
||||
case ARM::LDM_RET:
|
||||
case ARM::LDM:
|
||||
case ARM::LDM_UPD:
|
||||
case ARM::STM:
|
||||
case ARM::STM_UPD:
|
||||
case ARM::tLDM:
|
||||
case ARM::tLDM_UPD:
|
||||
case ARM::tSTM_UPD:
|
||||
case ARM::tPOP_RET:
|
||||
case ARM::tPOP:
|
||||
case ARM::tPUSH:
|
||||
case ARM::t2LDM_RET:
|
||||
case ARM::t2LDM:
|
||||
case ARM::t2LDM_UPD:
|
||||
case ARM::t2STM:
|
||||
case ARM::t2STM_UPD: {
|
||||
// FIXME: Distinquish between Cortex-A8 / Cortex-A9 and other processor
|
||||
// families.
|
||||
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
|
||||
UOps = (NumRegs / 2) + (NumRegs % 2);
|
||||
return (UOps > 2) ? UOps : 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -348,6 +348,9 @@ public:
|
||||
/// that we can remove a "comparison with zero".
|
||||
virtual bool ConvertToSetZeroFlag(MachineInstr *Instr,
|
||||
MachineInstr *CmpInstr) const;
|
||||
|
||||
virtual unsigned getNumMicroOps(const MachineInstr *MI,
|
||||
const InstrItineraryData &ItinData) const;
|
||||
};
|
||||
|
||||
static inline
|
||||
|
@ -282,7 +282,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
|
||||
// FIXME: remove when we have a way of marking an MI with these properties.
|
||||
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
|
||||
hasExtraDefRegAllocReq = 1 in
|
||||
def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br,
|
||||
def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops),
|
||||
IIC_iLoadmBr,
|
||||
"pop${p}\t$dsts", []>,
|
||||
T1Misc<{1,1,0,?,?,?,?}>;
|
||||
|
||||
@ -560,12 +561,12 @@ def tSTM_UPD : T1It<(outs tGPR:$wb),
|
||||
T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189
|
||||
|
||||
let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
|
||||
def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br,
|
||||
def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_iLoadmBr,
|
||||
"pop${p}\t$dsts", []>,
|
||||
T1Misc<{1,1,0,?,?,?,?}>;
|
||||
|
||||
let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
|
||||
def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops), IIC_Br,
|
||||
def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops), IIC_iStorem,
|
||||
"push${p}\t$srcs", []>,
|
||||
T1Misc<{0,1,0,?,?,?,?}>;
|
||||
|
||||
|
@ -42,15 +42,15 @@ def IIC_iLoadsi : InstrItinClass;
|
||||
def IIC_iLoadiu : InstrItinClass;
|
||||
def IIC_iLoadru : InstrItinClass;
|
||||
def IIC_iLoadsiu : InstrItinClass;
|
||||
def IIC_iLoadm : InstrItinClass;
|
||||
def IIC_iLoadmBr : InstrItinClass;
|
||||
def IIC_iLoadm : InstrItinClass<0>; // micro-coded
|
||||
def IIC_iLoadmBr : InstrItinClass<0>; // micro-coded
|
||||
def IIC_iStorei : InstrItinClass;
|
||||
def IIC_iStorer : InstrItinClass;
|
||||
def IIC_iStoresi : InstrItinClass;
|
||||
def IIC_iStoreiu : InstrItinClass;
|
||||
def IIC_iStoreru : InstrItinClass;
|
||||
def IIC_iStoresiu : InstrItinClass;
|
||||
def IIC_iStorem : InstrItinClass;
|
||||
def IIC_iStorem : InstrItinClass<0>; // micro-coded
|
||||
def IIC_Br : InstrItinClass;
|
||||
def IIC_fpSTAT : InstrItinClass;
|
||||
def IIC_fpUNA32 : InstrItinClass;
|
||||
@ -81,10 +81,10 @@ def IIC_fpSQRT32 : InstrItinClass;
|
||||
def IIC_fpSQRT64 : InstrItinClass;
|
||||
def IIC_fpLoad32 : InstrItinClass;
|
||||
def IIC_fpLoad64 : InstrItinClass;
|
||||
def IIC_fpLoadm : InstrItinClass;
|
||||
def IIC_fpLoadm : InstrItinClass<0>; // micro-coded
|
||||
def IIC_fpStore32 : InstrItinClass;
|
||||
def IIC_fpStore64 : InstrItinClass;
|
||||
def IIC_fpStorem : InstrItinClass;
|
||||
def IIC_fpStorem : InstrItinClass<0>; // micro-coded
|
||||
def IIC_VLD1 : InstrItinClass;
|
||||
def IIC_VLD2 : InstrItinClass;
|
||||
def IIC_VLD3 : InstrItinClass;
|
||||
|
@ -86,6 +86,11 @@ def ARMV6Itineraries : ProcessorItineraries<
|
||||
// Load multiple
|
||||
InstrItinData<IIC_iLoadm , [InstrStage<3, [V6_Pipe]>]>,
|
||||
|
||||
//
|
||||
// Load multiple plus branch
|
||||
InstrItinData<IIC_iLoadmBr , [InstrStage<3, [V6_Pipe]>,
|
||||
InstrStage<1, [V6_Pipe]>]>,
|
||||
|
||||
// Integer store pipeline
|
||||
//
|
||||
// Immediate offset
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/Target/TargetInstrItineraries.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
using namespace llvm;
|
||||
@ -47,6 +48,22 @@ TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc,
|
||||
TargetInstrInfo::~TargetInstrInfo() {
|
||||
}
|
||||
|
||||
unsigned
|
||||
TargetInstrInfo::getNumMicroOps(const MachineInstr *MI,
|
||||
const InstrItineraryData &ItinData) const {
|
||||
if (ItinData.isEmpty())
|
||||
return 1;
|
||||
|
||||
unsigned Class = MI->getDesc().getSchedClass();
|
||||
unsigned UOps = ItinData.Itineratries[Class].NumMicroOps;
|
||||
if (UOps)
|
||||
return UOps;
|
||||
|
||||
// The # of u-ops is dynamically determined. The specific target should
|
||||
// override this function to return the right number.
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// insertNoop - Insert a noop into the instruction stream at the specified
|
||||
/// point.
|
||||
void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
|
||||
|
@ -172,13 +172,10 @@ void SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
|
||||
// CollectAllItinClasses - Gathers and enumerates all the itinerary classes.
|
||||
// Returns itinerary class count.
|
||||
//
|
||||
unsigned SubtargetEmitter::CollectAllItinClasses(raw_ostream &OS,
|
||||
std::map<std::string, unsigned> &ItinClassesMap) {
|
||||
// Gather and sort all itinerary classes
|
||||
std::vector<Record*> ItinClassList =
|
||||
Records.getAllDerivedDefinitions("InstrItinClass");
|
||||
std::sort(ItinClassList.begin(), ItinClassList.end(), LessRecord());
|
||||
|
||||
unsigned SubtargetEmitter::
|
||||
CollectAllItinClasses(raw_ostream &OS,
|
||||
std::map<std::string, unsigned> &ItinClassesMap,
|
||||
std::vector<Record*> &ItinClassList) {
|
||||
// For each itinerary class
|
||||
unsigned N = ItinClassList.size();
|
||||
for (unsigned i = 0; i < N; i++) {
|
||||
@ -271,7 +268,8 @@ void SubtargetEmitter::FormItineraryOperandCycleString(Record *ItinData,
|
||||
//
|
||||
void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
|
||||
unsigned NItinClasses,
|
||||
std::map<std::string, unsigned> &ItinClassesMap,
|
||||
std::map<std::string, unsigned> &ItinClassesMap,
|
||||
std::vector<Record*> &ItinClassList,
|
||||
std::vector<std::vector<InstrItinerary> > &ProcList) {
|
||||
// Gather processor itineraries
|
||||
std::vector<Record*> ProcItinList =
|
||||
@ -374,14 +372,16 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
|
||||
}
|
||||
}
|
||||
|
||||
// Set up itinerary as location and location + stage count
|
||||
InstrItinerary Intinerary = { FindStage, FindStage + NStages,
|
||||
FindOperandCycle, FindOperandCycle + NOperandCycles};
|
||||
|
||||
// Locate where to inject into processor itinerary table
|
||||
const std::string &Name = ItinData->getValueAsDef("TheClass")->getName();
|
||||
unsigned Find = ItinClassesMap[Name];
|
||||
|
||||
// Set up itinerary as location and location + stage count
|
||||
unsigned NumUOps = ItinClassList[Find]->getValueAsInt("NumMicroOps");
|
||||
InstrItinerary Intinerary = { NumUOps, FindStage, FindStage + NStages,
|
||||
FindOperandCycle,
|
||||
FindOperandCycle + NOperandCycles};
|
||||
|
||||
// Inject - empty slots will be 0, 0
|
||||
ItinList[Find] = Intinerary;
|
||||
}
|
||||
@ -443,9 +443,11 @@ void SubtargetEmitter::EmitProcessorData(raw_ostream &OS,
|
||||
// Emit in the form of
|
||||
// { numMicroOps, firstStage, lastStage, firstCycle, lastCycle } // index
|
||||
if (Intinerary.FirstStage == 0) {
|
||||
OS << " { 0, 0, 0, 0 }";
|
||||
OS << " { 1, 0, 0, 0, 0 }";
|
||||
} else {
|
||||
OS << " { " << Intinerary.FirstStage << ", " <<
|
||||
OS << " { " <<
|
||||
Intinerary.NumMicroOps << ", " <<
|
||||
Intinerary.FirstStage << ", " <<
|
||||
Intinerary.LastStage << ", " <<
|
||||
Intinerary.FirstOperandCycle << ", " <<
|
||||
Intinerary.LastOperandCycle << " }";
|
||||
@ -455,7 +457,7 @@ void SubtargetEmitter::EmitProcessorData(raw_ostream &OS,
|
||||
}
|
||||
|
||||
// End processor itinerary table
|
||||
OS << " { ~0U, ~0U, ~0U, ~0U } // end marker\n";
|
||||
OS << " { 1, ~0U, ~0U, ~0U, ~0U } // end marker\n";
|
||||
OS << "};\n";
|
||||
}
|
||||
}
|
||||
@ -511,16 +513,22 @@ void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {
|
||||
//
|
||||
void SubtargetEmitter::EmitData(raw_ostream &OS) {
|
||||
std::map<std::string, unsigned> ItinClassesMap;
|
||||
std::vector<std::vector<InstrItinerary> > ProcList;
|
||||
// Gather and sort all itinerary classes
|
||||
std::vector<Record*> ItinClassList =
|
||||
Records.getAllDerivedDefinitions("InstrItinClass");
|
||||
std::sort(ItinClassList.begin(), ItinClassList.end(), LessRecord());
|
||||
|
||||
// Enumerate all the itinerary classes
|
||||
unsigned NItinClasses = CollectAllItinClasses(OS, ItinClassesMap);
|
||||
unsigned NItinClasses = CollectAllItinClasses(OS, ItinClassesMap,
|
||||
ItinClassList);
|
||||
// Make sure the rest is worth the effort
|
||||
HasItineraries = NItinClasses != 1; // Ignore NoItinerary.
|
||||
|
||||
if (HasItineraries) {
|
||||
std::vector<std::vector<InstrItinerary> > ProcList;
|
||||
// Emit the stage data
|
||||
EmitStageAndOperandCycleData(OS, NItinClasses, ItinClassesMap, ProcList);
|
||||
EmitStageAndOperandCycleData(OS, NItinClasses, ItinClassesMap,
|
||||
ItinClassList, ProcList);
|
||||
// Emit the processor itinerary data
|
||||
EmitProcessorData(OS, ProcList);
|
||||
// Emit the processor lookup data
|
||||
|
@ -33,7 +33,8 @@ class SubtargetEmitter : public TableGenBackend {
|
||||
void FeatureKeyValues(raw_ostream &OS);
|
||||
void CPUKeyValues(raw_ostream &OS);
|
||||
unsigned CollectAllItinClasses(raw_ostream &OS,
|
||||
std::map<std::string, unsigned> &ItinClassesMap);
|
||||
std::map<std::string,unsigned> &ItinClassesMap,
|
||||
std::vector<Record*> &ItinClassList);
|
||||
void FormItineraryStageString(const std::string &Names,
|
||||
Record *ItinData, std::string &ItinString,
|
||||
unsigned &NStages);
|
||||
@ -41,6 +42,7 @@ class SubtargetEmitter : public TableGenBackend {
|
||||
unsigned &NOperandCycles);
|
||||
void EmitStageAndOperandCycleData(raw_ostream &OS, unsigned NItinClasses,
|
||||
std::map<std::string, unsigned> &ItinClassesMap,
|
||||
std::vector<Record*> &ItinClassList,
|
||||
std::vector<std::vector<InstrItinerary> > &ProcList);
|
||||
void EmitProcessorData(raw_ostream &OS,
|
||||
std::vector<std::vector<InstrItinerary> > &ProcList);
|
||||
|
Loading…
Reference in New Issue
Block a user