mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-29 14:20:29 +00:00
[X86] Improved sched models for X86 BT*rr instructions.
https://reviews.llvm.org/D49243 llvm-svn: 338365
This commit is contained in:
parent
c0ea535a72
commit
fac48f4efe
@ -1750,7 +1750,7 @@ def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>, // AH = flags
|
||||
// Bit tests instructions: BT, BTS, BTR, BTC.
|
||||
|
||||
let Defs = [EFLAGS] in {
|
||||
let SchedRW = [WriteALU] in {
|
||||
let SchedRW = [WriteBitTest] in {
|
||||
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
|
||||
"bt{w}\t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>,
|
||||
@ -1783,7 +1783,7 @@ let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
|
||||
[]>, TB, NotMemoryFoldable;
|
||||
}
|
||||
|
||||
let SchedRW = [WriteALU] in {
|
||||
let SchedRW = [WriteBitTest] in {
|
||||
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
|
||||
"bt{w}\t{$src2, $src1|$src1, $src2}",
|
||||
[(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>,
|
||||
@ -1818,7 +1818,7 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
|
||||
} // SchedRW
|
||||
|
||||
let hasSideEffects = 0 in {
|
||||
let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
|
||||
let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
|
||||
def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
|
||||
"btc{w}\t{$src2, $src1|$src1, $src2}", []>,
|
||||
OpSize16, TB, NotMemoryFoldable;
|
||||
@ -1842,7 +1842,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
|
||||
NotMemoryFoldable;
|
||||
}
|
||||
|
||||
let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
|
||||
let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
|
||||
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
|
||||
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
|
||||
def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
|
||||
@ -1861,7 +1861,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
|
||||
Requires<[In64BitMode]>;
|
||||
}
|
||||
|
||||
let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
|
||||
let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
|
||||
def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
|
||||
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
|
||||
OpSize16, TB, NotMemoryFoldable;
|
||||
@ -1885,7 +1885,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
|
||||
NotMemoryFoldable;
|
||||
}
|
||||
|
||||
let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
|
||||
let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
|
||||
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
|
||||
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
|
||||
OpSize16, TB;
|
||||
@ -1908,7 +1908,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
|
||||
Requires<[In64BitMode]>;
|
||||
}
|
||||
|
||||
let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
|
||||
let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
|
||||
def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
|
||||
"bts{w}\t{$src2, $src1|$src1, $src2}", []>,
|
||||
OpSize16, TB, NotMemoryFoldable;
|
||||
@ -1932,7 +1932,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
|
||||
NotMemoryFoldable;
|
||||
}
|
||||
|
||||
let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
|
||||
let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
|
||||
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
|
||||
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
|
||||
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
|
||||
|
@ -137,6 +137,7 @@ def : WriteRes<WriteSETCCStore, [BWPort06,BWPort4,BWPort237]> {
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
def : WriteRes<WriteLAHFSAHF, [BWPort06]>;
|
||||
def : WriteRes<WriteBitTest,[BWPort06]>; // Bit Test instrs
|
||||
|
||||
// Bit counts.
|
||||
defm : BWWriteResPair<WriteBSF, [BWPort1], 3>;
|
||||
@ -603,14 +604,6 @@ def BWWriteResGroup6 : SchedWriteRes<[BWPort06]> {
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
def: InstRW<[BWWriteResGroup6], (instrs CDQ, CQO)>;
|
||||
def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)ri8",
|
||||
"BT(16|32|64)rr",
|
||||
"BTC(16|32|64)ri8",
|
||||
"BTC(16|32|64)rr",
|
||||
"BTR(16|32|64)ri8",
|
||||
"BTR(16|32|64)rr",
|
||||
"BTS(16|32|64)ri8",
|
||||
"BTS(16|32|64)rr")>;
|
||||
|
||||
def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> {
|
||||
let Latency = 1;
|
||||
|
@ -150,6 +150,7 @@ def : WriteRes<WriteSETCCStore, [HWPort06,HWPort4,HWPort237]> {
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
def : WriteRes<WriteLAHFSAHF, [HWPort06]>;
|
||||
def : WriteRes<WriteBitTest,[HWPort06]>;
|
||||
|
||||
// This is for simple LEAs with one or two input operands.
|
||||
// The complex ones can only execute on port 1, and they require two cycles on
|
||||
@ -895,14 +896,6 @@ def HWWriteResGroup7 : SchedWriteRes<[HWPort06]> {
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
def: InstRW<[HWWriteResGroup7], (instrs CDQ, CQO)>;
|
||||
def: InstRW<[HWWriteResGroup7], (instregex "BT(16|32|64)ri8",
|
||||
"BT(16|32|64)rr",
|
||||
"BTC(16|32|64)ri8",
|
||||
"BTC(16|32|64)rr",
|
||||
"BTR(16|32|64)ri8",
|
||||
"BTR(16|32|64)rr",
|
||||
"BTS(16|32|64)ri8",
|
||||
"BTS(16|32|64)rr")>;
|
||||
|
||||
def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> {
|
||||
let Latency = 1;
|
||||
|
@ -145,6 +145,7 @@ def : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
def : WriteRes<WriteLAHFSAHF, [SBPort05]>;
|
||||
def : WriteRes<WriteBitTest,[SBPort05]>;
|
||||
|
||||
// This is for simple LEAs with one or two input operands.
|
||||
// The complex ones can only execute on port 1, and they require two cycles on
|
||||
@ -570,14 +571,6 @@ def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>;
|
||||
def: InstRW<[SBWriteResGroup4], (instregex "BT(16|32|64)ri8",
|
||||
"BT(16|32|64)rr",
|
||||
"BTC(16|32|64)ri8",
|
||||
"BTC(16|32|64)rr",
|
||||
"BTR(16|32|64)ri8",
|
||||
"BTR(16|32|64)rr",
|
||||
"BTS(16|32|64)ri8",
|
||||
"BTS(16|32|64)rr")>;
|
||||
|
||||
def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
|
||||
let Latency = 1;
|
||||
|
@ -136,6 +136,7 @@ def : WriteRes<WriteSETCCStore, [SKLPort06,SKLPort4,SKLPort237]> {
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
def : WriteRes<WriteLAHFSAHF, [SKLPort06]>;
|
||||
def : WriteRes<WriteBitTest,[SKLPort06]>; // Bit Test instrs
|
||||
|
||||
// Bit counts.
|
||||
defm : SKLWriteResPair<WriteBSF, [SKLPort1], 3>;
|
||||
@ -605,14 +606,6 @@ def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> {
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
def: InstRW<[SKLWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
|
||||
def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)ri8",
|
||||
"BT(16|32|64)rr",
|
||||
"BTC(16|32|64)ri8",
|
||||
"BTC(16|32|64)rr",
|
||||
"BTR(16|32|64)ri8",
|
||||
"BTR(16|32|64)rr",
|
||||
"BTS(16|32|64)ri8",
|
||||
"BTS(16|32|64)rr")>;
|
||||
|
||||
def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> {
|
||||
let Latency = 1;
|
||||
|
@ -136,6 +136,7 @@ def : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
|
||||
let NumMicroOps = 3;
|
||||
}
|
||||
def : WriteRes<WriteLAHFSAHF, [SKXPort06]>;
|
||||
def : WriteRes<WriteBitTest,[SKXPort06]>; // Bit Test instrs
|
||||
|
||||
// Integer shifts and rotates.
|
||||
defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
|
||||
@ -618,14 +619,6 @@ def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
|
||||
def: InstRW<[SKXWriteResGroup7], (instregex "BT(16|32|64)ri8",
|
||||
"BT(16|32|64)rr",
|
||||
"BTC(16|32|64)ri8",
|
||||
"BTC(16|32|64)rr",
|
||||
"BTR(16|32|64)ri8",
|
||||
"BTR(16|32|64)rr",
|
||||
"BTS(16|32|64)ri8",
|
||||
"BTS(16|32|64)rr")>;
|
||||
|
||||
def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
|
||||
let Latency = 1;
|
||||
|
@ -142,6 +142,7 @@ def WriteFCMOV : SchedWrite; // X87 conditional move.
|
||||
def WriteSETCC : SchedWrite; // Set register based on condition code.
|
||||
def WriteSETCCStore : SchedWrite;
|
||||
def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
|
||||
def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support
|
||||
|
||||
// Integer shifts and rotates.
|
||||
defm WriteShift : X86SchedWritePair;
|
||||
|
@ -108,6 +108,7 @@ def : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
|
||||
let Latency = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def : WriteRes<WriteBitTest,[AtomPort01]>;
|
||||
|
||||
defm : X86WriteResUnsupported<WriteIMulH>;
|
||||
|
||||
|
@ -188,6 +188,7 @@ defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional m
|
||||
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
|
||||
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
|
||||
def : WriteRes<WriteLAHFSAHF, [JALU01]>;
|
||||
def : WriteRes<WriteBitTest,[JALU01]>;
|
||||
|
||||
// This is for simple LEAs with one or two input operands.
|
||||
def : WriteRes<WriteLEA, [JALU01]>;
|
||||
|
@ -120,6 +120,7 @@ def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
|
||||
let ResourceCycles = [2,1];
|
||||
}
|
||||
def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
|
||||
def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
|
||||
|
||||
// This is for simple LEAs with one or two input operands.
|
||||
// The complex ones can only execute on port 1, and they require two cycles on
|
||||
|
@ -198,6 +198,7 @@ defm : ZnWriteResPair<WriteCMOV2, [ZnALU], 1>;
|
||||
def : WriteRes<WriteSETCC, [ZnALU]>;
|
||||
def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>;
|
||||
defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
|
||||
def : WriteRes<WriteBitTest,[ZnALU]>;
|
||||
|
||||
// Bit counts.
|
||||
defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>;
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/Regex.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
@ -33,6 +34,16 @@ using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "subtarget-emitter"
|
||||
|
||||
#ifdef EXPENSIVE_CHECKS
|
||||
// FIXME: TableGen fails iff EXPENSIVE_CHECKS is defined
|
||||
static constexpr bool OptCheckSchedClasses = true;
|
||||
#else
|
||||
// FIXME: the default value should be false
|
||||
static cl::opt<bool> OptCheckSchedClasses(
|
||||
"check-sched-class-table", cl::init(true), cl::Hidden,
|
||||
cl::desc("Check sched class table on different types of inconsistencies"));
|
||||
#endif
|
||||
|
||||
#ifndef NDEBUG
|
||||
static void dumpIdxVec(ArrayRef<unsigned> V) {
|
||||
for (unsigned Idx : V)
|
||||
@ -223,6 +234,7 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK,
|
||||
collectOptionalProcessorInfo();
|
||||
|
||||
checkCompleteness();
|
||||
checkSchedClasses();
|
||||
}
|
||||
|
||||
void CodeGenSchedModels::collectRetireControlUnits() {
|
||||
@ -699,6 +711,86 @@ void CodeGenSchedModels::collectSchedClasses() {
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenSchedModels::checkSchedClasses() {
|
||||
if (!OptCheckSchedClasses)
|
||||
return;
|
||||
|
||||
std::string str;
|
||||
raw_string_ostream OS(str);
|
||||
|
||||
// Check each instruction for each model to see if its overridden too often.
|
||||
// Iff YES it's a candidate for more fine-grained Sched Class.
|
||||
for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
|
||||
StringRef InstName = Inst->TheDef->getName();
|
||||
unsigned SCIdx = getSchedClassIdx(*Inst);
|
||||
if (!SCIdx)
|
||||
continue;
|
||||
CodeGenSchedClass &SC = getSchedClass(SCIdx);
|
||||
if (SC.Writes.empty())
|
||||
continue;
|
||||
const RecVec &RWDefs = SchedClasses[SCIdx].InstRWs;
|
||||
if (RWDefs.empty())
|
||||
continue;
|
||||
// FIXME: what should be threshold here?
|
||||
if (RWDefs.size() > (ProcModels.size() / 2)) {
|
||||
// FIXME: this dump hangs the execution !!!
|
||||
// SC.dump(&Target.getSchedModels());
|
||||
OS << "SchedRW machine model for inst '" << InstName << "' (";
|
||||
for (auto I : SC.Writes)
|
||||
OS << " " << SchedWrites[I].Name;
|
||||
for (auto I : SC.Reads)
|
||||
OS << " " << SchedReads[I].Name;
|
||||
OS << " ) should be updated /improvedbecause it's overriden " << RWDefs.size()
|
||||
<< " times out of " << ProcModels.size() << " models:\n\t";
|
||||
for (Record *RWDef : RWDefs)
|
||||
OS << " " << getProcModel(RWDef->getValueAsDef("SchedModel")).ModelName;
|
||||
PrintWarning(OS.str());
|
||||
str.clear();
|
||||
}
|
||||
|
||||
// TODO: here we should check latency/uop in SC vs. RWDef. Maybe we
|
||||
// should do it iff RWDefs.size() == 1 only.
|
||||
// Iff latency/uop are the same then warn about unnecessary redefine.
|
||||
if (RWDefs.size()) {
|
||||
for (Record *RWDef : RWDefs) {
|
||||
IdxVec Writes;
|
||||
IdxVec Reads;
|
||||
findRWs(RWDef->getValueAsListOfDefs("OperandReadWrites"), Writes,
|
||||
Reads);
|
||||
|
||||
if ((Writes.size() == SC.Writes.size()) &&
|
||||
(Reads.size() == SC.Reads.size())) {
|
||||
// TODO: do we need sorting Write & Reads?
|
||||
for (unsigned I = 0, S = SC.Writes.size(); I < S; I++) {
|
||||
auto SCSchedW = SchedWrites[SC.Writes[I]];
|
||||
auto SchedW = SchedWrites[Writes[I]];
|
||||
if (!SCSchedW.TheDef || !SchedW.TheDef)
|
||||
continue;
|
||||
const RecordVal *R = SCSchedW.TheDef->getValue("Latency");
|
||||
// FIXME: We should deal with default Latency here
|
||||
if (!R || !R->getValue())
|
||||
continue;
|
||||
auto SCLat = SCSchedW.TheDef->getValueAsInt("Latency");
|
||||
auto SCuOp = SCSchedW.TheDef->getValueAsInt("NumMicroOps");
|
||||
auto Lat = SchedW.TheDef->getValueAsInt("Latency");
|
||||
auto uOp = SchedW.TheDef->getValueAsInt("NumMicroOps");
|
||||
if ((SCLat == Lat) && (SCuOp == uOp))
|
||||
OS << "Overridden verion of inst '" << InstName
|
||||
<< "' has the same latency & uOp values as the original one "
|
||||
"for model '"
|
||||
<< getProcModel(RWDef->getValueAsDef("SchedModel")).ModelName
|
||||
<< "'\n";
|
||||
}
|
||||
if (!str.empty()) {
|
||||
PrintWarning(OS.str());
|
||||
str.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get the SchedClass index for an instruction.
|
||||
unsigned
|
||||
CodeGenSchedModels::getSchedClassIdx(const CodeGenInstruction &Inst) const {
|
||||
|
@ -443,6 +443,8 @@ private:
|
||||
|
||||
void collectSchedClasses();
|
||||
|
||||
/// Warn about scheduling classes that are overridden by InstRW in many
/// processor models, or whose overrides repeat the original latency/uOps.
void checkSchedClasses();
|
||||
|
||||
void collectRetireControlUnits();
|
||||
|
||||
void collectRegisterFiles();
|
||||
|
Loading…
Reference in New Issue
Block a user