mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-09 22:13:07 +00:00
[X86][SchedModel] Add missing scheduling model for SSE related instructions.
The patch defines new or refines existing generic scheduling classes to match the behavior of the SSE instructions. It also maps those scheduling classes on the related SSE instructions. <rdar://problem/15607571> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@202065 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
68e1531d39
commit
b55c398992
File diff suppressed because it is too large
Load Diff
@ -50,6 +50,7 @@ def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;
|
||||
def HWPort05 : ProcResGroup<[HWPort0, HWPort5]>;
|
||||
def HWPort06 : ProcResGroup<[HWPort0, HWPort6]>;
|
||||
def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>;
|
||||
def HWPort16 : ProcResGroup<[HWPort1, HWPort6]>;
|
||||
def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
|
||||
def HWPort0156: ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;
|
||||
|
||||
@ -125,6 +126,18 @@ defm : HWWriteResPair<WriteFSqrt, HWPort0, 15>;
|
||||
defm : HWWriteResPair<WriteCvtF2I, HWPort1, 3>;
|
||||
defm : HWWriteResPair<WriteCvtI2F, HWPort1, 4>;
|
||||
defm : HWWriteResPair<WriteCvtF2F, HWPort1, 3>;
|
||||
defm : HWWriteResPair<WriteFShuffle, HWPort5, 1>;
|
||||
defm : HWWriteResPair<WriteFBlend, HWPort015, 1>;
|
||||
defm : HWWriteResPair<WriteFShuffle256, HWPort5, 3>;
|
||||
|
||||
def : WriteRes<WriteFVarBlend, [HWPort5]> {
|
||||
let Latency = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def : WriteRes<WriteFVarBlendLd, [HWPort5, HWPort23]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [2, 1];
|
||||
}
|
||||
|
||||
// Vector integer operations.
|
||||
defm : HWWriteResPair<WriteVecShift, HWPort0, 1>;
|
||||
@ -132,7 +145,117 @@ defm : HWWriteResPair<WriteVecLogic, HWPort015, 1>;
|
||||
defm : HWWriteResPair<WriteVecALU, HWPort15, 1>;
|
||||
defm : HWWriteResPair<WriteVecIMul, HWPort0, 5>;
|
||||
defm : HWWriteResPair<WriteShuffle, HWPort5, 1>;
|
||||
defm : HWWriteResPair<WriteBlend, HWPort15, 1>;
|
||||
defm : HWWriteResPair<WriteShuffle256, HWPort5, 3>;
|
||||
|
||||
def : WriteRes<WriteVarBlend, [HWPort5]> {
|
||||
let Latency = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def : WriteRes<WriteVarBlendLd, [HWPort5, HWPort23]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [2, 1];
|
||||
}
|
||||
|
||||
def : WriteRes<WriteVarVecShift, [HWPort0, HWPort5]> {
|
||||
let Latency = 2;
|
||||
let ResourceCycles = [2, 1];
|
||||
}
|
||||
def : WriteRes<WriteVarVecShiftLd, [HWPort0, HWPort5, HWPort23]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [2, 1, 1];
|
||||
}
|
||||
|
||||
def : WriteRes<WriteMPSAD, [HWPort0, HWPort5]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [1, 2];
|
||||
}
|
||||
def : WriteRes<WriteMPSADLd, [HWPort23, HWPort0, HWPort5]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [1, 1, 2];
|
||||
}
|
||||
|
||||
// String instructions.
|
||||
// Packed Compare Implicit Length Strings, Return Mask
|
||||
def : WriteRes<WritePCmpIStrM, [HWPort0]> {
|
||||
let Latency = 10;
|
||||
let ResourceCycles = [3];
|
||||
}
|
||||
def : WriteRes<WritePCmpIStrMLd, [HWPort0, HWPort23]> {
|
||||
let Latency = 10;
|
||||
let ResourceCycles = [3, 1];
|
||||
}
|
||||
|
||||
// Packed Compare Explicit Length Strings, Return Mask
|
||||
def : WriteRes<WritePCmpEStrM, [HWPort0, HWPort16, HWPort5]> {
|
||||
let Latency = 10;
|
||||
let ResourceCycles = [3, 2, 4];
|
||||
}
|
||||
def : WriteRes<WritePCmpEStrMLd, [HWPort05, HWPort16, HWPort23]> {
|
||||
let Latency = 10;
|
||||
let ResourceCycles = [6, 2, 1];
|
||||
}
|
||||
|
||||
// Packed Compare Implicit Length Strings, Return Index
|
||||
def : WriteRes<WritePCmpIStrI, [HWPort0]> {
|
||||
let Latency = 11;
|
||||
let ResourceCycles = [3];
|
||||
}
|
||||
def : WriteRes<WritePCmpIStrILd, [HWPort0, HWPort23]> {
|
||||
let Latency = 11;
|
||||
let ResourceCycles = [3, 1];
|
||||
}
|
||||
|
||||
// Packed Compare Explicit Length Strings, Return Index
|
||||
def : WriteRes<WritePCmpEStrI, [HWPort05, HWPort16]> {
|
||||
let Latency = 11;
|
||||
let ResourceCycles = [6, 2];
|
||||
}
|
||||
def : WriteRes<WritePCmpEStrILd, [HWPort0, HWPort16, HWPort5, HWPort23]> {
|
||||
let Latency = 11;
|
||||
let ResourceCycles = [3, 2, 2, 1];
|
||||
}
|
||||
|
||||
// AES Instructions.
|
||||
def : WriteRes<WriteAESDecEnc, [HWPort5]> {
|
||||
let Latency = 7;
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
def : WriteRes<WriteAESDecEncLd, [HWPort5, HWPort23]> {
|
||||
let Latency = 7;
|
||||
let ResourceCycles = [1, 1];
|
||||
}
|
||||
|
||||
def : WriteRes<WriteAESIMC, [HWPort5]> {
|
||||
let Latency = 14;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def : WriteRes<WriteAESIMCLd, [HWPort5, HWPort23]> {
|
||||
let Latency = 14;
|
||||
let ResourceCycles = [2, 1];
|
||||
}
|
||||
|
||||
def : WriteRes<WriteAESKeyGen, [HWPort0, HWPort5]> {
|
||||
let Latency = 10;
|
||||
let ResourceCycles = [2, 8];
|
||||
}
|
||||
def : WriteRes<WriteAESKeyGenLd, [HWPort0, HWPort5, HWPort23]> {
|
||||
let Latency = 10;
|
||||
let ResourceCycles = [2, 7, 1];
|
||||
}
|
||||
|
||||
// Carry-less multiplication instructions.
|
||||
def : WriteRes<WriteCLMul, [HWPort0, HWPort5]> {
|
||||
let Latency = 7;
|
||||
let ResourceCycles = [2, 1];
|
||||
}
|
||||
def : WriteRes<WriteCLMulLd, [HWPort0, HWPort5, HWPort23]> {
|
||||
let Latency = 7;
|
||||
let ResourceCycles = [2, 1, 1];
|
||||
}
|
||||
|
||||
def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; }
|
||||
def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
|
||||
def : WriteRes<WriteFence, [HWPort23, HWPort4]>;
|
||||
def : WriteRes<WriteNop, []>;
|
||||
} // SchedModel
|
||||
|
@ -118,6 +118,16 @@ defm : SBWriteResPair<WriteFSqrt, SBPort0, 15>;
|
||||
defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>;
|
||||
defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>;
|
||||
defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>;
|
||||
defm : SBWriteResPair<WriteFShuffle, SBPort5, 1>;
|
||||
defm : SBWriteResPair<WriteFBlend, SBPort05, 1>;
|
||||
def : WriteRes<WriteFVarBlend, [SBPort0, SBPort5]> {
|
||||
let Latency = 2;
|
||||
let ResourceCycles = [1, 1];
|
||||
}
|
||||
def : WriteRes<WriteFVarBlendLd, [SBPort0, SBPort5, SBPort23]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [1, 1, 1];
|
||||
}
|
||||
|
||||
// Vector integer operations.
|
||||
defm : SBWriteResPair<WriteVecShift, SBPort05, 1>;
|
||||
@ -125,7 +135,112 @@ defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>;
|
||||
defm : SBWriteResPair<WriteVecALU, SBPort15, 1>;
|
||||
defm : SBWriteResPair<WriteVecIMul, SBPort0, 5>;
|
||||
defm : SBWriteResPair<WriteShuffle, SBPort15, 1>;
|
||||
defm : SBWriteResPair<WriteBlend, SBPort15, 1>;
|
||||
def : WriteRes<WriteVarBlend, [SBPort1, SBPort5]> {
|
||||
let Latency = 2;
|
||||
let ResourceCycles = [1, 1];
|
||||
}
|
||||
def : WriteRes<WriteVarBlendLd, [SBPort1, SBPort5, SBPort23]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [1, 1, 1];
|
||||
}
|
||||
def : WriteRes<WriteMPSAD, [SBPort0, SBPort1, SBPort5]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [1, 1, 1];
|
||||
}
|
||||
def : WriteRes<WriteMPSADLd, [SBPort0, SBPort1, SBPort5, SBPort23]> {
|
||||
let Latency = 6;
|
||||
let ResourceCycles = [1, 1, 1, 1];
|
||||
}
|
||||
|
||||
// String instructions.
|
||||
// Packed Compare Implicit Length Strings, Return Mask
|
||||
def : WriteRes<WritePCmpIStrM, [SBPort015]> {
|
||||
let Latency = 11;
|
||||
let ResourceCycles = [3];
|
||||
}
|
||||
def : WriteRes<WritePCmpIStrMLd, [SBPort015, SBPort23]> {
|
||||
let Latency = 11;
|
||||
let ResourceCycles = [3, 1];
|
||||
}
|
||||
|
||||
// Packed Compare Explicit Length Strings, Return Mask
|
||||
def : WriteRes<WritePCmpEStrM, [SBPort015]> {
|
||||
let Latency = 11;
|
||||
let ResourceCycles = [8];
|
||||
}
|
||||
def : WriteRes<WritePCmpEStrMLd, [SBPort015, SBPort23]> {
|
||||
let Latency = 11;
|
||||
let ResourceCycles = [7, 1];
|
||||
}
|
||||
|
||||
// Packed Compare Implicit Length Strings, Return Index
|
||||
def : WriteRes<WritePCmpIStrI, [SBPort015]> {
|
||||
let Latency = 3;
|
||||
let ResourceCycles = [3];
|
||||
}
|
||||
def : WriteRes<WritePCmpIStrILd, [SBPort015, SBPort23]> {
|
||||
let Latency = 3;
|
||||
let ResourceCycles = [3, 1];
|
||||
}
|
||||
|
||||
// Packed Compare Explicit Length Strings, Return Index
|
||||
def : WriteRes<WritePCmpEStrI, [SBPort015]> {
|
||||
let Latency = 4;
|
||||
let ResourceCycles = [8];
|
||||
}
|
||||
def : WriteRes<WritePCmpEStrILd, [SBPort015, SBPort23]> {
|
||||
let Latency = 4;
|
||||
let ResourceCycles = [7, 1];
|
||||
}
|
||||
|
||||
// AES Instructions.
|
||||
def : WriteRes<WriteAESDecEnc, [SBPort015]> {
|
||||
let Latency = 8;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def : WriteRes<WriteAESDecEncLd, [SBPort015, SBPort23]> {
|
||||
let Latency = 8;
|
||||
let ResourceCycles = [2, 1];
|
||||
}
|
||||
|
||||
def : WriteRes<WriteAESIMC, [SBPort015]> {
|
||||
let Latency = 8;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def : WriteRes<WriteAESIMCLd, [SBPort015, SBPort23]> {
|
||||
let Latency = 8;
|
||||
let ResourceCycles = [2, 1];
|
||||
}
|
||||
|
||||
def : WriteRes<WriteAESKeyGen, [SBPort015]> {
|
||||
let Latency = 8;
|
||||
let ResourceCycles = [11];
|
||||
}
|
||||
def : WriteRes<WriteAESKeyGenLd, [SBPort015, SBPort23]> {
|
||||
let Latency = 8;
|
||||
let ResourceCycles = [10, 1];
|
||||
}
|
||||
|
||||
// Carry-less multiplication instructions.
|
||||
def : WriteRes<WriteCLMul, [SBPort015]> {
|
||||
let Latency = 14;
|
||||
let ResourceCycles = [18];
|
||||
}
|
||||
def : WriteRes<WriteCLMulLd, [SBPort015, SBPort23]> {
|
||||
let Latency = 14;
|
||||
let ResourceCycles = [17, 1];
|
||||
}
|
||||
|
||||
|
||||
def : WriteRes<WriteSystem, [SBPort015]> { let Latency = 100; }
|
||||
def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
|
||||
def : WriteRes<WriteFence, [SBPort23, SBPort4]>;
|
||||
def : WriteRes<WriteNop, []>;
|
||||
|
||||
// AVX2 is not supported on this architecture, but we should define the basic
|
||||
// scheduling resources anyway.
|
||||
defm : SBWriteResPair<WriteFShuffle256, SBPort0, 1>;
|
||||
defm : SBWriteResPair<WriteShuffle256, SBPort0, 1>;
|
||||
defm : SBWriteResPair<WriteVarVecShift, SBPort0, 1>;
|
||||
} // SchedModel
|
||||
|
@ -69,6 +69,9 @@ defm WriteFDiv : X86SchedWritePair; // Floating point division.
|
||||
defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
|
||||
defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal.
|
||||
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
|
||||
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
|
||||
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
|
||||
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
|
||||
|
||||
// FMA Scheduling helper class.
|
||||
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
|
||||
@ -77,23 +80,55 @@ class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
|
||||
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
|
||||
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
|
||||
defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply.
|
||||
defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
|
||||
defm WriteBlend : X86SchedWritePair; // Vector blends.
|
||||
defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
|
||||
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
|
||||
|
||||
// Vector bitwise operations.
|
||||
// These are often used on both floating point and integer vectors.
|
||||
defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor.
|
||||
defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends.
|
||||
|
||||
// Conversion between integer and float.
|
||||
defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
|
||||
defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
|
||||
defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
|
||||
|
||||
// String instructions.
|
||||
// Packed Compare Implicit Length Strings, Return Mask
|
||||
defm WritePCmpIStrM : X86SchedWritePair;
|
||||
// Packed Compare Explicit Length Strings, Return Mask
|
||||
defm WritePCmpEStrM : X86SchedWritePair;
|
||||
// Packed Compare Implicit Length Strings, Return Index
|
||||
defm WritePCmpIStrI : X86SchedWritePair;
|
||||
// Packed Compare Explicit Length Strings, Return Index
|
||||
defm WritePCmpEStrI : X86SchedWritePair;
|
||||
|
||||
// AES instructions.
|
||||
defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption.
|
||||
defm WriteAESIMC : X86SchedWritePair; // InvMixColumn.
|
||||
defm WriteAESKeyGen : X86SchedWritePair; // Key Generation.
|
||||
|
||||
// Carry-less multiplication instructions.
|
||||
defm WriteCLMul : X86SchedWritePair;
|
||||
|
||||
// Catch-all for expensive system instructions.
|
||||
def WriteSystem : SchedWrite;
|
||||
|
||||
// AVX2.
|
||||
defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles.
|
||||
defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
|
||||
defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
|
||||
|
||||
// Old microcoded instructions that nobody uses.
|
||||
def WriteMicrocoded : SchedWrite;
|
||||
|
||||
// Fence instructions.
|
||||
def WriteFence : SchedWrite;
|
||||
|
||||
// Nop, not very useful except that it provides a model for nops!
|
||||
def WriteNop : SchedWrite;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction Itinerary classes used for X86
|
||||
def IIC_ALU_MEM : InstrItinClass;
|
||||
|
Loading…
x
Reference in New Issue
Block a user