mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-14 23:48:56 +00:00
I'm starting to commit the KNL backend. I'll push the patches one by one. This patch adds support for the extended register set: XMM16-31, YMM16-31, and ZMM0-31.
The full ISA is documented here: http://software.intel.com/en-us/intel-isa-extensions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187030 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9b3b286247
commit
e3809eed34
@ -86,6 +86,16 @@ def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
|
||||
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
|
||||
"Enable AVX2 instructions",
|
||||
[FeatureAVX]>;
|
||||
def FeatureAVX512 : SubtargetFeature<"avx-512", "X86SSELevel", "AVX512",
|
||||
"Enable AVX-512 instructions",
|
||||
[FeatureAVX2]>;
|
||||
def FeatureERI : SubtargetFeature<"avx-512-eri", "HasERI", "true",
|
||||
"Enable AVX-512 Exponential and Reciprocal Instructions">;
|
||||
def FeatureCDI : SubtargetFeature<"avx-512-cdi", "HasCDI", "true",
|
||||
"Enable AVX-512 Conflict Detection Instructions">;
|
||||
def FeaturePFI : SubtargetFeature<"avx-512-pfi", "HasPFI", "true",
|
||||
"Enable AVX-512 PreFetch Instructions">;
|
||||
|
||||
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
|
||||
"Enable packed carry-less multiplication instructions",
|
||||
[FeatureSSE2]>;
|
||||
@ -227,6 +237,15 @@ def : ProcessorModel<"core-avx2", HaswellModel,
|
||||
FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
|
||||
FeatureHLE]>;
|
||||
|
||||
// KNL
|
||||
// FIXME: define KNL model
|
||||
def : ProcessorModel<"knl", HaswellModel,
|
||||
[FeatureAVX512, FeatureERI, FeatureCDI, FeaturePFI,
|
||||
FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
|
||||
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
|
||||
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
|
||||
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE]>;
|
||||
|
||||
def : Proc<"k6", [FeatureMMX]>;
|
||||
def : Proc<"k6-2", [Feature3DNow]>;
|
||||
def : Proc<"k6-3", [Feature3DNow]>;
|
||||
|
@ -49,6 +49,12 @@ def RetCC_X86Common : CallingConv<[
|
||||
CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
|
||||
CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
|
||||
|
||||
// 512-bit vectors are returned in ZMM0 and ZMM1, when they fit. ZMM2 and ZMM3
|
||||
// can only be used by ABI non-compliant code. This vector type is only
|
||||
// supported while using the AVX-512 target feature.
|
||||
CCIfType<[v16i32, v8i64, v16f32, v8f64],
|
||||
CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
|
||||
|
||||
// MMX vector types are always returned in MM0. If the target doesn't have
|
||||
// MM0, it doesn't support these vector types.
|
||||
CCIfType<[x86mmx], CCAssignToReg<[MM0]>>,
|
||||
@ -99,6 +105,10 @@ def RetCC_Intel_OCL_BI : CallingConv<[
|
||||
CCIfType<[v8f32, v4f64, v8i32, v4i64],
|
||||
CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
|
||||
|
||||
// 512-bit vectors (floating point and integer)
|
||||
CCIfType<[v16f32, v8f64, v16i32, v8i64],
|
||||
CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
|
||||
|
||||
// i32, i64 in the standard way
|
||||
CCDelegateTo<RetCC_X86Common>
|
||||
]>;
|
||||
@ -213,10 +223,15 @@ def CC_X86_64_C : CallingConv<[
|
||||
// fixed arguments to vararg functions are supposed to be passed in
|
||||
// registers. Actually modeling that would be a lot of work, though.
|
||||
CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
|
||||
CCIfSubtarget<"hasAVX()",
|
||||
CCIfSubtarget<"hasFp256()",
|
||||
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3,
|
||||
YMM4, YMM5, YMM6, YMM7]>>>>,
|
||||
|
||||
// The first 8 512-bit vector arguments are passed in ZMM registers.
|
||||
CCIfNotVarArg<CCIfType<[v16i32, v8i64, v16f32, v8f64],
|
||||
CCIfSubtarget<"hasAVX512()",
|
||||
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7]>>>>,
|
||||
|
||||
// Integer/FP values get stored in stack slots that are 8 bytes in size and
|
||||
// 8-byte aligned if there are no more registers to hold them.
|
||||
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
|
||||
@ -230,7 +245,11 @@ def CC_X86_64_C : CallingConv<[
|
||||
|
||||
// 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
|
||||
CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
|
||||
CCAssignToStack<32, 32>>
|
||||
CCAssignToStack<32, 32>>,
|
||||
|
||||
// 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
|
||||
CCIfType<[v16i32, v8i64, v16f32, v8f64],
|
||||
CCAssignToStack<64, 64>>
|
||||
]>;
|
||||
|
||||
// Calling convention used on Win64
|
||||
@ -251,6 +270,9 @@ def CC_X86_Win64_C : CallingConv<[
|
||||
// 256 bit vectors are passed by pointer
|
||||
CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCPassIndirect<i64>>,
|
||||
|
||||
// 512 bit vectors are passed by pointer
|
||||
CCIfType<[v16i32, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
|
||||
|
||||
// The first 4 MMX vector arguments are passed in GPRs.
|
||||
CCIfType<[x86mmx], CCBitConvertToType<i64>>,
|
||||
|
||||
@ -345,7 +367,7 @@ def CC_X86_32_Common : CallingConv<[
|
||||
|
||||
// The first 4 AVX 256-bit vector arguments are passed in YMM registers.
|
||||
CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
|
||||
CCIfSubtarget<"hasAVX()",
|
||||
CCIfSubtarget<"hasFp256()",
|
||||
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
|
||||
|
||||
// Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
|
||||
@ -469,6 +491,10 @@ def CC_Intel_OCL_BI : CallingConv<[
|
||||
CCIfType<[v8f32, v4f64, v8i32, v4i64],
|
||||
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>,
|
||||
|
||||
// The 512-bit vector arguments are passed in ZMM registers.
|
||||
CCIfType<[v16f32, v8f64, v16i32, v8i64],
|
||||
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3]>>,
|
||||
|
||||
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
|
||||
CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64_C>>,
|
||||
CCDelegateTo<CC_X86_32_C>
|
||||
@ -535,6 +561,10 @@ def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12,
|
||||
R13, R14, R15,
|
||||
(sequence "YMM%u", 6, 15))>;
|
||||
|
||||
def CSR_Win64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI,
|
||||
R12, R13, R14, R15,
|
||||
(sequence "ZMM%u", 6, 21),
|
||||
K4, K5, K6, K7)>;
|
||||
//Standard C + XMM 8-15
|
||||
def CSR_64_Intel_OCL_BI : CalleeSavedRegs<(add CSR_64,
|
||||
(sequence "XMM%u", 8, 15))>;
|
||||
@ -542,3 +572,7 @@ def CSR_64_Intel_OCL_BI : CalleeSavedRegs<(add CSR_64,
|
||||
//Standard C + YMM 8-15
|
||||
def CSR_64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add CSR_64,
|
||||
(sequence "YMM%u", 8, 15))>;
|
||||
|
||||
def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add CSR_64,
|
||||
(sequence "ZMM%u", 16, 31),
|
||||
K4, K5, K6, K7)>;
|
||||
|
@ -18595,6 +18595,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
|
||||
case MVT::v8f32:
|
||||
case MVT::v4f64:
|
||||
return std::make_pair(0U, &X86::VR256RegClass);
|
||||
case MVT::v8f64:
|
||||
case MVT::v16f32:
|
||||
case MVT::v16i32:
|
||||
case MVT::v8i64:
|
||||
return std::make_pair(0U, &X86::VR512RegClass);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -18705,7 +18710,13 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
|
||||
}
|
||||
} else if (Res.second == &X86::FR32RegClass ||
|
||||
Res.second == &X86::FR64RegClass ||
|
||||
Res.second == &X86::VR128RegClass) {
|
||||
Res.second == &X86::VR128RegClass ||
|
||||
Res.second == &X86::VR256RegClass ||
|
||||
Res.second == &X86::FR32XRegClass ||
|
||||
Res.second == &X86::FR64XRegClass ||
|
||||
Res.second == &X86::VR128XRegClass ||
|
||||
Res.second == &X86::VR256XRegClass ||
|
||||
Res.second == &X86::VR512RegClass) {
|
||||
// Handle references to XMM physical registers that got mapped into the
|
||||
// wrong class. This can happen with constraints like {xmm0} where the
|
||||
// target independent register mapper will just pick the first match it can
|
||||
@ -18719,6 +18730,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
|
||||
Res.second = &X86::VR128RegClass;
|
||||
else if (X86::VR256RegClass.hasType(VT))
|
||||
Res.second = &X86::VR256RegClass;
|
||||
else if (X86::VR512RegClass.hasType(VT))
|
||||
Res.second = &X86::VR512RegClass;
|
||||
}
|
||||
|
||||
return Res;
|
||||
|
@ -241,6 +241,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
||||
|
||||
case CallingConv::Intel_OCL_BI: {
|
||||
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
|
||||
bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
|
||||
if (HasAVX512 && IsWin64)
|
||||
return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
|
||||
if (HasAVX512 && Is64Bit)
|
||||
return CSR_64_Intel_OCL_BI_AVX512_SaveList;
|
||||
if (HasAVX && IsWin64)
|
||||
return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
|
||||
if (HasAVX && Is64Bit)
|
||||
@ -275,8 +280,13 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
||||
const uint32_t*
|
||||
X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
|
||||
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
|
||||
bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
|
||||
|
||||
if (CC == CallingConv::Intel_OCL_BI) {
|
||||
if (IsWin64 && HasAVX512)
|
||||
return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
|
||||
if (Is64Bit && HasAVX512)
|
||||
return CSR_64_Intel_OCL_BI_AVX512_RegMask;
|
||||
if (IsWin64 && HasAVX)
|
||||
return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
|
||||
if (Is64Bit && HasAVX)
|
||||
@ -380,6 +390,12 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
||||
Reserved.set(*AI);
|
||||
}
|
||||
}
|
||||
if (!Is64Bit || !TM.getSubtarget<X86Subtarget>().hasAVX512()) {
|
||||
for (unsigned n = 16; n != 32; ++n) {
|
||||
for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
|
||||
Reserved.set(*AI);
|
||||
}
|
||||
}
|
||||
|
||||
return Reserved;
|
||||
}
|
||||
@ -690,4 +706,16 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned get512BitSuperRegister(unsigned Reg) {
|
||||
if (Reg >= X86::XMM0 && Reg <= X86::XMM31)
|
||||
return X86::ZMM0 + (Reg - X86::XMM0);
|
||||
if (Reg >= X86::YMM0 && Reg <= X86::YMM31)
|
||||
return X86::ZMM0 + (Reg - X86::YMM0);
|
||||
if (Reg >= X86::ZMM0 && Reg <= X86::ZMM31)
|
||||
return Reg;
|
||||
llvm_unreachable("Unexpected SIMD register");
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -137,6 +137,9 @@ public:
|
||||
// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) return X86:AX
|
||||
unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false);
|
||||
|
||||
// get512BitSuperRegister - X86 utility - returns the 512-bit super register
|
||||
unsigned get512BitSuperRegister(unsigned Reg);
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
||||
|
@ -26,6 +26,7 @@ let Namespace = "X86" in {
|
||||
def sub_16bit : SubRegIndex<16>;
|
||||
def sub_32bit : SubRegIndex<32>;
|
||||
def sub_xmm : SubRegIndex<128>;
|
||||
def sub_ymm : SubRegIndex<256>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -186,28 +187,53 @@ def XMM12: X86Reg<"xmm12", 12>, DwarfRegNum<[29, -2, -2]>;
|
||||
def XMM13: X86Reg<"xmm13", 13>, DwarfRegNum<[30, -2, -2]>;
|
||||
def XMM14: X86Reg<"xmm14", 14>, DwarfRegNum<[31, -2, -2]>;
|
||||
def XMM15: X86Reg<"xmm15", 15>, DwarfRegNum<[32, -2, -2]>;
|
||||
|
||||
// XMM16-31 registers, used by AVX-512 instructions.
// The first DWARF number follows the System V x86-64 psABI register mapping,
// which assigns 67-82 to xmm16-xmm31. The remaining two entries stay -2, the
// same placeholder the neighbouring XMM12-15 definitions use.
def XMM16: X86Reg<"xmm16", 16>, DwarfRegNum<[67, -2, -2]>;
def XMM17: X86Reg<"xmm17", 17>, DwarfRegNum<[68, -2, -2]>;
def XMM18: X86Reg<"xmm18", 18>, DwarfRegNum<[69, -2, -2]>;
def XMM19: X86Reg<"xmm19", 19>, DwarfRegNum<[70, -2, -2]>;
def XMM20: X86Reg<"xmm20", 20>, DwarfRegNum<[71, -2, -2]>;
def XMM21: X86Reg<"xmm21", 21>, DwarfRegNum<[72, -2, -2]>;
def XMM22: X86Reg<"xmm22", 22>, DwarfRegNum<[73, -2, -2]>;
def XMM23: X86Reg<"xmm23", 23>, DwarfRegNum<[74, -2, -2]>;
def XMM24: X86Reg<"xmm24", 24>, DwarfRegNum<[75, -2, -2]>;
def XMM25: X86Reg<"xmm25", 25>, DwarfRegNum<[76, -2, -2]>;
def XMM26: X86Reg<"xmm26", 26>, DwarfRegNum<[77, -2, -2]>;
def XMM27: X86Reg<"xmm27", 27>, DwarfRegNum<[78, -2, -2]>;
def XMM28: X86Reg<"xmm28", 28>, DwarfRegNum<[79, -2, -2]>;
def XMM29: X86Reg<"xmm29", 29>, DwarfRegNum<[80, -2, -2]>;
def XMM30: X86Reg<"xmm30", 30>, DwarfRegNum<[81, -2, -2]>;
def XMM31: X86Reg<"xmm31", 31>, DwarfRegNum<[82, -2, -2]>;
|
||||
|
||||
} // CostPerUse
|
||||
|
||||
// YMM0-15 registers, used by AVX instructions and
// YMM16-31 registers, used by AVX-512 instructions.
let SubRegIndices = [sub_xmm] in {
  // A single foreach defines all 32 YMM registers. Each YMM<n> lists XMM<n>
  // as its sub-register and aliases XMM<n>'s DWARF number. The former
  // hand-written YMM0..YMM15 defs are dropped because keeping them next to
  // this loop would define those records twice.
  foreach Index = 0-31 in {
    def YMM#Index : X86Reg<"ymm"#Index, Index, [!cast<X86Reg>("XMM"#Index)]>,
                    DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
  }
}
|
||||
|
||||
// ZMM Registers, used by AVX-512 instructions.
|
||||
let SubRegIndices = [sub_ymm] in {
  // Defines ZMM0..ZMM31. Each ZMM<n> lists YMM<n> as its sub-register and
  // aliases the DWARF number of XMM<n>.
  foreach N = 0-31 in {
    def ZMM#N : X86Reg<"zmm"#N, N, [!cast<X86Reg>("YMM"#N)]>,
                DwarfRegAlias<!cast<X86Reg>("XMM"#N)>;
  }
}
|
||||
|
||||
// Mask Registers, used by AVX-512 instructions.
|
||||
def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, -2, -2]>;
|
||||
def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, -2, -2]>;
|
||||
def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, -2, -2]>;
|
||||
def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, -2, -2]>;
|
||||
def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, -2, -2]>;
|
||||
def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, -2, -2]>;
|
||||
def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, -2, -2]>;
|
||||
def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, -2, -2]>;
|
||||
|
||||
class STRegister<string n, bits<16> Enc, list<Register> A> : X86Reg<n, Enc> {
|
||||
let Aliases = A;
|
||||
}
|
||||
@ -421,3 +447,25 @@ def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
|
||||
let CopyCost = -1; // Don't allow copying of status registers.
|
||||
let isAllocatable = 0;
|
||||
}
|
||||
|
||||
// AVX-512 vector/mask registers.
|
||||
def VR512 : RegisterClass<"X86", [v16f32, v8f64, v16i32, v8i64], 512,
|
||||
(sequence "ZMM%u", 0, 31)>;
|
||||
|
||||
// Scalar AVX-512 floating point registers.
|
||||
def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
|
||||
|
||||
def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>;
|
||||
|
||||
// Extended VR128 and VR256 for AVX-512 instructions
|
||||
def VR128X : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
||||
128, (add FR32X)>;
|
||||
def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
|
||||
256, (sequence "YMM%u", 0, 31)>;
|
||||
|
||||
def VK8 : RegisterClass<"X86", [v8i1], 8, (sequence "K%u", 0, 7)>;
|
||||
def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)>;
|
||||
|
||||
def VK8WM : RegisterClass<"X86", [v8i1], 8, (sub VK8, K0)>;
|
||||
def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)>;
|
||||
|
||||
|
@ -42,7 +42,7 @@ enum Style {
|
||||
class X86Subtarget : public X86GenSubtargetInfo {
|
||||
protected:
|
||||
/// X86SSEEnum - SIMD feature levels in increasing order. Levels are compared
/// with >= (e.g. X86SSELevel >= AVX), so a new level must be appended after
/// the levels it extends.
enum X86SSEEnum {
  NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
};
|
||||
|
||||
enum X863DNowEnum {
|
||||
@ -169,6 +169,15 @@ protected:
|
||||
/// address generation (AG) time.
|
||||
bool LEAUsesAG;
|
||||
|
||||
/// Processor has AVX-512 PreFetch Instructions
|
||||
bool HasPFI;
|
||||
|
||||
/// Processor has AVX-512 Exponential and Reciprocal Instructions
|
||||
bool HasERI;
|
||||
|
||||
/// Processor has AVX-512 Conflict Detection Instructions
|
||||
bool HasCDI;
|
||||
|
||||
/// stackAlignment - The minimum alignment known to hold of the stack frame on
|
||||
/// entry to the function and which must be maintained by every function.
|
||||
unsigned stackAlignment;
|
||||
@ -249,6 +258,7 @@ public:
|
||||
bool hasSSE42() const { return X86SSELevel >= SSE42; }
|
||||
bool hasAVX() const { return X86SSELevel >= AVX; }
|
||||
bool hasAVX2() const { return X86SSELevel >= AVX2; }
|
||||
bool hasAVX512() const { return X86SSELevel >= AVX512; }
|
||||
bool hasFp256() const { return hasAVX(); }
|
||||
bool hasInt256() const { return hasAVX2(); }
|
||||
bool hasSSE4A() const { return HasSSE4A; }
|
||||
@ -282,6 +292,9 @@ public:
|
||||
bool padShortFunctions() const { return PadShortFunctions; }
|
||||
bool callRegIndirect() const { return CallRegIndirect; }
|
||||
bool LEAusesAG() const { return LEAUsesAG; }
|
||||
bool hasCDI() const { return HasCDI; }
|
||||
bool hasPFI() const { return HasPFI; }
|
||||
bool hasERI() const { return HasERI; }
|
||||
|
||||
bool isAtom() const { return X86ProcFamily == IntelAtom; }
|
||||
|
||||
|
@ -105,23 +105,28 @@ FunctionPass *llvm::createX86IssueVZeroUpperPass() {
|
||||
}
|
||||
|
||||
static bool isYmmReg(unsigned Reg) {
|
||||
if (Reg >= X86::YMM0 && Reg <= X86::YMM15)
|
||||
return true;
|
||||
return (Reg >= X86::YMM0 && Reg <= X86::YMM31);
|
||||
}
|
||||
|
||||
return false;
|
||||
static bool isZmmReg(unsigned Reg) {
|
||||
return (Reg >= X86::ZMM0 && Reg <= X86::ZMM31);
|
||||
}
|
||||
|
||||
/// checkFnHasLiveInYmm - Return true if any live-in register recorded in MRI
/// is a YMM or a ZMM register.
static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
  for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(),
       E = MRI.livein_end(); I != E; ++I)
    // Test both register banks in one condition. Nesting this under a
    // YMM-only check would make the ZMM case unreachable.
    if (isYmmReg(I->first) || isZmmReg(I->first))
      return true;

  return false;
}
|
||||
|
||||
static bool clobbersAllYmmRegs(const MachineOperand &MO) {
|
||||
for (unsigned reg = X86::YMM0; reg < X86::YMM15; ++reg) {
|
||||
for (unsigned reg = X86::YMM0; reg < X86::YMM31; ++reg) {
|
||||
if (!MO.clobbersPhysReg(reg))
|
||||
return false;
|
||||
}
|
||||
for (unsigned reg = X86::ZMM0; reg < X86::ZMM31; ++reg) {
|
||||
if (!MO.clobbersPhysReg(reg))
|
||||
return false;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user