mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-23 19:17:17 +00:00
Added Skylake client to X86 targets and features
Changes in X86.td: I set the features of Intel processors in incremental form: IVB = SNB + X, HSW = IVB + X, and so on. I added the Skylake client processor and defined its features. FeatureADX was missing on KNL. Added some new features to the appropriate processors: SMAP, IFMA, PREFETCHWT1, VMFUNC and others. Differential Revision: http://reviews.llvm.org/D16357 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258659 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
cddf52c859
commit
f4a0173582
@ -805,25 +805,34 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
|
|||||||
Features["avx2"] = HasAVXSave && HasLeaf7 && ((EBX >> 5) & 1);
|
Features["avx2"] = HasAVXSave && HasLeaf7 && ((EBX >> 5) & 1);
|
||||||
|
|
||||||
Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
|
Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
|
||||||
|
Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1);
|
||||||
Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
|
Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
|
||||||
Features["hle"] = HasLeaf7 && ((EBX >> 4) & 1);
|
Features["hle"] = HasLeaf7 && ((EBX >> 4) & 1);
|
||||||
Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
|
Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
|
||||||
|
Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1);
|
||||||
Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
|
Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
|
||||||
Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
|
Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
|
||||||
Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
|
Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
|
||||||
|
Features["smap"] = HasLeaf7 && ((EBX >> 20) & 1);
|
||||||
|
Features["pcommit"] = HasLeaf7 && ((EBX >> 22) & 1);
|
||||||
|
Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
|
||||||
|
Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
|
||||||
Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
|
Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
|
||||||
// Enable protection keys
|
|
||||||
Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
|
|
||||||
|
|
||||||
// AVX512 is only supported if the OS supports the context save for it.
|
// AVX512 is only supported if the OS supports the context save for it.
|
||||||
Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
|
Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
|
||||||
Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
|
Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
|
||||||
|
Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
|
||||||
Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
|
Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
|
||||||
Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
|
Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
|
||||||
Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
|
Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
|
||||||
Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
|
Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
|
||||||
Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
|
Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
|
||||||
|
|
||||||
|
Features["prefetchwt1"] = HasLeaf7 && (ECX & 1);
|
||||||
Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
|
Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
|
||||||
|
// Enable protection keys
|
||||||
|
Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
|
||||||
|
|
||||||
bool HasLeafD = MaxLevel >= 0xd &&
|
bool HasLeafD = MaxLevel >= 0xd &&
|
||||||
!GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
|
!GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
|
||||||
|
@ -125,6 +125,9 @@ def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
|
|||||||
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
|
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
|
||||||
"Enable AVX-512 PreFetch Instructions",
|
"Enable AVX-512 PreFetch Instructions",
|
||||||
[FeatureAVX512]>;
|
[FeatureAVX512]>;
|
||||||
|
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPFPREFETCHWT1",
|
||||||
|
"true",
|
||||||
|
"Prefetch with Intent to Write and T1 Hint">;
|
||||||
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
|
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
|
||||||
"Enable AVX-512 Doubleword and Quadword Instructions",
|
"Enable AVX-512 Doubleword and Quadword Instructions",
|
||||||
[FeatureAVX512]>;
|
[FeatureAVX512]>;
|
||||||
@ -137,6 +140,9 @@ def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
|
|||||||
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
|
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
|
||||||
"Enable AVX-512 Vector Bit Manipulation Instructions",
|
"Enable AVX-512 Vector Bit Manipulation Instructions",
|
||||||
[FeatureAVX512]>;
|
[FeatureAVX512]>;
|
||||||
|
def FeatureIFMA : SubtargetFeature<"ifma", "HasIFMA", "true",
|
||||||
|
"Enable AVX-512 Integer Fused Multiple-Add",
|
||||||
|
[FeatureAVX512]>;
|
||||||
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
|
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
|
||||||
"Enable protection keys">;
|
"Enable protection keys">;
|
||||||
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
|
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
|
||||||
@ -202,6 +208,20 @@ def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
|
|||||||
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
|
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
|
||||||
"PadShortFunctions", "true",
|
"PadShortFunctions", "true",
|
||||||
"Pad short functions">;
|
"Pad short functions">;
|
||||||
|
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasInvPCId", "true",
|
||||||
|
"Invalidate Process-Context Identifier">;
|
||||||
|
def FeatureVMFUNC : SubtargetFeature<"vmfunc", "HasVMFUNC", "true",
|
||||||
|
"VM Functions">;
|
||||||
|
def FeatureSMAP : SubtargetFeature<"smap", "HasSMAP", "true",
|
||||||
|
"Supervisor Mode Access Protection">;
|
||||||
|
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
|
||||||
|
"Enable Software Guard Extensions">;
|
||||||
|
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
|
||||||
|
"Flush A Cache Line Optimized">;
|
||||||
|
def FeaturePCOMMIT : SubtargetFeature<"pcommit", "HasPCOMMIT", "true",
|
||||||
|
"Enable Persistent Commit">;
|
||||||
|
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
|
||||||
|
"Cache Line Write Back">;
|
||||||
// TODO: This feature ought to be renamed.
|
// TODO: This feature ought to be renamed.
|
||||||
// What it really refers to are CPUs for which certain instructions
|
// What it really refers to are CPUs for which certain instructions
|
||||||
// (which ones besides the example below?) are microcoded.
|
// (which ones besides the example below?) are microcoded.
|
||||||
@ -365,13 +385,12 @@ def : WestmereProc<"westmere">;
|
|||||||
|
|
||||||
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
|
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
|
||||||
// rather than a superset.
|
// rather than a superset.
|
||||||
class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
|
def ProcIntelSNB : SubtargetFeature<"snb", "X86ProcFamily", "IntelSNB",
|
||||||
|
" Intel SandyBridge Processor", [
|
||||||
FeatureMMX,
|
FeatureMMX,
|
||||||
FeatureAVX,
|
FeatureAVX,
|
||||||
FeatureFXSR,
|
FeatureFXSR,
|
||||||
FeatureCMPXCHG16B,
|
FeatureCMPXCHG16B,
|
||||||
FeatureSlowBTMem,
|
|
||||||
FeatureSlowUAMem32,
|
|
||||||
FeaturePOPCNT,
|
FeaturePOPCNT,
|
||||||
FeatureAES,
|
FeatureAES,
|
||||||
FeaturePCLMUL,
|
FeaturePCLMUL,
|
||||||
@ -379,187 +398,125 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
|
|||||||
FeatureXSAVEOPT,
|
FeatureXSAVEOPT,
|
||||||
FeatureLAHFSAHF
|
FeatureLAHFSAHF
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
|
class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
|
||||||
|
ProcIntelSNB,
|
||||||
|
FeatureSlowBTMem,
|
||||||
|
FeatureSlowUAMem32
|
||||||
|
]>;
|
||||||
def : SandyBridgeProc<"sandybridge">;
|
def : SandyBridgeProc<"sandybridge">;
|
||||||
def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
|
def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
|
||||||
|
|
||||||
class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
|
def ProcIntelIVB : SubtargetFeature<"ivb", "X86ProcFamily", "IntelIVB",
|
||||||
FeatureMMX,
|
" Intel IvyBridge Processor", [
|
||||||
FeatureAVX,
|
ProcIntelSNB,
|
||||||
FeatureFXSR,
|
|
||||||
FeatureCMPXCHG16B,
|
|
||||||
FeatureSlowBTMem,
|
|
||||||
FeatureSlowUAMem32,
|
|
||||||
FeaturePOPCNT,
|
|
||||||
FeatureAES,
|
|
||||||
FeaturePCLMUL,
|
|
||||||
FeatureXSAVE,
|
|
||||||
FeatureXSAVEOPT,
|
|
||||||
FeatureRDRAND,
|
FeatureRDRAND,
|
||||||
FeatureF16C,
|
FeatureF16C,
|
||||||
FeatureFSGSBase,
|
FeatureFSGSBase
|
||||||
FeatureLAHFSAHF
|
]>;
|
||||||
|
|
||||||
|
class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
|
||||||
|
ProcIntelIVB,
|
||||||
|
FeatureSlowBTMem,
|
||||||
|
FeatureSlowUAMem32
|
||||||
]>;
|
]>;
|
||||||
def : IvyBridgeProc<"ivybridge">;
|
def : IvyBridgeProc<"ivybridge">;
|
||||||
def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
|
def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
|
||||||
|
|
||||||
class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
|
def ProcIntelHSW : SubtargetFeature<"hsw", "X86ProcFamily", "IntelHSW",
|
||||||
FeatureMMX,
|
" Intel Haswell Processor", [
|
||||||
|
ProcIntelIVB,
|
||||||
FeatureAVX2,
|
FeatureAVX2,
|
||||||
FeatureFXSR,
|
|
||||||
FeatureCMPXCHG16B,
|
|
||||||
FeatureSlowBTMem,
|
|
||||||
FeaturePOPCNT,
|
|
||||||
FeatureAES,
|
|
||||||
FeaturePCLMUL,
|
|
||||||
FeatureRDRAND,
|
|
||||||
FeatureXSAVE,
|
|
||||||
FeatureXSAVEOPT,
|
|
||||||
FeatureF16C,
|
|
||||||
FeatureFSGSBase,
|
|
||||||
FeatureMOVBE,
|
|
||||||
FeatureLZCNT,
|
|
||||||
FeatureBMI,
|
FeatureBMI,
|
||||||
FeatureBMI2,
|
FeatureBMI2,
|
||||||
FeatureFMA,
|
FeatureFMA,
|
||||||
|
FeatureLZCNT,
|
||||||
|
FeatureMOVBE,
|
||||||
|
FeatureINVPCID,
|
||||||
|
FeatureVMFUNC,
|
||||||
FeatureRTM,
|
FeatureRTM,
|
||||||
FeatureHLE,
|
FeatureHLE,
|
||||||
FeatureSlowIncDec,
|
FeatureSlowIncDec
|
||||||
FeatureLAHFSAHF
|
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
|
class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel,
|
||||||
|
[ProcIntelHSW]>;
|
||||||
def : HaswellProc<"haswell">;
|
def : HaswellProc<"haswell">;
|
||||||
def : HaswellProc<"core-avx2">; // Legacy alias.
|
def : HaswellProc<"core-avx2">; // Legacy alias.
|
||||||
|
|
||||||
class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
|
def ProcIntelBDW : SubtargetFeature<"bdw", "X86ProcFamily", "IntelBDW",
|
||||||
FeatureMMX,
|
" Intel Broadwell Processor", [
|
||||||
FeatureAVX2,
|
ProcIntelHSW,
|
||||||
FeatureFXSR,
|
|
||||||
FeatureCMPXCHG16B,
|
|
||||||
FeatureSlowBTMem,
|
|
||||||
FeaturePOPCNT,
|
|
||||||
FeatureAES,
|
|
||||||
FeaturePCLMUL,
|
|
||||||
FeatureXSAVE,
|
|
||||||
FeatureXSAVEOPT,
|
|
||||||
FeatureRDRAND,
|
|
||||||
FeatureF16C,
|
|
||||||
FeatureFSGSBase,
|
|
||||||
FeatureMOVBE,
|
|
||||||
FeatureLZCNT,
|
|
||||||
FeatureBMI,
|
|
||||||
FeatureBMI2,
|
|
||||||
FeatureFMA,
|
|
||||||
FeatureRTM,
|
|
||||||
FeatureHLE,
|
|
||||||
FeatureADX,
|
FeatureADX,
|
||||||
FeatureRDSEED,
|
FeatureRDSEED,
|
||||||
FeatureSlowIncDec,
|
FeatureSMAP
|
||||||
FeatureLAHFSAHF
|
|
||||||
]>;
|
]>;
|
||||||
|
class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel,
|
||||||
|
[ProcIntelBDW]>;
|
||||||
def : BroadwellProc<"broadwell">;
|
def : BroadwellProc<"broadwell">;
|
||||||
|
|
||||||
|
def ProcIntelSKL : SubtargetFeature<"skl", "X86ProcFamily", "IntelSKL",
|
||||||
|
" Intel Skylake Client Processor", [
|
||||||
|
ProcIntelBDW,
|
||||||
|
FeatureMPX,
|
||||||
|
FeatureXSAVEC,
|
||||||
|
FeatureXSAVES,
|
||||||
|
FeatureSGX,
|
||||||
|
FeatureCLFLUSHOPT
|
||||||
|
]>;
|
||||||
|
|
||||||
|
// FIXME: define SKL model
|
||||||
|
class SkylakeClientProc<string Name> : ProcessorModel<Name, HaswellModel,
|
||||||
|
[ProcIntelSKL]>;
|
||||||
|
def : SkylakeClientProc<"skl">;
|
||||||
|
|
||||||
// FIXME: define KNL model
|
// FIXME: define KNL model
|
||||||
class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
|
class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,[
|
||||||
FeatureMMX,
|
ProcIntelIVB,
|
||||||
FeatureAVX512,
|
FeatureAVX512,
|
||||||
FeatureFXSR,
|
|
||||||
FeatureERI,
|
FeatureERI,
|
||||||
FeatureCDI,
|
FeatureCDI,
|
||||||
FeaturePFI,
|
FeaturePFI,
|
||||||
FeatureCMPXCHG16B,
|
FeaturePREFETCHWT1,
|
||||||
FeaturePOPCNT,
|
FeatureADX,
|
||||||
FeatureAES,
|
FeatureRDSEED,
|
||||||
FeaturePCLMUL,
|
|
||||||
FeatureXSAVE,
|
|
||||||
FeatureXSAVEOPT,
|
|
||||||
FeatureRDRAND,
|
|
||||||
FeatureF16C,
|
|
||||||
FeatureFSGSBase,
|
|
||||||
FeatureMOVBE,
|
FeatureMOVBE,
|
||||||
FeatureLZCNT,
|
FeatureLZCNT,
|
||||||
FeatureBMI,
|
FeatureBMI,
|
||||||
FeatureBMI2,
|
FeatureBMI2,
|
||||||
FeatureFMA,
|
FeatureFMA
|
||||||
FeatureRTM,
|
|
||||||
FeatureHLE,
|
|
||||||
FeatureSlowIncDec,
|
|
||||||
FeatureMPX,
|
|
||||||
FeatureLAHFSAHF
|
|
||||||
]>;
|
]>;
|
||||||
def : KnightsLandingProc<"knl">;
|
def : KnightsLandingProc<"knl">;
|
||||||
|
|
||||||
// FIXME: define SKX model
|
def ProcIntelSKX : SubtargetFeature<"skx", "X86ProcFamily", "IntelSKX",
|
||||||
class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
|
" Intel Skylake Server Processor", [
|
||||||
FeatureMMX,
|
ProcIntelSKL,
|
||||||
FeatureAVX512,
|
FeatureAVX512,
|
||||||
FeatureFXSR,
|
|
||||||
FeatureCDI,
|
FeatureCDI,
|
||||||
FeatureDQI,
|
FeatureDQI,
|
||||||
FeatureBWI,
|
FeatureBWI,
|
||||||
FeatureVLX,
|
FeatureVLX,
|
||||||
FeaturePKU,
|
FeaturePKU,
|
||||||
FeatureCMPXCHG16B,
|
FeaturePCOMMIT,
|
||||||
FeatureSlowBTMem,
|
FeatureCLWB
|
||||||
FeaturePOPCNT,
|
|
||||||
FeatureAES,
|
|
||||||
FeaturePCLMUL,
|
|
||||||
FeatureXSAVE,
|
|
||||||
FeatureXSAVEOPT,
|
|
||||||
FeatureRDRAND,
|
|
||||||
FeatureF16C,
|
|
||||||
FeatureFSGSBase,
|
|
||||||
FeatureMOVBE,
|
|
||||||
FeatureLZCNT,
|
|
||||||
FeatureBMI,
|
|
||||||
FeatureBMI2,
|
|
||||||
FeatureFMA,
|
|
||||||
FeatureRTM,
|
|
||||||
FeatureHLE,
|
|
||||||
FeatureADX,
|
|
||||||
FeatureRDSEED,
|
|
||||||
FeatureSlowIncDec,
|
|
||||||
FeatureMPX,
|
|
||||||
FeatureXSAVEC,
|
|
||||||
FeatureXSAVES,
|
|
||||||
FeatureLAHFSAHF
|
|
||||||
]>;
|
]>;
|
||||||
def : SkylakeProc<"skylake">;
|
|
||||||
def : SkylakeProc<"skx">; // Legacy alias.
|
|
||||||
|
|
||||||
class CannonlakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
|
// FIXME: define SKX model
|
||||||
FeatureMMX,
|
class SkylakeServerProc<string Name> : ProcessorModel<Name, HaswellModel,
|
||||||
FeatureAVX512,
|
[ ProcIntelSKX]>;
|
||||||
FeatureFXSR,
|
def : SkylakeServerProc<"skylake">;
|
||||||
FeatureCDI,
|
def : SkylakeServerProc<"skx">; // Legacy alias.
|
||||||
FeatureDQI,
|
|
||||||
FeatureBWI,
|
def ProcIntelCNL : SubtargetFeature<"cnl", "X86ProcFamily", "IntelCNL",
|
||||||
FeatureVLX,
|
" Intel Cannonlake Processor", [
|
||||||
FeaturePKU,
|
ProcIntelSKX,
|
||||||
FeatureCMPXCHG16B,
|
|
||||||
FeatureSlowBTMem,
|
|
||||||
FeaturePOPCNT,
|
|
||||||
FeatureAES,
|
|
||||||
FeaturePCLMUL,
|
|
||||||
FeatureXSAVE,
|
|
||||||
FeatureXSAVEOPT,
|
|
||||||
FeatureRDRAND,
|
|
||||||
FeatureF16C,
|
|
||||||
FeatureFSGSBase,
|
|
||||||
FeatureMOVBE,
|
|
||||||
FeatureLZCNT,
|
|
||||||
FeatureBMI,
|
|
||||||
FeatureBMI2,
|
|
||||||
FeatureVBMI,
|
FeatureVBMI,
|
||||||
FeatureFMA,
|
FeatureIFMA,
|
||||||
FeatureRTM,
|
FeatureSHA
|
||||||
FeatureHLE,
|
|
||||||
FeatureADX,
|
|
||||||
FeatureRDSEED,
|
|
||||||
FeatureSlowIncDec,
|
|
||||||
FeatureMPX,
|
|
||||||
FeatureXSAVEC,
|
|
||||||
FeatureXSAVES,
|
|
||||||
FeatureLAHFSAHF
|
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
|
class CannonlakeProc<string Name> : ProcessorModel<Name, HaswellModel,
|
||||||
|
[ ProcIntelCNL ]>;
|
||||||
def : CannonlakeProc<"cannonlake">;
|
def : CannonlakeProc<"cannonlake">;
|
||||||
def : CannonlakeProc<"cnl">;
|
def : CannonlakeProc<"cnl">;
|
||||||
|
|
||||||
|
@ -797,6 +797,8 @@ def HasBMI : Predicate<"Subtarget->hasBMI()">;
|
|||||||
def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
|
def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
|
||||||
def HasVBMI : Predicate<"Subtarget->hasVBMI()">,
|
def HasVBMI : Predicate<"Subtarget->hasVBMI()">,
|
||||||
AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">;
|
AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">;
|
||||||
|
def HasIFMA : Predicate<"Subtarget->hasIFMA()">,
|
||||||
|
AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">;
|
||||||
def HasRTM : Predicate<"Subtarget->hasRTM()">;
|
def HasRTM : Predicate<"Subtarget->hasRTM()">;
|
||||||
def HasHLE : Predicate<"Subtarget->hasHLE()">;
|
def HasHLE : Predicate<"Subtarget->hasHLE()">;
|
||||||
def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
|
def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
|
||||||
|
@ -262,6 +262,7 @@ void X86Subtarget::initializeEnvironment() {
|
|||||||
HasBMI = false;
|
HasBMI = false;
|
||||||
HasBMI2 = false;
|
HasBMI2 = false;
|
||||||
HasVBMI = false;
|
HasVBMI = false;
|
||||||
|
HasIFMA = false;
|
||||||
HasRTM = false;
|
HasRTM = false;
|
||||||
HasHLE = false;
|
HasHLE = false;
|
||||||
HasERI = false;
|
HasERI = false;
|
||||||
|
@ -55,7 +55,8 @@ protected:
|
|||||||
};
|
};
|
||||||
|
|
||||||
enum X86ProcFamilyEnum {
|
enum X86ProcFamilyEnum {
|
||||||
Others, IntelAtom, IntelSLM
|
Others, IntelAtom, IntelSLM, IntelSNB, IntelIVB, IntelHSW, IntelBDW,
|
||||||
|
IntelKNL, IntelSKL, IntelSKX, IntelCNL
|
||||||
};
|
};
|
||||||
|
|
||||||
/// X86 processor family: Intel Atom, and others
|
/// X86 processor family: Intel Atom, and others
|
||||||
@ -137,6 +138,9 @@ protected:
|
|||||||
/// Processor has VBMI instructions.
|
/// Processor has VBMI instructions.
|
||||||
bool HasVBMI;
|
bool HasVBMI;
|
||||||
|
|
||||||
|
/// Processor has Integer Fused Multiply Add
|
||||||
|
bool HasIFMA;
|
||||||
|
|
||||||
/// Processor has RTM instructions.
|
/// Processor has RTM instructions.
|
||||||
bool HasRTM;
|
bool HasRTM;
|
||||||
|
|
||||||
@ -158,6 +162,9 @@ protected:
|
|||||||
/// Processor has LAHF/SAHF instructions.
|
/// Processor has LAHF/SAHF instructions.
|
||||||
bool HasLAHFSAHF;
|
bool HasLAHFSAHF;
|
||||||
|
|
||||||
|
/// Processor has Prefetch with intent to Write instruction
|
||||||
|
bool HasPFPREFETCHWT1;
|
||||||
|
|
||||||
/// True if BT (bit test) of memory instructions are slow.
|
/// True if BT (bit test) of memory instructions are slow.
|
||||||
bool IsBTMemSlow;
|
bool IsBTMemSlow;
|
||||||
|
|
||||||
@ -229,9 +236,30 @@ protected:
|
|||||||
/// Processor has PKU extenstions
|
/// Processor has PKU extenstions
|
||||||
bool HasPKU;
|
bool HasPKU;
|
||||||
|
|
||||||
/// Processot supports MPX - Memory Protection Extensions
|
/// Processor supports MPX - Memory Protection Extensions
|
||||||
bool HasMPX;
|
bool HasMPX;
|
||||||
|
|
||||||
|
/// Processor supports Invalidate Process-Context Identifier
|
||||||
|
bool HasInvPCId;
|
||||||
|
|
||||||
|
/// Processor has VM Functions
|
||||||
|
bool HasVMFUNC;
|
||||||
|
|
||||||
|
/// Processor has Supervisor Mode Access Protection
|
||||||
|
bool HasSMAP;
|
||||||
|
|
||||||
|
/// Processor has Software Guard Extensions
|
||||||
|
bool HasSGX;
|
||||||
|
|
||||||
|
/// Processor supports Flush Cache Line instruction
|
||||||
|
bool HasCLFLUSHOPT;
|
||||||
|
|
||||||
|
/// Processor has Persistent Commit feature
|
||||||
|
bool HasPCOMMIT;
|
||||||
|
|
||||||
|
/// Processor supports Cache Line Write Back instruction
|
||||||
|
bool HasCLWB;
|
||||||
|
|
||||||
/// Use software floating point for code generation.
|
/// Use software floating point for code generation.
|
||||||
bool UseSoftFloat;
|
bool UseSoftFloat;
|
||||||
|
|
||||||
@ -378,6 +406,7 @@ public:
|
|||||||
bool hasBMI() const { return HasBMI; }
|
bool hasBMI() const { return HasBMI; }
|
||||||
bool hasBMI2() const { return HasBMI2; }
|
bool hasBMI2() const { return HasBMI2; }
|
||||||
bool hasVBMI() const { return HasVBMI; }
|
bool hasVBMI() const { return HasVBMI; }
|
||||||
|
bool hasIFMA() const { return HasIFMA; }
|
||||||
bool hasRTM() const { return HasRTM; }
|
bool hasRTM() const { return HasRTM; }
|
||||||
bool hasHLE() const { return HasHLE; }
|
bool hasHLE() const { return HasHLE; }
|
||||||
bool hasADX() const { return HasADX; }
|
bool hasADX() const { return HasADX; }
|
||||||
|
@ -214,31 +214,31 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
|
|||||||
; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0
|
; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0
|
; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
|
; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
|
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
|
; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
|
; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0
|
; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0
|
||||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||||
; AVX512F-32-NEXT: addl (%esp), %eax
|
; AVX512F-32-NEXT: addl (%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: addl $68, %esp
|
; AVX512F-32-NEXT: addl $68, %esp
|
||||||
; AVX512F-32-NEXT: retl
|
; AVX512F-32-NEXT: retl
|
||||||
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
|
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
|
||||||
@ -303,31 +303,31 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
|
|||||||
; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
|
; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
|
; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
|
; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
|
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
|
; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
|
; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
|
; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: addl $68, %esp
|
; AVX512F-32-NEXT: addl $68, %esp
|
||||||
; AVX512F-32-NEXT: retl
|
; AVX512F-32-NEXT: retl
|
||||||
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
|
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
|
||||||
@ -390,31 +390,31 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
|
|||||||
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0
|
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0
|
; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
|
; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
|
; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
|
; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
|
; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0
|
; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0
|
||||||
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
; AVX512F-32-NEXT: kmovq %k0, (%esp)
|
||||||
; AVX512F-32-NEXT: addl (%esp), %eax
|
; AVX512F-32-NEXT: addl (%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: addl $68, %esp
|
; AVX512F-32-NEXT: addl $68, %esp
|
||||||
; AVX512F-32-NEXT: retl
|
; AVX512F-32-NEXT: retl
|
||||||
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
|
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
|
||||||
@ -479,31 +479,31 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
|
|||||||
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
|
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
|
; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
|
; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
|
; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
|
; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
|
; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
|
; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
|
||||||
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
|
||||||
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
|
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
|
||||||
; AVX512F-32-NEXT: addl $68, %esp
|
; AVX512F-32-NEXT: addl $68, %esp
|
||||||
; AVX512F-32-NEXT: retl
|
; AVX512F-32-NEXT: retl
|
||||||
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
|
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
|
||||||
@ -2879,6 +2879,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16>
|
|||||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
||||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_w_512:
|
||||||
|
; AVX512F-32: # BB#0:
|
||||||
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||||
|
; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1}
|
||||||
|
; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z}
|
||||||
|
; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: retl
|
||||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
%res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
|
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||||
@ -2899,6 +2909,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i8 %x1, <
|
|||||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm0, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm2, %zmm0, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
|
||||||
|
; AVX512F-32: # BB#0:
|
||||||
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||||
|
; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1}
|
||||||
|
; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm2 {%k1} {z}
|
||||||
|
; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: retl
|
||||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
%res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
||||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
||||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
||||||
@ -2919,6 +2939,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16>
|
|||||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrlv32hi:
|
||||||
|
; AVX512F-32: # BB#0:
|
||||||
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||||
|
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
|
||||||
|
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||||
|
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
|
; AVX512F-32-NEXT: retl
|
||||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
%res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||||
@ -2939,6 +2969,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16>
|
|||||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_w_512:
|
||||||
|
; AVX512F-32: # BB#0:
|
||||||
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||||
|
; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1}
|
||||||
|
; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z}
|
||||||
|
; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
|
; AVX512F-32-NEXT: retl
|
||||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
%res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
|
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||||
@ -2959,6 +2999,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i8 %x1, <
|
|||||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_wi_512:
|
||||||
|
; AVX512F-32: # BB#0:
|
||||||
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||||
|
; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1}
|
||||||
|
; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm2 {%k1} {z}
|
||||||
|
; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
|
; AVX512F-32-NEXT: retl
|
||||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
%res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
||||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
||||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
||||||
@ -2979,6 +3029,16 @@ define <32 x i16>@test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i8 %x1,
|
|||||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
|
||||||
|
; AVX512F-32: # BB#0:
|
||||||
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||||
|
; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm1 {%k1}
|
||||||
|
; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm2 {%k1} {z}
|
||||||
|
; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
|
; AVX512F-32-NEXT: retl
|
||||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
%res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
||||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
||||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
||||||
@ -2999,6 +3059,16 @@ define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i8 %x1,
|
|||||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
|
||||||
|
; AVX512F-32: # BB#0:
|
||||||
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||||
|
; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1}
|
||||||
|
; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm2 {%k1} {z}
|
||||||
|
; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
|
; AVX512F-32-NEXT: retl
|
||||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
%res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
||||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
||||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
||||||
@ -3019,6 +3089,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16>
|
|||||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi:
|
||||||
|
; AVX512F-32: # BB#0:
|
||||||
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||||
|
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1}
|
||||||
|
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||||
|
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
|
; AVX512F-32-NEXT: retl
|
||||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
%res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||||
@ -3039,6 +3119,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16>
|
|||||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_w_512:
|
||||||
|
; AVX512F-32: # BB#0:
|
||||||
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||||
|
; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1}
|
||||||
|
; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z}
|
||||||
|
; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
|
; AVX512F-32-NEXT: retl
|
||||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
%res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
|
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||||
@ -3059,6 +3149,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i8 %x1, <
|
|||||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_wi_512:
|
||||||
|
; AVX512F-32: # BB#0:
|
||||||
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||||
|
; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1}
|
||||||
|
; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm2 {%k1} {z}
|
||||||
|
; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
|
; AVX512F-32-NEXT: retl
|
||||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
%res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
|
||||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
|
||||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
|
||||||
@ -3079,6 +3179,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16>
|
|||||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psllv32hi:
|
||||||
|
; AVX512F-32: # BB#0:
|
||||||
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||||
|
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
|
||||||
|
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||||
|
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
|
; AVX512F-32-NEXT: retl
|
||||||
%res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
%res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||||
@ -3159,6 +3269,16 @@ define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i1
|
|||||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
|
||||||
|
; AVX512F-32: # BB#0:
|
||||||
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||||
|
; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1}
|
||||||
|
; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm2 {%k1} {z}
|
||||||
|
; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
|
; AVX512F-32-NEXT: retl
|
||||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
|
%res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
|
||||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
|
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
|
||||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
|
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
|
||||||
@ -3179,6 +3299,16 @@ define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i1
|
|||||||
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
|
||||||
|
; AVX512F-32: # BB#0:
|
||||||
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||||
|
; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1}
|
||||||
|
; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z}
|
||||||
|
; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
|
; AVX512F-32-NEXT: retl
|
||||||
%res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
|
%res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
|
||||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
|
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
|
||||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
|
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
|
||||||
@ -3199,6 +3329,16 @@ define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x
|
|||||||
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||||
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
|
||||||
|
; AVX512F-32: # BB#0:
|
||||||
|
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
|
||||||
|
; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm2 {%k1}
|
||||||
|
; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||||
|
; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
|
||||||
|
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
|
||||||
|
; AVX512F-32-NEXT: retl
|
||||||
%res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
%res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
|
||||||
%res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
%res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
|
||||||
%res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
%res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user