Added Skylake client to X86 targets and features

Changes in X86.td:

I set features of Intel processors in incremental form: IVB = SNB + X HSW = IVB + X ..
I added Skylake client processor and defined it's features
FeatureADX was missing on KNL
Added some new features to appropriate processors SMAP, IFMA, PREFETCHWT1, VMFUNC and others

Differential Revision: http://reviews.llvm.org/D16357



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258659 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2016-01-24 10:41:28 +00:00
parent cddf52c859
commit f4a0173582
6 changed files with 337 additions and 199 deletions

View File

@ -805,25 +805,34 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["avx2"] = HasAVXSave && HasLeaf7 && ((EBX >> 5) & 1);
Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1);
Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
Features["hle"] = HasLeaf7 && ((EBX >> 4) & 1);
Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1);
Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
Features["smap"] = HasLeaf7 && ((EBX >> 20) & 1);
Features["pcommit"] = HasLeaf7 && ((EBX >> 22) & 1);
Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
// Enable protection keys
Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
// AVX512 is only supported if the OS supports the context save for it.
Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
Features["prefetchwt1"] = HasLeaf7 && (ECX & 1);
Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
// Enable protection keys
Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
bool HasLeafD = MaxLevel >= 0xd &&
!GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);

View File

@ -125,6 +125,9 @@ def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPFPREFETCHWT1",
"true",
"Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
"Enable AVX-512 Doubleword and Quadword Instructions",
[FeatureAVX512]>;
@ -137,6 +140,9 @@ def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
"Enable AVX-512 Vector Bit Manipulation Instructions",
[FeatureAVX512]>;
def FeatureIFMA : SubtargetFeature<"ifma", "HasIFMA", "true",
"Enable AVX-512 Integer Fused Multiple-Add",
[FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
"Enable protection keys">;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
@ -202,6 +208,20 @@ def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasInvPCId", "true",
"Invalidate Process-Context Identifier">;
def FeatureVMFUNC : SubtargetFeature<"vmfunc", "HasVMFUNC", "true",
"VM Functions">;
def FeatureSMAP : SubtargetFeature<"smap", "HasSMAP", "true",
"Supervisor Mode Access Protection">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
"Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
"Flush A Cache Line Optimized">;
def FeaturePCOMMIT : SubtargetFeature<"pcommit", "HasPCOMMIT", "true",
"Enable Persistent Commit">;
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
"Cache Line Write Back">;
// TODO: This feature ought to be renamed.
// What it really refers to are CPUs for which certain instructions
// (which ones besides the example below?) are microcoded.
@ -365,13 +385,12 @@ def : WestmereProc<"westmere">;
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
def ProcIntelSNB : SubtargetFeature<"snb", "X86ProcFamily", "IntelSNB",
" Intel SandyBridge Processor", [
FeatureMMX,
FeatureAVX,
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeatureSlowUAMem32,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
@ -379,187 +398,125 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureXSAVEOPT,
FeatureLAHFSAHF
]>;
class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
ProcIntelSNB,
FeatureSlowBTMem,
FeatureSlowUAMem32
]>;
def : SandyBridgeProc<"sandybridge">;
def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureMMX,
FeatureAVX,
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeatureSlowUAMem32,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
def ProcIntelIVB : SubtargetFeature<"ivb", "X86ProcFamily", "IntelIVB",
" Intel IvyBridge Processor", [
ProcIntelSNB,
FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase,
FeatureLAHFSAHF
FeatureFSGSBase
]>;
class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
ProcIntelIVB,
FeatureSlowBTMem,
FeatureSlowUAMem32
]>;
def : IvyBridgeProc<"ivybridge">;
def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureMMX,
def ProcIntelHSW : SubtargetFeature<"hsw", "X86ProcFamily", "IntelHSW",
" Intel Haswell Processor", [
ProcIntelIVB,
FeatureAVX2,
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
FeatureRDRAND,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureF16C,
FeatureFSGSBase,
FeatureMOVBE,
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
FeatureFMA,
FeatureLZCNT,
FeatureMOVBE,
FeatureINVPCID,
FeatureVMFUNC,
FeatureRTM,
FeatureHLE,
FeatureSlowIncDec,
FeatureLAHFSAHF
FeatureSlowIncDec
]>;
class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel,
[ProcIntelHSW]>;
def : HaswellProc<"haswell">;
def : HaswellProc<"core-avx2">; // Legacy alias.
class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureMMX,
FeatureAVX2,
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase,
FeatureMOVBE,
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
FeatureFMA,
FeatureRTM,
FeatureHLE,
def ProcIntelBDW : SubtargetFeature<"bdw", "X86ProcFamily", "IntelBDW",
" Intel Broadwell Processor", [
ProcIntelHSW,
FeatureADX,
FeatureRDSEED,
FeatureSlowIncDec,
FeatureLAHFSAHF
FeatureSMAP
]>;
class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel,
[ProcIntelBDW]>;
def : BroadwellProc<"broadwell">;
def ProcIntelSKL : SubtargetFeature<"skl", "X86ProcFamily", "IntelSKL",
" Intel Skylake Client Processor", [
ProcIntelBDW,
FeatureMPX,
FeatureXSAVEC,
FeatureXSAVES,
FeatureSGX,
FeatureCLFLUSHOPT
]>;
// FIXME: define SKL model
class SkylakeClientProc<string Name> : ProcessorModel<Name, HaswellModel,
[ProcIntelSKL]>;
def : SkylakeClientProc<"skl">;
// FIXME: define KNL model
class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureMMX,
class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,[
ProcIntelIVB,
FeatureAVX512,
FeatureFXSR,
FeatureERI,
FeatureCDI,
FeaturePFI,
FeatureCMPXCHG16B,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase,
FeaturePREFETCHWT1,
FeatureADX,
FeatureRDSEED,
FeatureMOVBE,
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
FeatureFMA,
FeatureRTM,
FeatureHLE,
FeatureSlowIncDec,
FeatureMPX,
FeatureLAHFSAHF
FeatureFMA
]>;
def : KnightsLandingProc<"knl">;
// FIXME: define SKX model
class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureMMX,
def ProcIntelSKX : SubtargetFeature<"skx", "X86ProcFamily", "IntelSKX",
" Intel Skylake Server Processor", [
ProcIntelSKL,
FeatureAVX512,
FeatureFXSR,
FeatureCDI,
FeatureDQI,
FeatureBWI,
FeatureVLX,
FeaturePKU,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase,
FeatureMOVBE,
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
FeatureFMA,
FeatureRTM,
FeatureHLE,
FeatureADX,
FeatureRDSEED,
FeatureSlowIncDec,
FeatureMPX,
FeatureXSAVEC,
FeatureXSAVES,
FeatureLAHFSAHF
FeaturePCOMMIT,
FeatureCLWB
]>;
def : SkylakeProc<"skylake">;
def : SkylakeProc<"skx">; // Legacy alias.
class CannonlakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureMMX,
FeatureAVX512,
FeatureFXSR,
FeatureCDI,
FeatureDQI,
FeatureBWI,
FeatureVLX,
FeaturePKU,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase,
FeatureMOVBE,
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
// FIXME: define SKX model
class SkylakeServerProc<string Name> : ProcessorModel<Name, HaswellModel,
[ ProcIntelSKX]>;
def : SkylakeServerProc<"skylake">;
def : SkylakeServerProc<"skx">; // Legacy alias.
def ProcIntelCNL : SubtargetFeature<"cnl", "X86ProcFamily", "IntelCNL",
" Intel Cannonlake Processor", [
ProcIntelSKX,
FeatureVBMI,
FeatureFMA,
FeatureRTM,
FeatureHLE,
FeatureADX,
FeatureRDSEED,
FeatureSlowIncDec,
FeatureMPX,
FeatureXSAVEC,
FeatureXSAVES,
FeatureLAHFSAHF
FeatureIFMA,
FeatureSHA
]>;
class CannonlakeProc<string Name> : ProcessorModel<Name, HaswellModel,
[ ProcIntelCNL ]>;
def : CannonlakeProc<"cannonlake">;
def : CannonlakeProc<"cnl">;

View File

@ -797,6 +797,8 @@ def HasBMI : Predicate<"Subtarget->hasBMI()">;
def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
def HasVBMI : Predicate<"Subtarget->hasVBMI()">,
AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">;
def HasIFMA : Predicate<"Subtarget->hasIFMA()">,
AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">;
def HasRTM : Predicate<"Subtarget->hasRTM()">;
def HasHLE : Predicate<"Subtarget->hasHLE()">;
def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;

View File

@ -262,6 +262,7 @@ void X86Subtarget::initializeEnvironment() {
HasBMI = false;
HasBMI2 = false;
HasVBMI = false;
HasIFMA = false;
HasRTM = false;
HasHLE = false;
HasERI = false;

View File

@ -55,7 +55,8 @@ protected:
};
enum X86ProcFamilyEnum {
Others, IntelAtom, IntelSLM
Others, IntelAtom, IntelSLM, IntelSNB, IntelIVB, IntelHSW, IntelBDW,
IntelKNL, IntelSKL, IntelSKX, IntelCNL
};
/// X86 processor family: Intel Atom, and others
@ -137,6 +138,9 @@ protected:
/// Processor has VBMI instructions.
bool HasVBMI;
/// Processor has Integer Fused Multiply Add
bool HasIFMA;
/// Processor has RTM instructions.
bool HasRTM;
@ -158,6 +162,9 @@ protected:
/// Processor has LAHF/SAHF instructions.
bool HasLAHFSAHF;
/// Processor has Prefetch with intent to Write instruction
bool HasPFPREFETCHWT1;
/// True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
@ -229,9 +236,30 @@ protected:
/// Processor has PKU extenstions
bool HasPKU;
/// Processot supports MPX - Memory Protection Extensions
/// Processor supports MPX - Memory Protection Extensions
bool HasMPX;
/// Processor supports Invalidate Process-Context Identifier
bool HasInvPCId;
/// Processor has VM Functions
bool HasVMFUNC;
/// Processor has Supervisor Mode Access Protection
bool HasSMAP;
/// Processor has Software Guard Extensions
bool HasSGX;
/// Processor supports Flush Cache Line instruction
bool HasCLFLUSHOPT;
/// Processor has Persistent Commit feature
bool HasPCOMMIT;
/// Processor supports Cache Line Write Back instruction
bool HasCLWB;
/// Use software floating point for code generation.
bool UseSoftFloat;
@ -378,6 +406,7 @@ public:
bool hasBMI() const { return HasBMI; }
bool hasBMI2() const { return HasBMI2; }
bool hasVBMI() const { return HasVBMI; }
bool hasIFMA() const { return HasIFMA; }
bool hasRTM() const { return HasRTM; }
bool hasHLE() const { return HasHLE; }
bool hasADX() const { return HasADX; }

View File

@ -214,31 +214,31 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: addl (%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
@ -303,31 +303,31 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
@ -390,31 +390,31 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: addl (%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
@ -479,31 +479,31 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
@ -2879,6 +2879,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16>
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
@ -2899,6 +2909,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i8 %x1, <
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpaddw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm2 {%k1} {z}
; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
@ -2919,6 +2939,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16>
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrlv32hi:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
@ -2939,6 +2969,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16>
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
@ -2959,6 +2999,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i8 %x1, <
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_wi_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm2 {%k1} {z}
; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
@ -2979,6 +3029,16 @@ define <32 x i16>@test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i8 %x1,
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm2 {%k1} {z}
; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
@ -2992,13 +3052,23 @@ declare <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16>, i8, <32 x i16>
define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i8 %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm2 {%k1} {z}
; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm2 {%k1} {z}
; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm2 {%k1} {z}
; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
@ -3019,6 +3089,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16>
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
@ -3039,6 +3119,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16>
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
@ -3059,6 +3149,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i8 %x1, <
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_wi_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm2 {%k1} {z}
; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
@ -3079,6 +3179,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16>
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psllv32hi:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
@ -3152,13 +3262,23 @@ declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i3
define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1}
; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm2 {%k1} {z}
; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1}
; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm2 {%k1} {z}
; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm2 {%k1} {z}
; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
@ -3172,13 +3292,23 @@ declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i3
define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1}
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z}
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1}
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z}
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z}
; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
@ -3192,13 +3322,23 @@ declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>,
define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512BW-NEXT: vpermw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)