Added Skylake client to X86 targets and features

Changes in X86.td:

I set features of Intel processors in incremental form: IVB = SNB + X HSW = IVB + X ..
I added Skylake client processor and defined it's features
FeatureADX was missing on KNL
Added some new features to appropriate processors SMAP, IFMA, PREFETCHWT1, VMFUNC and others

Differential Revision: http://reviews.llvm.org/D16357



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258659 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2016-01-24 10:41:28 +00:00
parent cddf52c859
commit f4a0173582
6 changed files with 337 additions and 199 deletions

View File

@ -805,25 +805,34 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["avx2"] = HasAVXSave && HasLeaf7 && ((EBX >> 5) & 1); Features["avx2"] = HasAVXSave && HasLeaf7 && ((EBX >> 5) & 1);
Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1);
Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
Features["hle"] = HasLeaf7 && ((EBX >> 4) & 1); Features["hle"] = HasLeaf7 && ((EBX >> 4) & 1);
Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1);
Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
Features["smap"] = HasLeaf7 && ((EBX >> 20) & 1);
Features["pcommit"] = HasLeaf7 && ((EBX >> 22) & 1);
Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
// Enable protection keys
Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
// AVX512 is only supported if the OS supports the context save for it. // AVX512 is only supported if the OS supports the context save for it.
Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
Features["prefetchwt1"] = HasLeaf7 && (ECX & 1);
Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
// Enable protection keys
Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
bool HasLeafD = MaxLevel >= 0xd && bool HasLeafD = MaxLevel >= 0xd &&
!GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); !GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);

View File

@ -125,6 +125,9 @@ def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions", "Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>; [FeatureAVX512]>;
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPFPREFETCHWT1",
"true",
"Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true", def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
"Enable AVX-512 Doubleword and Quadword Instructions", "Enable AVX-512 Doubleword and Quadword Instructions",
[FeatureAVX512]>; [FeatureAVX512]>;
@ -137,6 +140,9 @@ def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true", def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
"Enable AVX-512 Vector Bit Manipulation Instructions", "Enable AVX-512 Vector Bit Manipulation Instructions",
[FeatureAVX512]>; [FeatureAVX512]>;
def FeatureIFMA : SubtargetFeature<"ifma", "HasIFMA", "true",
"Enable AVX-512 Integer Fused Multiple-Add",
[FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
"Enable protection keys">; "Enable protection keys">;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
@ -202,6 +208,20 @@ def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true", "PadShortFunctions", "true",
"Pad short functions">; "Pad short functions">;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasInvPCId", "true",
"Invalidate Process-Context Identifier">;
def FeatureVMFUNC : SubtargetFeature<"vmfunc", "HasVMFUNC", "true",
"VM Functions">;
def FeatureSMAP : SubtargetFeature<"smap", "HasSMAP", "true",
"Supervisor Mode Access Protection">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
"Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
"Flush A Cache Line Optimized">;
def FeaturePCOMMIT : SubtargetFeature<"pcommit", "HasPCOMMIT", "true",
"Enable Persistent Commit">;
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
"Cache Line Write Back">;
// TODO: This feature ought to be renamed. // TODO: This feature ought to be renamed.
// What it really refers to are CPUs for which certain instructions // What it really refers to are CPUs for which certain instructions
// (which ones besides the example below?) are microcoded. // (which ones besides the example below?) are microcoded.
@ -365,13 +385,12 @@ def : WestmereProc<"westmere">;
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset. // rather than a superset.
class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ def ProcIntelSNB : SubtargetFeature<"snb", "X86ProcFamily", "IntelSNB",
" Intel SandyBridge Processor", [
FeatureMMX, FeatureMMX,
FeatureAVX, FeatureAVX,
FeatureFXSR, FeatureFXSR,
FeatureCMPXCHG16B, FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeatureSlowUAMem32,
FeaturePOPCNT, FeaturePOPCNT,
FeatureAES, FeatureAES,
FeaturePCLMUL, FeaturePCLMUL,
@ -379,187 +398,125 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureXSAVEOPT, FeatureXSAVEOPT,
FeatureLAHFSAHF FeatureLAHFSAHF
]>; ]>;
class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
ProcIntelSNB,
FeatureSlowBTMem,
FeatureSlowUAMem32
]>;
def : SandyBridgeProc<"sandybridge">; def : SandyBridgeProc<"sandybridge">;
def : SandyBridgeProc<"corei7-avx">; // Legacy alias. def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ def ProcIntelIVB : SubtargetFeature<"ivb", "X86ProcFamily", "IntelIVB",
FeatureMMX, " Intel IvyBridge Processor", [
FeatureAVX, ProcIntelSNB,
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeatureSlowUAMem32,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureRDRAND, FeatureRDRAND,
FeatureF16C, FeatureF16C,
FeatureFSGSBase, FeatureFSGSBase
FeatureLAHFSAHF ]>;
class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
ProcIntelIVB,
FeatureSlowBTMem,
FeatureSlowUAMem32
]>; ]>;
def : IvyBridgeProc<"ivybridge">; def : IvyBridgeProc<"ivybridge">;
def : IvyBridgeProc<"core-avx-i">; // Legacy alias. def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [ def ProcIntelHSW : SubtargetFeature<"hsw", "X86ProcFamily", "IntelHSW",
FeatureMMX, " Intel Haswell Processor", [
ProcIntelIVB,
FeatureAVX2, FeatureAVX2,
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
FeatureRDRAND,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureF16C,
FeatureFSGSBase,
FeatureMOVBE,
FeatureLZCNT,
FeatureBMI, FeatureBMI,
FeatureBMI2, FeatureBMI2,
FeatureFMA, FeatureFMA,
FeatureLZCNT,
FeatureMOVBE,
FeatureINVPCID,
FeatureVMFUNC,
FeatureRTM, FeatureRTM,
FeatureHLE, FeatureHLE,
FeatureSlowIncDec, FeatureSlowIncDec
FeatureLAHFSAHF
]>; ]>;
class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel,
[ProcIntelHSW]>;
def : HaswellProc<"haswell">; def : HaswellProc<"haswell">;
def : HaswellProc<"core-avx2">; // Legacy alias. def : HaswellProc<"core-avx2">; // Legacy alias.
class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [ def ProcIntelBDW : SubtargetFeature<"bdw", "X86ProcFamily", "IntelBDW",
FeatureMMX, " Intel Broadwell Processor", [
FeatureAVX2, ProcIntelHSW,
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase,
FeatureMOVBE,
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
FeatureFMA,
FeatureRTM,
FeatureHLE,
FeatureADX, FeatureADX,
FeatureRDSEED, FeatureRDSEED,
FeatureSlowIncDec, FeatureSMAP
FeatureLAHFSAHF
]>; ]>;
class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel,
[ProcIntelBDW]>;
def : BroadwellProc<"broadwell">; def : BroadwellProc<"broadwell">;
def ProcIntelSKL : SubtargetFeature<"skl", "X86ProcFamily", "IntelSKL",
" Intel Skylake Client Processor", [
ProcIntelBDW,
FeatureMPX,
FeatureXSAVEC,
FeatureXSAVES,
FeatureSGX,
FeatureCLFLUSHOPT
]>;
// FIXME: define SKL model
class SkylakeClientProc<string Name> : ProcessorModel<Name, HaswellModel,
[ProcIntelSKL]>;
def : SkylakeClientProc<"skl">;
// FIXME: define KNL model // FIXME: define KNL model
class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,[
FeatureMMX, ProcIntelIVB,
FeatureAVX512, FeatureAVX512,
FeatureFXSR,
FeatureERI, FeatureERI,
FeatureCDI, FeatureCDI,
FeaturePFI, FeaturePFI,
FeatureCMPXCHG16B, FeaturePREFETCHWT1,
FeaturePOPCNT, FeatureADX,
FeatureAES, FeatureRDSEED,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase,
FeatureMOVBE, FeatureMOVBE,
FeatureLZCNT, FeatureLZCNT,
FeatureBMI, FeatureBMI,
FeatureBMI2, FeatureBMI2,
FeatureFMA, FeatureFMA
FeatureRTM,
FeatureHLE,
FeatureSlowIncDec,
FeatureMPX,
FeatureLAHFSAHF
]>; ]>;
def : KnightsLandingProc<"knl">; def : KnightsLandingProc<"knl">;
// FIXME: define SKX model def ProcIntelSKX : SubtargetFeature<"skx", "X86ProcFamily", "IntelSKX",
class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [ " Intel Skylake Server Processor", [
FeatureMMX, ProcIntelSKL,
FeatureAVX512, FeatureAVX512,
FeatureFXSR,
FeatureCDI, FeatureCDI,
FeatureDQI, FeatureDQI,
FeatureBWI, FeatureBWI,
FeatureVLX, FeatureVLX,
FeaturePKU, FeaturePKU,
FeatureCMPXCHG16B, FeaturePCOMMIT,
FeatureSlowBTMem, FeatureCLWB
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase,
FeatureMOVBE,
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
FeatureFMA,
FeatureRTM,
FeatureHLE,
FeatureADX,
FeatureRDSEED,
FeatureSlowIncDec,
FeatureMPX,
FeatureXSAVEC,
FeatureXSAVES,
FeatureLAHFSAHF
]>; ]>;
def : SkylakeProc<"skylake">;
def : SkylakeProc<"skx">; // Legacy alias.
class CannonlakeProc<string Name> : ProcessorModel<Name, HaswellModel, [ // FIXME: define SKX model
FeatureMMX, class SkylakeServerProc<string Name> : ProcessorModel<Name, HaswellModel,
FeatureAVX512, [ ProcIntelSKX]>;
FeatureFXSR, def : SkylakeServerProc<"skylake">;
FeatureCDI, def : SkylakeServerProc<"skx">; // Legacy alias.
FeatureDQI,
FeatureBWI, def ProcIntelCNL : SubtargetFeature<"cnl", "X86ProcFamily", "IntelCNL",
FeatureVLX, " Intel Cannonlake Processor", [
FeaturePKU, ProcIntelSKX,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureRDRAND,
FeatureF16C,
FeatureFSGSBase,
FeatureMOVBE,
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
FeatureVBMI, FeatureVBMI,
FeatureFMA, FeatureIFMA,
FeatureRTM, FeatureSHA
FeatureHLE,
FeatureADX,
FeatureRDSEED,
FeatureSlowIncDec,
FeatureMPX,
FeatureXSAVEC,
FeatureXSAVES,
FeatureLAHFSAHF
]>; ]>;
class CannonlakeProc<string Name> : ProcessorModel<Name, HaswellModel,
[ ProcIntelCNL ]>;
def : CannonlakeProc<"cannonlake">; def : CannonlakeProc<"cannonlake">;
def : CannonlakeProc<"cnl">; def : CannonlakeProc<"cnl">;

View File

@ -797,6 +797,8 @@ def HasBMI : Predicate<"Subtarget->hasBMI()">;
def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
def HasVBMI : Predicate<"Subtarget->hasVBMI()">, def HasVBMI : Predicate<"Subtarget->hasVBMI()">,
AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">; AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">;
def HasIFMA : Predicate<"Subtarget->hasIFMA()">,
AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">;
def HasRTM : Predicate<"Subtarget->hasRTM()">; def HasRTM : Predicate<"Subtarget->hasRTM()">;
def HasHLE : Predicate<"Subtarget->hasHLE()">; def HasHLE : Predicate<"Subtarget->hasHLE()">;
def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">; def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;

View File

@ -262,6 +262,7 @@ void X86Subtarget::initializeEnvironment() {
HasBMI = false; HasBMI = false;
HasBMI2 = false; HasBMI2 = false;
HasVBMI = false; HasVBMI = false;
HasIFMA = false;
HasRTM = false; HasRTM = false;
HasHLE = false; HasHLE = false;
HasERI = false; HasERI = false;

View File

@ -55,7 +55,8 @@ protected:
}; };
enum X86ProcFamilyEnum { enum X86ProcFamilyEnum {
Others, IntelAtom, IntelSLM Others, IntelAtom, IntelSLM, IntelSNB, IntelIVB, IntelHSW, IntelBDW,
IntelKNL, IntelSKL, IntelSKX, IntelCNL
}; };
/// X86 processor family: Intel Atom, and others /// X86 processor family: Intel Atom, and others
@ -137,6 +138,9 @@ protected:
/// Processor has VBMI instructions. /// Processor has VBMI instructions.
bool HasVBMI; bool HasVBMI;
/// Processor has Integer Fused Multiply Add
bool HasIFMA;
/// Processor has RTM instructions. /// Processor has RTM instructions.
bool HasRTM; bool HasRTM;
@ -158,6 +162,9 @@ protected:
/// Processor has LAHF/SAHF instructions. /// Processor has LAHF/SAHF instructions.
bool HasLAHFSAHF; bool HasLAHFSAHF;
/// Processor has Prefetch with intent to Write instruction
bool HasPFPREFETCHWT1;
/// True if BT (bit test) of memory instructions are slow. /// True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow; bool IsBTMemSlow;
@ -229,9 +236,30 @@ protected:
/// Processor has PKU extenstions /// Processor has PKU extenstions
bool HasPKU; bool HasPKU;
/// Processot supports MPX - Memory Protection Extensions /// Processor supports MPX - Memory Protection Extensions
bool HasMPX; bool HasMPX;
/// Processor supports Invalidate Process-Context Identifier
bool HasInvPCId;
/// Processor has VM Functions
bool HasVMFUNC;
/// Processor has Supervisor Mode Access Protection
bool HasSMAP;
/// Processor has Software Guard Extensions
bool HasSGX;
/// Processor supports Flush Cache Line instruction
bool HasCLFLUSHOPT;
/// Processor has Persistent Commit feature
bool HasPCOMMIT;
/// Processor supports Cache Line Write Back instruction
bool HasCLWB;
/// Use software floating point for code generation. /// Use software floating point for code generation.
bool UseSoftFloat; bool UseSoftFloat;
@ -378,6 +406,7 @@ public:
bool hasBMI() const { return HasBMI; } bool hasBMI() const { return HasBMI; }
bool hasBMI2() const { return HasBMI2; } bool hasBMI2() const { return HasBMI2; }
bool hasVBMI() const { return HasVBMI; } bool hasVBMI() const { return HasVBMI; }
bool hasIFMA() const { return HasIFMA; }
bool hasRTM() const { return HasRTM; } bool hasRTM() const { return HasRTM; }
bool hasHLE() const { return HasHLE; } bool hasHLE() const { return HasHLE; }
bool hasADX() const { return HasADX; } bool hasADX() const { return HasADX; }

View File

@ -214,31 +214,31 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp) ; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: addl (%esp), %eax ; AVX512F-32-NEXT: addl (%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp ; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
@ -303,31 +303,31 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp ; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
@ -390,31 +390,31 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 ; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp) ; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: addl (%esp), %eax ; AVX512F-32-NEXT: addl (%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp ; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
@ -479,31 +479,31 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp ; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
@ -2879,6 +2879,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16>
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
@ -2899,6 +2909,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i8 %x1, <
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpaddw %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm2 {%k1} {z}
; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
@ -2919,6 +2939,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16>
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrlv32hi:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
@ -2939,6 +2969,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16>
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
@ -2959,6 +2999,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i8 %x1, <
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_wi_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm2 {%k1} {z}
; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
@ -2979,6 +3029,16 @@ define <32 x i16>@test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i8 %x1,
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm2 {%k1} {z}
; AVX512F-32-NEXT: vpshufhw $3, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
@ -2999,6 +3059,16 @@ define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i8 %x1,
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm2 {%k1} {z}
; AVX512F-32-NEXT: vpshuflw $3, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
@ -3019,6 +3089,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16>
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
@ -3039,6 +3119,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16>
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
@ -3059,6 +3149,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i8 %x1, <
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_wi_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm2 {%k1} {z}
; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3) %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
@ -3079,6 +3179,16 @@ define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16>
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psllv32hi:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
@ -3159,6 +3269,16 @@ define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i1
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm2 {%k1} {z}
; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
@ -3179,6 +3299,16 @@ define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i1
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z}
; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2) %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
@ -3199,6 +3329,16 @@ define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq ; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpermw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)