[X86] Add ISD nodes for masked truncate so we can properly represent when the output has more elements than the input due to needing to be 128 bits.

We can't properly represent this with a vselect since the upper elements of the result are supposed to be zeroed regardless of the mask.

This also reuses the new nodes even when the result type fits in 128 bits, if the input is q/d and the result is w/b, since a w/b vselect using a k-register condition isn't legal without avx512bw. Currently we're doing this even when avx512bw is enabled, but I might change that.

This fixes some of PR34877

llvm-svn: 350985
This commit is contained in:
Craig Topper 2019-01-12 00:55:27 +00:00
parent 2b7752ef46
commit 45b13032ad
9 changed files with 609 additions and 447 deletions

View File

@ -21385,10 +21385,6 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
case X86ISD::VPSHUFBITQMB:
case X86ISD::VFPCLASS:
return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
case ISD::TRUNCATE:
case X86ISD::VTRUNC:
case X86ISD::VTRUNCS:
case X86ISD::VTRUNCUS:
case X86ISD::CVTPS2PH:
// We can't use ISD::VSELECT here because it is not always "Legal"
// for the destination type. For example vpmovqb require only AVX512
@ -22068,6 +22064,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Results[] = { SetCC, Res };
return DAG.getMergeValues(Results, dl);
}
// Masked truncate intrinsic lowering.
// Intrinsic operands read here: 1 = source vector, 2 = pass-through value,
// 3 = mask.
case TRUNCATE_TO_REG: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
// Constant all-ones mask: build the unmasked node (IntrData->Opc0) and hand
// it to getVectorMaskingNode together with the mask and pass-through.
if (isAllOnesConstant(Mask))
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl,
Op.getValueType(), Src),
Mask, PassThru, Subtarget, DAG);
// Otherwise the mask is turned into a vXi1 vector with one element per
// *source* element (not per result element), and the masked node
// (IntrData->Opc1) is emitted with operands (Src, PassThru, Mask).
MVT SrcVT = Src.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,
Mask);
}
default:
break;
}
@ -27172,6 +27184,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
case X86ISD::VMTRUNC: return "X86ISD::VMTRUNC";
case X86ISD::VMTRUNCS: return "X86ISD::VMTRUNCS";
case X86ISD::VMTRUNCUS: return "X86ISD::VMTRUNCUS";
case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES";
case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS";
case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";

View File

@ -292,6 +292,12 @@ namespace llvm {
// Vector integer truncate with unsigned/signed saturation.
VTRUNCUS, VTRUNCS,
// Masked version of the above. Used when less than a 128-bit result is
// produced since the mask only applies to the lower elements and can't
// be represented by a select.
// SRC, PASSTHRU, MASK
VMTRUNC, VMTRUNCUS, VMTRUNCS,
// Vector FP extend.
VFPEXT, VFPEXT_RND, VFPEXTS_RND,

View File

@ -9318,14 +9318,47 @@ defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
// Integer truncate and extend operations
//-------------------------------------------------
// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
// Operands for all three fragments: ($src, $src0, $mask), where $src0 is the
// value selected when the mask bit is clear.
// vselect(mask, trunc(src), src0)
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
(vselect node:$mask,
(trunc node:$src), node:$src0)>;
// vselect(mask, X86vtruncs(src), src0)
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
(vselect node:$mask,
(X86vtruncs node:$src), node:$src0)>;
// vselect(mask, X86vtruncus(src), src0)
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
(vselect node:$mask,
(X86vtruncus node:$src), node:$src0)>;
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
let ExeDomain = DestInfo.ExeDomain in
defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
EVEX, T8XS, Sched<[sched]>;
let ExeDomain = DestInfo.ExeDomain in {
def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src),
OpcodeStr # "\t{$src, $dst|$dst, $src}",
[(set DestInfo.RC:$dst,
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
EVEX, Sched<[sched]>;
let Constraints = "$src0 = $dst" in
def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
(ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
[(set DestInfo.RC:$dst,
(MaskNode (SrcInfo.VT SrcInfo.RC:$src),
(DestInfo.VT DestInfo.RC:$src0),
SrcInfo.KRCWM:$mask))]>,
EVEX, EVEX_K, Sched<[sched]>;
def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
(ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
[(set DestInfo.RC:$dst,
(DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
EVEX, EVEX_KZ, Sched<[sched]>;
}
let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
def mr : AVX512XS8I<opc, MRMDestMem, (outs),
@ -9356,7 +9389,11 @@ multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
}
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched,
SDNode OpNode256, SDNode OpNode512,
SDPatternOperator MaskNode128,
SDPatternOperator MaskNode256,
SDPatternOperator MaskNode512,
X86FoldableSchedWrite sched,
AVX512VLVectorVTInfo VTSrcInfo,
X86VectorVTInfo DestInfoZ128,
X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
@ -9365,118 +9402,167 @@ multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
PatFrag mtruncFrag, Predicate prd = HasAVX512>{
let Predicates = [HasVLX, prd] in {
defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, sched,
defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
truncFrag, mtruncFrag, NAME>, EVEX_V128;
defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, sched,
defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
truncFrag, mtruncFrag, NAME>, EVEX_V256;
}
let Predicates = [prd] in
defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, sched,
defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
VTSrcInfo.info512, DestInfoZ, x86memopZ>,
avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
truncFrag, mtruncFrag, NAME>, EVEX_V512;
}
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, sched,
PatFrag MaskedStoreNode, SDNode InVecNode,
SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
avx512vl_i64_info, v16i8x_info, v16i8x_info,
v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
PatFrag MaskedStoreNode, SDNode InVecNode,
SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
InVecMaskNode, InVecMaskNode, MaskNode, sched,
avx512vl_i64_info, v8i16x_info, v8i16x_info,
v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
PatFrag MaskedStoreNode, SDNode InVecNode,
SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
InVecMaskNode, MaskNode, MaskNode, sched,
avx512vl_i64_info, v4i32x_info, v4i32x_info,
v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
PatFrag MaskedStoreNode, SDNode InVecNode,
SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
InVecMaskNode, InVecMaskNode, MaskNode, sched,
avx512vl_i32_info, v16i8x_info, v16i8x_info,
v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
PatFrag MaskedStoreNode, SDNode InVecNode,
SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
InVecMaskNode, MaskNode, MaskNode, sched,
avx512vl_i32_info, v8i16x_info, v8i16x_info,
v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDPatternOperator MaskNode,
X86FoldableSchedWrite sched, PatFrag StoreNode,
PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
PatFrag MaskedStoreNode, SDNode InVecNode,
SDPatternOperator InVecMaskNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
sched, avx512vl_i16_info, v16i8x_info, v16i8x_info,
InVecMaskNode, MaskNode, MaskNode, sched,
avx512vl_i16_info, v16i8x_info, v16i8x_info,
v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, WriteShuffle256,
truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, WriteShuffle256,
truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256,
truncstore_us_vi8, masked_truncstore_us_vi8>;
defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
WriteShuffle256, truncstorevi8,
masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
WriteShuffle256, truncstore_s_vi8,
masked_truncstore_s_vi8, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
select_truncus, WriteShuffle256,
truncstore_us_vi8, masked_truncstore_us_vi8,
X86vtruncus, X86vmtruncus>;
defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, WriteShuffle256,
truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, WriteShuffle256,
truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256,
truncstore_us_vi16, masked_truncstore_us_vi16>;
defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
WriteShuffle256, truncstorevi16,
masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
WriteShuffle256, truncstore_s_vi16,
masked_truncstore_s_vi16, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
select_truncus, WriteShuffle256,
truncstore_us_vi16, masked_truncstore_us_vi16,
X86vtruncus, X86vmtruncus>;
defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, WriteShuffle256,
truncstorevi32, masked_truncstorevi32, X86vtrunc>;
defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, WriteShuffle256,
truncstore_s_vi32, masked_truncstore_s_vi32>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256,
truncstore_us_vi32, masked_truncstore_us_vi32>;
defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
WriteShuffle256, truncstorevi32,
masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
WriteShuffle256, truncstore_s_vi32,
masked_truncstore_s_vi32, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
select_truncus, WriteShuffle256,
truncstore_us_vi32, masked_truncstore_us_vi32,
X86vtruncus, X86vmtruncus>;
defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256,
truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, WriteShuffle256,
truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, WriteShuffle256,
truncstore_us_vi8, masked_truncstore_us_vi8>;
defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
WriteShuffle256, truncstorevi8,
masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
WriteShuffle256, truncstore_s_vi8,
masked_truncstore_s_vi8, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
select_truncus, WriteShuffle256,
truncstore_us_vi8, masked_truncstore_us_vi8,
X86vtruncus, X86vmtruncus>;
defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256,
truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, WriteShuffle256,
truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, WriteShuffle256,
truncstore_us_vi16, masked_truncstore_us_vi16>;
defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
WriteShuffle256, truncstorevi16,
masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
WriteShuffle256, truncstore_s_vi16,
masked_truncstore_s_vi16, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
select_truncus, WriteShuffle256,
truncstore_us_vi16, masked_truncstore_us_vi16,
X86vtruncus, X86vmtruncus>;
defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256,
truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, WriteShuffle256,
truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, WriteShuffle256,
truncstore_us_vi8, masked_truncstore_us_vi8>;
defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
WriteShuffle256, truncstorevi8,
masked_truncstorevi8, X86vtrunc,
X86vmtrunc>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
WriteShuffle256, truncstore_s_vi8,
masked_truncstore_s_vi8, X86vtruncs,
X86vmtruncs>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
select_truncus, WriteShuffle256,
truncstore_us_vi8, masked_truncstore_us_vi8,
X86vtruncus, X86vmtruncus>;
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
@ -9495,6 +9581,44 @@ def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
VR256X:$src, sub_ymm))), sub_xmm))>;
}
// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Selection patterns that map a masked-truncate node OpNode(src, src0, mask)
// onto the masked register forms of the instruction named by InstrName.
//   InstrName - instruction name prefix; "rrk" / "rrkz" is appended.
//   OpNode    - masked truncate node taking (src, passthru, mask).
//   DestInfo  - type info for the result and pass-through operand.
//   SrcInfo   - type info for the source vector; also supplies the mask
//               register class (KRCWM).
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
X86VectorVTInfo DestInfo,
X86VectorVTInfo SrcInfo> {
// Pass-through in a register: select the "rrk" form.
def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
DestInfo.RC:$src0,
SrcInfo.KRCWM:$mask)),
(!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
SrcInfo.KRCWM:$mask,
SrcInfo.RC:$src)>;
// Pass-through is all zeros: select the "rrkz" form, which takes no $src0.
def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
DestInfo.ImmAllZerosV,
SrcInfo.KRCWM:$mask)),
(!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
SrcInfo.RC:$src)>;
}
// v8i32 -> v8i16 masked truncates (plain, signed-saturating,
// unsigned-saturating) using the Z256 instruction forms; requires VLX.
let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}
// 512-bit sources, in order: v16i32 -> v16i16, v16i32 -> v16i8,
// v8i64 -> v8i16. Each gets the plain, signed-saturating and
// unsigned-saturating variant.
let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{

View File

@ -104,12 +104,21 @@ def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>,
SDTCisOpSmallerThanOp<0, 1>]>;
SDTCisInt<0>, SDTCisInt<1>,
SDTCisOpSmallerThanOp<0, 1>]>;
// Type profile for masked vector truncates: 1 result, 3 operands.
//   0 (result) and 1 (source) are integer vectors, with 0 smaller than 1.
//   2 (pass-through) has the same type as the result.
//   3 (mask) is a vector of i1 with the same number of elements as the
//     source (1), not the result.
def SDTVmtrunc : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>,
SDTCisOpSmallerThanOp<0, 1>,
SDTCisSameAs<0, 2>,
SDTCVecEltisVT<3, i1>,
SDTCisSameNumEltsAs<1, 3>]>;
def X86vtrunc : SDNode<"X86ISD::VTRUNC", SDTVtrunc>;
def X86vtruncs : SDNode<"X86ISD::VTRUNCS", SDTVtrunc>;
def X86vtruncus : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>;
// Masked truncate nodes (plain, signed-saturating, unsigned-saturating).
// All three share the SDTVmtrunc type profile.
def X86vmtrunc : SDNode<"X86ISD::VMTRUNC", SDTVmtrunc>;
def X86vmtruncs : SDNode<"X86ISD::VMTRUNCS", SDTVmtrunc>;
def X86vmtruncus : SDNode<"X86ISD::VMTRUNCUS", SDTVmtrunc>;
def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,

View File

@ -32,6 +32,7 @@ enum IntrinsicType : uint16_t {
IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
COMPRESS_EXPAND_IN_REG,
TRUNCATE_TO_REG,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, GATHER_AVX2,
@ -667,110 +668,110 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMULS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FMULS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, TRUNCATE_TO_REG,
X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, TRUNCATE_TO_REG,
X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, TRUNCATE_TO_REG,
ISD::TRUNCATE, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, TRUNCATE_TO_REG,
X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, TRUNCATE_TO_REG,
ISD::TRUNCATE, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, TRUNCATE_TO_REG,
ISD::TRUNCATE, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, TRUNCATE_TO_REG,
X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, TRUNCATE_TO_REG,
X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, TRUNCATE_TO_REG,
X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, TRUNCATE_TO_REG,
X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_qd_256, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_qd_512, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, TRUNCATE_TO_REG,
X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, TRUNCATE_TO_REG,
X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, TRUNCATE_TO_REG,
ISD::TRUNCATE, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, TRUNCATE_TO_REG,
X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_256, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_512, INTR_TYPE_1OP_MASK,
ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, TRUNCATE_TO_REG,
X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, TRUNCATE_TO_REG,
X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, TRUNCATE_TO_REG,
X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, TRUNCATE_TO_REG,
X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, TRUNCATE_TO_REG,
X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, TRUNCATE_TO_REG,
X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, TRUNCATE_TO_REG,
X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, TRUNCATE_TO_REG,
X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, TRUNCATE_TO_REG,
X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, TRUNCATE_TO_REG,
X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, TRUNCATE_TO_REG,
X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, TRUNCATE_TO_REG,
X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, TRUNCATE_TO_REG,
X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, TRUNCATE_TO_REG,
X86ISD::VTRUNCUS, X86ISD::VMTRUNCUS),
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_256, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK,

View File

@ -2603,11 +2603,11 @@ define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqb %zmm0, %xmm2
; CHECK-NEXT: vpmovqb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpmovqb %zmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovqb %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
@ -2639,11 +2639,11 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8>
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqb %zmm0, %xmm2
; CHECK-NEXT: vpmovsqb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpmovsqb %zmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovsqb %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
@ -2675,11 +2675,11 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8>
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqb %zmm0, %xmm2
; CHECK-NEXT: vpmovusqb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpmovusqb %zmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovusqb %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
@ -2711,11 +2711,11 @@ define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqw %zmm0, %xmm2
; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpmovqw %zmm0, %xmm0
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovqw %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
@ -2747,11 +2747,11 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16>
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqw %zmm0, %xmm2
; CHECK-NEXT: vpmovsqw %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpmovsqw %zmm0, %xmm0
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovsqw %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
@ -2783,11 +2783,11 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16>
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqw %zmm0, %xmm2
; CHECK-NEXT: vpmovusqw %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpmovusqw %zmm0, %xmm0
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovusqw %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
@ -2924,11 +2924,11 @@ define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8>
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovdb %zmm0, %xmm2
; CHECK-NEXT: vpmovdb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpmovdb %zmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
@ -2960,11 +2960,11 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8>
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsdb %zmm0, %xmm2
; CHECK-NEXT: vpmovsdb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpmovsdb %zmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovsdb %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
@ -2996,11 +2996,11 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusdb %zmm0, %xmm2
; CHECK-NEXT: vpmovusdb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovusdb %zmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
@ -3032,11 +3032,11 @@ define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovdw %zmm0, %ymm2
; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpmovdw %zmm0, %ymm0
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpmovdw %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
%res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
@ -3067,11 +3067,11 @@ define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i1
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovsdw %zmm0, %ymm2
; CHECK-NEXT: vpmovsdw %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpmovsdw %zmm0, %ymm0
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpmovsdw %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
%res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
@ -3102,11 +3102,11 @@ define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovusdw %zmm0, %ymm2
; CHECK-NEXT: vpmovusdw %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vpmovusdw %zmm0, %ymm0
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpmovusdw %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
%res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)

View File

@ -1454,22 +1454,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2]
; X86-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovwb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2]
; X64-NEXT: vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2]
; X64-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovwb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
@ -1507,22 +1508,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2]
; X86-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovswb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2]
; X64-NEXT: vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2]
; X64-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovswb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
@ -1560,22 +1562,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2]
; X86-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovuswb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2]
; X64-NEXT: vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2]
; X64-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovuswb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)

View File

@ -1617,23 +1617,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc2]
; X86-NEXT: vpmovqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc2]
; X86-NEXT: vpmovqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovqb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc2]
; X64-NEXT: vpmovqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc2]
; X64-NEXT: vpmovqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovqb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
@ -1671,23 +1671,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc2]
; X86-NEXT: vpmovsqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc2]
; X86-NEXT: vpmovsqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovsqb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovsqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc2]
; X64-NEXT: vpmovsqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc2]
; X64-NEXT: vpmovsqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovsqb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovsqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
@ -1725,23 +1725,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc2]
; X86-NEXT: vpmovusqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc2]
; X86-NEXT: vpmovusqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovusqb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovusqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc2]
; X64-NEXT: vpmovusqb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc2]
; X64-NEXT: vpmovusqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovusqb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovusqb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
@ -1779,24 +1779,24 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc2]
; X86-NEXT: vpmovqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc2]
; X86-NEXT: vpmovqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovqb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc2]
; X64-NEXT: vpmovqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc2]
; X64-NEXT: vpmovqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovqb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
@ -1837,24 +1837,24 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc2]
; X86-NEXT: vpmovsqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc2]
; X86-NEXT: vpmovsqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovsqb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovsqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc2]
; X64-NEXT: vpmovsqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc2]
; X64-NEXT: vpmovsqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovsqb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovsqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
@ -1895,24 +1895,24 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc2]
; X86-NEXT: vpmovusqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc2]
; X86-NEXT: vpmovusqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovusqb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovusqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc2]
; X64-NEXT: vpmovusqb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc2]
; X64-NEXT: vpmovusqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovusqb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovusqb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
@ -1955,21 +1955,21 @@ define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> %
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc2]
; X86-NEXT: vpmovqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0xc1]
; X86-NEXT: vpmovqw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc2]
; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X86-NEXT: vpmovqw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc0]
; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X86-NEXT: vpmovqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovqw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc2]
; X64-NEXT: vpmovqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc2]
; X64-NEXT: vpmovqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x34,0xc1]
; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X64-NEXT: vpmovqw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc0]
; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X64-NEXT: vpmovqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
@ -2009,21 +2009,21 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16>
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc2]
; X86-NEXT: vpmovsqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0xc1]
; X86-NEXT: vpmovsqw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc2]
; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X86-NEXT: vpmovsqw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc0]
; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X86-NEXT: vpmovsqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsqw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc2]
; X64-NEXT: vpmovsqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc2]
; X64-NEXT: vpmovsqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x24,0xc1]
; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X64-NEXT: vpmovsqw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc0]
; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X64-NEXT: vpmovsqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
@ -2063,21 +2063,21 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16>
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc2]
; X86-NEXT: vpmovusqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0xc1]
; X86-NEXT: vpmovusqw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc2]
; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X86-NEXT: vpmovusqw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc0]
; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X86-NEXT: vpmovusqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovusqw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc2]
; X64-NEXT: vpmovusqw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc2]
; X64-NEXT: vpmovusqw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x14,0xc1]
; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X64-NEXT: vpmovusqw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc0]
; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X64-NEXT: vpmovusqw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
@ -2117,22 +2117,22 @@ define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> %
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc2]
; X86-NEXT: vpmovqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0xc1]
; X86-NEXT: vpmovqw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc2]
; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X86-NEXT: vpmovqw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc0]
; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X86-NEXT: vpmovqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_qw_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovqw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc2]
; X64-NEXT: vpmovqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc2]
; X64-NEXT: vpmovqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x34,0xc1]
; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X64-NEXT: vpmovqw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc0]
; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X64-NEXT: vpmovqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
@ -2175,22 +2175,22 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16>
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc2]
; X86-NEXT: vpmovsqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0xc1]
; X86-NEXT: vpmovsqw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc2]
; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X86-NEXT: vpmovsqw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc0]
; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X86-NEXT: vpmovsqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_qw_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsqw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc2]
; X64-NEXT: vpmovsqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc2]
; X64-NEXT: vpmovsqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x24,0xc1]
; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X64-NEXT: vpmovsqw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc0]
; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X64-NEXT: vpmovsqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
@ -2233,22 +2233,22 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16>
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc2]
; X86-NEXT: vpmovusqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0xc1]
; X86-NEXT: vpmovusqw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc2]
; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X86-NEXT: vpmovusqw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc0]
; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X86-NEXT: vpmovusqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_qw_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovusqw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc2]
; X64-NEXT: vpmovusqw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc2]
; X64-NEXT: vpmovusqw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x14,0xc1]
; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X64-NEXT: vpmovusqw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc0]
; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X64-NEXT: vpmovusqw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
@ -2291,21 +2291,21 @@ define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> %
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc2]
; X86-NEXT: vpmovqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1]
; X86-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc2]
; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
; X86-NEXT: vpmovqd %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc0]
; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-NEXT: vpmovqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc2]
; X64-NEXT: vpmovqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc2]
; X64-NEXT: vpmovqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1]
; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
; X64-NEXT: vpmovqd %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc0]
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-NEXT: vpmovqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc0]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
@ -2345,20 +2345,20 @@ define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32>
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x25,0xc1]
; X86-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x25,0xc2]
; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
; X86-NEXT: vpmovsqd %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x25,0xc0]
; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
; X86-NEXT: vmovdqa32 %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd0]
; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_qd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x25,0xc2]
; X64-NEXT: vpmovsqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x25,0xc1]
; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
; X64-NEXT: vpmovsqd %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x25,0xc0]
; X64-NEXT: vmovdqa32 %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd0]
; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
@ -2399,21 +2399,21 @@ define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32>
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc2]
; X86-NEXT: vpmovusqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1]
; X86-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc2]
; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
; X86-NEXT: vpmovusqd %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc0]
; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-NEXT: vpmovusqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_qd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc2]
; X64-NEXT: vpmovusqd %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc2]
; X64-NEXT: vpmovusqd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1]
; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xca]
; X64-NEXT: vpmovusqd %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc0]
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-NEXT: vpmovusqd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc0]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
@ -2625,23 +2625,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_db_128:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc2]
; X86-NEXT: vpmovdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc2]
; X86-NEXT: vpmovdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovdb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_db_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc2]
; X64-NEXT: vpmovdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc2]
; X64-NEXT: vpmovdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovdb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
@ -2679,23 +2679,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc2]
; X86-NEXT: vpmovsdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc2]
; X86-NEXT: vpmovsdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovsdb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovsdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc2]
; X64-NEXT: vpmovsdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc2]
; X64-NEXT: vpmovsdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovsdb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovsdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
@ -2733,23 +2733,23 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc2]
; X86-NEXT: vpmovusdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc2]
; X86-NEXT: vpmovusdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovusdb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovusdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc2]
; X64-NEXT: vpmovusdb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc2]
; X64-NEXT: vpmovusdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovusdb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovusdb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
@ -2787,24 +2787,24 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_db_256:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc2]
; X86-NEXT: vpmovdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc2]
; X86-NEXT: vpmovdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovdb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_db_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc2]
; X64-NEXT: vpmovdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc2]
; X64-NEXT: vpmovdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovdb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
@ -2845,24 +2845,24 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc2]
; X86-NEXT: vpmovsdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc2]
; X86-NEXT: vpmovsdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovsdb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovsdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc2]
; X64-NEXT: vpmovsdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc2]
; X64-NEXT: vpmovsdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovsdb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovsdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
@ -2903,24 +2903,24 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8)
define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc2]
; X86-NEXT: vpmovusdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc2]
; X86-NEXT: vpmovusdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovusdb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vpmovusdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc2]
; X64-NEXT: vpmovusdb %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc2]
; X64-NEXT: vpmovusdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovusdb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vpmovusdb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
@ -2963,21 +2963,21 @@ define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> %
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc2]
; X86-NEXT: vpmovdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0xc1]
; X86-NEXT: vpmovdw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc2]
; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X86-NEXT: vpmovdw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc0]
; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X86-NEXT: vpmovdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovdw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc2]
; X64-NEXT: vpmovdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc2]
; X64-NEXT: vpmovdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x33,0xc1]
; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X64-NEXT: vpmovdw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc0]
; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X64-NEXT: vpmovdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
@ -3017,21 +3017,21 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16>
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc2]
; X86-NEXT: vpmovsdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0xc1]
; X86-NEXT: vpmovsdw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc2]
; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X86-NEXT: vpmovsdw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc0]
; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X86-NEXT: vpmovsdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsdw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc2]
; X64-NEXT: vpmovsdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc2]
; X64-NEXT: vpmovsdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x23,0xc1]
; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X64-NEXT: vpmovsdw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc0]
; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X64-NEXT: vpmovsdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
@ -3071,21 +3071,21 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16>
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc2]
; X86-NEXT: vpmovusdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0xc1]
; X86-NEXT: vpmovusdw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc2]
; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X86-NEXT: vpmovusdw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc0]
; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X86-NEXT: vpmovusdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovusdw %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc2]
; X64-NEXT: vpmovusdw %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc2]
; X64-NEXT: vpmovusdw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x13,0xc1]
; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X64-NEXT: vpmovusdw %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc0]
; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X64-NEXT: vpmovusdw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
%res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
@ -3125,22 +3125,22 @@ define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> %
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc2]
; X86-NEXT: vpmovdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0xc1]
; X86-NEXT: vpmovdw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc2]
; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X86-NEXT: vpmovdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc0]
; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X86-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_dw_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovdw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc2]
; X64-NEXT: vpmovdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc2]
; X64-NEXT: vpmovdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x33,0xc1]
; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X64-NEXT: vpmovdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc0]
; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X64-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
@ -3183,22 +3183,22 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16>
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc2]
; X86-NEXT: vpmovsdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0xc1]
; X86-NEXT: vpmovsdw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc2]
; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X86-NEXT: vpmovsdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc0]
; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X86-NEXT: vpmovsdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_dw_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsdw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc2]
; X64-NEXT: vpmovsdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc2]
; X64-NEXT: vpmovsdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x23,0xc1]
; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X64-NEXT: vpmovsdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc0]
; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X64-NEXT: vpmovsdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
@ -3241,22 +3241,22 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16>
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc2]
; X86-NEXT: vpmovusdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0xc1]
; X86-NEXT: vpmovusdw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc2]
; X86-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X86-NEXT: vpmovusdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc0]
; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X86-NEXT: vpmovusdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_dw_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovusdw %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc2]
; X64-NEXT: vpmovusdw %ymm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc2]
; X64-NEXT: vpmovusdw %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x13,0xc1]
; X64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfd,0xca]
; X64-NEXT: vpmovusdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc0]
; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
; X64-NEXT: vpmovusdw %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc0]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)

View File

@ -4,7 +4,11 @@
define <2 x i64> @undef_tval() {
; CHECK-LABEL: undef_tval:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [1,1,1,1,1,1,1,1]
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovqw %zmm0, %xmm0 {%k1}
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> undef, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, i8 1) #3
%2 = bitcast <8 x i16> %1 to <2 x i64>