AVX512: Implemented encoding and intrinsics for VPERMILPS/PD instructions.

Added tests for intrinsics and encoding.

Differential Revision: http://reviews.llvm.org/D12690

llvm-svn: 249261
This commit is contained in:
Igor Breger 2015-10-04 07:20:41 +00:00
parent 28f3a7787c
commit 38dd6d8710
11 changed files with 1142 additions and 61 deletions

View File

@ -1406,6 +1406,78 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
// Masked VPERMILPD/VPERMILPS intrinsics, immediate-control form.
// Operands, in order: source vector, i32 control value, pass-through vector,
// write mask (i8, or i16 for the 16-element v16f32 variant).
// All are declared IntrNoMem.
def int_x86_avx512_mask_vpermil_pd_128 :
GCCBuiltin<"__builtin_ia32_vpermilpd_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_vpermil_pd_256 :
GCCBuiltin<"__builtin_ia32_vpermilpd256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_vpermil_pd_512 :
GCCBuiltin<"__builtin_ia32_vpermilpd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_vpermil_ps_128 :
GCCBuiltin<"__builtin_ia32_vpermilps_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_vpermil_ps_256 :
GCCBuiltin<"__builtin_ia32_vpermilps256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_vpermil_ps_512 :
GCCBuiltin<"__builtin_ia32_vpermilps512_mask">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrNoMem]>;
// Masked VPERMILPD/VPERMILPS intrinsics, vector-control form.
// Operands, in order: source vector, integer control vector (same element
// count and element width as the source), pass-through vector, write mask.
// NOTE(review): the 128-bit variants are listed after the 512-bit ones here,
// unlike the 128/256/512 order used for the immediate form above.
def int_x86_avx512_mask_vpermilvar_pd_256 :
GCCBuiltin<"__builtin_ia32_vpermilvarpd256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_vpermilvar_pd_512 :
GCCBuiltin<"__builtin_ia32_vpermilvarpd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_vpermilvar_pd_128 :
GCCBuiltin<"__builtin_ia32_vpermilvarpd_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_vpermilvar_ps_256 :
GCCBuiltin<"__builtin_ia32_vpermilvarps256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_vpermilvar_ps_512 :
GCCBuiltin<"__builtin_ia32_vpermilvarps512_mask">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_vpermilvar_ps_128 :
GCCBuiltin<"__builtin_ia32_vpermilvarps_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_pshuf_b_128 :
GCCBuiltin<"__builtin_ia32_pshufb128_mask">,
Intrinsic<[llvm_v16i8_ty],

View File

@ -16003,11 +16003,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
RoundingMode, Sae),
Mask, Src0, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK: {
case INTR_TYPE_2OP_MASK:
case INTR_TYPE_2OP_IMM8_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
if (IntrData->Type == INTR_TYPE_2OP_IMM8_MASK)
Src2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src2);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.

View File

@ -1109,62 +1109,6 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
}
//===----------------------------------------------------------------------===//
// AVX-512 - VPERM
//
// -- immediate form --
multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
def ri : AVX512AIi8<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,
(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
EVEX;
def mi : AVX512AIi8<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,
(_.VT (OpNode (_.LdFrag addr:$src1),
(i8 imm:$src2))))]>,
EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
}
multiclass avx512_permil<bits<8> OpcImm, bits<8> OpcVar, X86VectorVTInfo _,
X86VectorVTInfo Ctrl> :
avx512_perm_imm<OpcImm, "vpermil" # _.Suffix, X86VPermilpi, _> {
let ExeDomain = _.ExeDomain in {
def rr : AVX5128I<OpcVar, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2),
!strconcat("vpermil" # _.Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,
(_.VT (X86VPermilpv _.RC:$src1,
(Ctrl.VT Ctrl.RC:$src2))))]>,
EVEX_4V;
def rm : AVX5128I<OpcVar, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, Ctrl.MemOp:$src2),
!strconcat("vpermil" # _.Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,
(_.VT (X86VPermilpv _.RC:$src1,
(Ctrl.VT (Ctrl.LdFrag addr:$src2)))))]>,
EVEX_4V;
}
}
defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>,
EVEX_V512;
defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>,
EVEX_V512, VEX_W;
def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
(VPERMILPSZri VR512:$src1, imm:$imm)>;
def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
(VPERMILPDZri VR512:$src1, imm:$imm)>;
// -- VPERM2I - 3 source operands form --
multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _> {
@ -4130,7 +4074,73 @@ defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
X86VPermi, avx512vl_f64_info>,
EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//
// Variable-control VPERMIL definitions for a single vector width.
//   OpcVar    - opcode byte of the variable-control form.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - DAG node matched by the selection patterns.
//   _         - vector type info of the data operand and result.
//   Ctrl      - vector type info of the control operand.
// Every form is emitted through AVX512_maskable (defined outside this view;
// presumably it supplies the unmasked, merge-masked and zero-masked
// variants - TODO confirm against its definition).
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, X86VectorVTInfo Ctrl> {
// rr: control vector held in a register of Ctrl's register class.
defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(Ctrl.VT Ctrl.RC:$src2)))>,
T8PD, EVEX_4V;
let mayLoad = 1 in {
// rm: control vector loaded from memory and bitconverted to Ctrl.VT.
defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode
_.RC:$src1,
(Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
// rmb: control is a scalar load broadcast with X86VBroadcast (EVEX_B set).
// The memory operand and broadcast suffix string come from `_`, while the
// load fragment comes from `Ctrl`.
// NOTE(review): this assumes `_` and `Ctrl` share an element size - confirm.
defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(_.VT (OpNode
_.RC:$src1,
(Ctrl.VT (X86VBroadcast
(Ctrl.ScalarLdFrag addr:$src2)))))>,
T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}//let mayLoad = 1
}
// Instantiates avx512_permil_vec with the X86VPermilpv node for each vector
// width described by `_` (data) and `Ctrl` (control):
//   Z    - 512-bit, requires HasAVX512.
//   Z128 - 128-bit, requires HasAVX512 and HasVLX.
//   Z256 - 256-bit, requires HasAVX512 and HasVLX.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
let Predicates = [HasAVX512] in {
defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info512,
Ctrl.info512>, EVEX_V512;
}
let Predicates = [HasAVX512, HasVLX] in {
defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info128,
Ctrl.info128>, EVEX_V128;
defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info256,
Ctrl.info256>, EVEX_V256;
}
}
// Top-level VPERMIL multiclass: emits both encodings of the instruction.
//   OpcodeStr - assembly mnemonic.
//   OpcImm    - opcode byte of the immediate-control form.
//   OpcVar    - opcode byte of the variable-control form.
//   _         - per-width type info of the data vector.
//   Ctrl      - per-width type info of the control vector.
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
// Variable-control form (X86VPermilpv) on the data type `_`.
defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, _, Ctrl>;
// Immediate-control form (X86VPermilpi), built with avx512_shift_rmi_sizes.
defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
X86VPermilpi, _>,
EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
let isCodeGenOnly = 1 in {
// Codegen-only "_I" duplicates of both forms in which the data vector uses
// the Ctrl type info as well (Ctrl is passed for both `_` and `Ctrl`), so
// the same nodes can be selected when they carry the alternative types.
defm NAME#_I: avx512_permil_vec_common<OpcodeStr, OpcVar, Ctrl, Ctrl>;
defm NAME#_I: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem,
OpcodeStr, X86VPermilpi, Ctrl>,
EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
// VPERMILPS: immediate form opcode 0x04, variable form opcode 0x0C;
// f32 data vectors with i32 control vectors.
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
avx512vl_i32_info>;
// VPERMILPD: immediate form opcode 0x05, variable form opcode 0x0D;
// f64 data vectors with i64 control vectors, marked VEX_W.
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
avx512vl_i64_info>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

View File

@ -8087,17 +8087,19 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
(bitconvert (i_frag addr:$src2))))]>, VEX_4V,
Sched<[WriteFShuffleLd, ReadAfterLd]>;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
let Predicates = [HasAVX, NoVLX] in {
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
Sched<[WriteFShuffle]>;
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
(ins x86memop_f:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
Sched<[WriteFShuffleLd]>;
}// Predicates = [HasAVX, NoVLX]
}
let ExeDomain = SSEPackedSingle in {
@ -8113,7 +8115,7 @@ let ExeDomain = SSEPackedDouble in {
loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
}
let Predicates = [HasAVX] in {
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))),
(VPERMILPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),

View File

@ -22,7 +22,7 @@ enum IntrinsicType {
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_2OP_IMM8_MASK,
INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
@ -1407,6 +1407,30 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPERMIV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK,
X86ISD::VPERMIV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_128, INTR_TYPE_2OP_IMM8_MASK,
X86ISD::VPERMILPI, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_256, INTR_TYPE_2OP_IMM8_MASK,
X86ISD::VPERMILPI, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_512, INTR_TYPE_2OP_IMM8_MASK,
X86ISD::VPERMILPI, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_128, INTR_TYPE_2OP_IMM8_MASK,
X86ISD::VPERMILPI, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_256, INTR_TYPE_2OP_IMM8_MASK,
X86ISD::VPERMILPI, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_512, INTR_TYPE_2OP_IMM8_MASK,
X86ISD::VPERMILPI, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_128, INTR_TYPE_2OP_MASK,
X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_256, INTR_TYPE_2OP_MASK,
X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_512, INTR_TYPE_2OP_MASK,
X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_128, INTR_TYPE_2OP_MASK,
X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_256, INTR_TYPE_2OP_MASK,
X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_512, INTR_TYPE_2OP_MASK,
X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,

View File

@ -4348,6 +4348,88 @@ define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x
ret <16 x float> %res2
}
declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)
define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm0
; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
%res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
%res3 = fadd <8 x double> %res, %res1
%res4 = fadd <8 x double> %res3, %res2
ret <8 x double> %res4
}
declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)
define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilps $22, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vpermilps $22, %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vpermilps $22, %zmm0, %zmm0
; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
%res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
%res3 = fadd <16 x float> %res, %res1
%res4 = fadd <16 x float> %res3, %res2
ret <16 x float> %res4
}
declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
%res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
%res3 = fadd <8 x double> %res, %res1
%res4 = fadd <8 x double> %res2, %res3
ret <8 x double> %res4
}
declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
%res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
%res3 = fadd <16 x float> %res, %res1
%res4 = fadd <16 x float> %res2, %res3
ret <16 x float> %res4
}
declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i8)
define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i8 %x4) {

View File

@ -4812,6 +4812,174 @@ define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64>
ret <4 x i64> %res2
}
declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)
define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0
; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
%res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
%res3 = fadd <4 x double> %res, %res1
%res4 = fadd <4 x double> %res2, %res3
ret <4 x double> %res4
}
declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)
define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
%res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
%res3 = fadd <2 x double> %res, %res1
%res4 = fadd <2 x double> %res3, %res2
ret <2 x double> %res4
}
declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)
define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0
; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
%res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
%res3 = fadd <8 x float> %res, %res1
%res4 = fadd <8 x float> %res3, %res2
ret <8 x float> %res4
}
declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)
define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
%res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
%res3 = fadd <4 x float> %res, %res1
%res4 = fadd <4 x float> %res2, %res3
ret <4 x float> %res4
}
declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
%res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
%res3 = fadd <4 x double> %res, %res1
%res4 = fadd <4 x double> %res2, %res3
ret <4 x double> %res4
}
declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3)
%res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
%res3 = fadd <2 x double> %res, %res1
%res4 = fadd <2 x double> %res3, %res2
ret <2 x double> %res4
}
declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm3 {%k1} {z}
; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
%res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
%res3 = fadd <8 x float> %res, %res1
%res4 = fadd <8 x float> %res3, %res2
ret <8 x float> %res4
}
declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3)
%res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
%res3 = fadd <4 x float> %res, %res1
%res4 = fadd <4 x float> %res2, %res3
ret <4 x float> %res4
}
declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8)
define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
@ -135,6 +136,11 @@ define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: retq
; AVX512VL-LABEL: shuffle_v2f64_10:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermilpd $1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
ret <2 x double> %shuffle
}
@ -191,6 +197,11 @@ define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT: retq
; AVX512VL-LABEL: shuffle_v2f64_32:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermilpd $1, %xmm1, %xmm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
ret <2 x double> %shuffle
}
@ -1167,6 +1178,11 @@ define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
; AVX-NEXT: retq
; AVX512VL-LABEL: shuffle_mem_v2f64_10:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermilpd $1, (%rdi), %xmm0
; AVX512VL-NEXT: retq
%a = load <2 x double>, <2 x double>* %ptr
%shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
ret <2 x double> %shuffle

View File

@ -1,5 +1,6 @@
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL
target triple = "x86_64-unknown-unknown"
@ -133,6 +134,11 @@ define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT: retq
; AVX512VL-LABEL: shuffle_v4f64_0023:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpermilpd $8, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
ret <4 x double> %shuffle
}

View File

@ -15926,6 +15926,238 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xa2,0xfd,0x41,0xa2,0x94,0x81,0x00,0x04,0x00,0x00]
vscatterdpd %zmm18, 1024(%rcx, %ymm24,4) {%k1}
// CHECK: vpermilps $171, %zmm22, %zmm2
// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x04,0xd6,0xab]
vpermilps $0xab, %zmm22, %zmm2
// CHECK: vpermilps $171, %zmm22, %zmm2 {%k2}
// CHECK: encoding: [0x62,0xb3,0x7d,0x4a,0x04,0xd6,0xab]
vpermilps $0xab, %zmm22, %zmm2 {%k2}
// CHECK: vpermilps $171, %zmm22, %zmm2 {%k2} {z}
// CHECK: encoding: [0x62,0xb3,0x7d,0xca,0x04,0xd6,0xab]
vpermilps $0xab, %zmm22, %zmm2 {%k2} {z}
// CHECK: vpermilps $123, %zmm22, %zmm2
// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x04,0xd6,0x7b]
vpermilps $0x7b, %zmm22, %zmm2
// CHECK: vpermilps $123, (%rcx), %zmm2
// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x11,0x7b]
vpermilps $0x7b, (%rcx), %zmm2
// CHECK: vpermilps $123, 291(%rax,%r14,8), %zmm2
// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x04,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpermilps $0x7b, 291(%rax,%r14,8), %zmm2
// CHECK: vpermilps $123, (%rcx){1to16}, %zmm2
// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x11,0x7b]
vpermilps $0x7b, (%rcx){1to16}, %zmm2
// CHECK: vpermilps $123, 8128(%rdx), %zmm2
// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x52,0x7f,0x7b]
vpermilps $0x7b, 8128(%rdx), %zmm2
// CHECK: vpermilps $123, 8192(%rdx), %zmm2
// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x92,0x00,0x20,0x00,0x00,0x7b]
vpermilps $0x7b, 8192(%rdx), %zmm2
// CHECK: vpermilps $123, -8192(%rdx), %zmm2
// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x52,0x80,0x7b]
vpermilps $0x7b, -8192(%rdx), %zmm2
// CHECK: vpermilps $123, -8256(%rdx), %zmm2
// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x92,0xc0,0xdf,0xff,0xff,0x7b]
vpermilps $0x7b, -8256(%rdx), %zmm2
// CHECK: vpermilps $123, 508(%rdx){1to16}, %zmm2
// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x52,0x7f,0x7b]
vpermilps $0x7b, 508(%rdx){1to16}, %zmm2
// CHECK: vpermilps $123, 512(%rdx){1to16}, %zmm2
// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x92,0x00,0x02,0x00,0x00,0x7b]
vpermilps $0x7b, 512(%rdx){1to16}, %zmm2
// CHECK: vpermilps $123, -512(%rdx){1to16}, %zmm2
// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x52,0x80,0x7b]
vpermilps $0x7b, -512(%rdx){1to16}, %zmm2
// CHECK: vpermilps $123, -516(%rdx){1to16}, %zmm2
// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x92,0xfc,0xfd,0xff,0xff,0x7b]
vpermilps $0x7b, -516(%rdx){1to16}, %zmm2
// vpermilps, zmm operands, control taken from a vector operand. Every
// expected encoding below has 0x0c as the byte following the four-byte 0x62
// prefix. Covered: register source unmasked, with {%k1}, and with {%k1} {z};
// full-width memory sources; {1to16} broadcast memory sources. The
// displacement pairs (8128/8192, -8192/-8256, 508/512, -512/-516) straddle
// the point where the expected bytes change from a one-byte to a four-byte
// displacement.
// CHECK: vpermilps %zmm2, %zmm20, %zmm13
// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0xea]
vpermilps %zmm2, %zmm20, %zmm13
// CHECK: vpermilps %zmm2, %zmm20, %zmm13 {%k1}
// CHECK: encoding: [0x62,0x72,0x5d,0x41,0x0c,0xea]
vpermilps %zmm2, %zmm20, %zmm13 {%k1}
// CHECK: vpermilps %zmm2, %zmm20, %zmm13 {%k1} {z}
// CHECK: encoding: [0x62,0x72,0x5d,0xc1,0x0c,0xea]
vpermilps %zmm2, %zmm20, %zmm13 {%k1} {z}
// CHECK: vpermilps (%rcx), %zmm20, %zmm13
// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0x29]
vpermilps (%rcx), %zmm20, %zmm13
// CHECK: vpermilps 291(%rax,%r14,8), %zmm20, %zmm13
// CHECK: encoding: [0x62,0x32,0x5d,0x40,0x0c,0xac,0xf0,0x23,0x01,0x00,0x00]
vpermilps 291(%rax,%r14,8), %zmm20, %zmm13
// CHECK: vpermilps (%rcx){1to16}, %zmm20, %zmm13
// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0x29]
vpermilps (%rcx){1to16}, %zmm20, %zmm13
// CHECK: vpermilps 8128(%rdx), %zmm20, %zmm13
// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0x6a,0x7f]
vpermilps 8128(%rdx), %zmm20, %zmm13
// CHECK: vpermilps 8192(%rdx), %zmm20, %zmm13
// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0xaa,0x00,0x20,0x00,0x00]
vpermilps 8192(%rdx), %zmm20, %zmm13
// CHECK: vpermilps -8192(%rdx), %zmm20, %zmm13
// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0x6a,0x80]
vpermilps -8192(%rdx), %zmm20, %zmm13
// CHECK: vpermilps -8256(%rdx), %zmm20, %zmm13
// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0xaa,0xc0,0xdf,0xff,0xff]
vpermilps -8256(%rdx), %zmm20, %zmm13
// CHECK: vpermilps 508(%rdx){1to16}, %zmm20, %zmm13
// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0x6a,0x7f]
vpermilps 508(%rdx){1to16}, %zmm20, %zmm13
// CHECK: vpermilps 512(%rdx){1to16}, %zmm20, %zmm13
// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0xaa,0x00,0x02,0x00,0x00]
vpermilps 512(%rdx){1to16}, %zmm20, %zmm13
// CHECK: vpermilps -512(%rdx){1to16}, %zmm20, %zmm13
// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0x6a,0x80]
vpermilps -512(%rdx){1to16}, %zmm20, %zmm13
// CHECK: vpermilps -516(%rdx){1to16}, %zmm20, %zmm13
// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0xaa,0xfc,0xfd,0xff,0xff]
vpermilps -516(%rdx){1to16}, %zmm20, %zmm13
// vpermilpd, zmm operands, control taken from an immediate. Every expected
// encoding below has 0x05 as the byte following the four-byte 0x62 prefix,
// and the immediate is the final byte. The inputs spell the immediate in hex
// ($0xab, $0x7b); the expected printed form is decimal ($171, $123).
// Covered: register source unmasked, with {%k1}, and with {%k1} {z};
// full-width memory sources; {1to8} broadcast memory sources. The
// displacement pairs (8128/8192, -8192/-8256, 1016/1024, -1024/-1032)
// straddle the point where the expected bytes change from a one-byte to a
// four-byte displacement.
// CHECK: vpermilpd $171, %zmm4, %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0xdc,0xab]
vpermilpd $0xab, %zmm4, %zmm19
// CHECK: vpermilpd $171, %zmm4, %zmm19 {%k1}
// CHECK: encoding: [0x62,0xe3,0xfd,0x49,0x05,0xdc,0xab]
vpermilpd $0xab, %zmm4, %zmm19 {%k1}
// CHECK: vpermilpd $171, %zmm4, %zmm19 {%k1} {z}
// CHECK: encoding: [0x62,0xe3,0xfd,0xc9,0x05,0xdc,0xab]
vpermilpd $0xab, %zmm4, %zmm19 {%k1} {z}
// CHECK: vpermilpd $123, %zmm4, %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0xdc,0x7b]
vpermilpd $0x7b, %zmm4, %zmm19
// CHECK: vpermilpd $123, (%rcx), %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x19,0x7b]
vpermilpd $0x7b, (%rcx), %zmm19
// CHECK: vpermilpd $123, 291(%rax,%r14,8), %zmm19
// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x05,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpermilpd $0x7b, 291(%rax,%r14,8), %zmm19
// CHECK: vpermilpd $123, (%rcx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x19,0x7b]
vpermilpd $0x7b, (%rcx){1to8}, %zmm19
// CHECK: vpermilpd $123, 8128(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x5a,0x7f,0x7b]
vpermilpd $0x7b, 8128(%rdx), %zmm19
// CHECK: vpermilpd $123, 8192(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x9a,0x00,0x20,0x00,0x00,0x7b]
vpermilpd $0x7b, 8192(%rdx), %zmm19
// CHECK: vpermilpd $123, -8192(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x5a,0x80,0x7b]
vpermilpd $0x7b, -8192(%rdx), %zmm19
// CHECK: vpermilpd $123, -8256(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
vpermilpd $0x7b, -8256(%rdx), %zmm19
// CHECK: vpermilpd $123, 1016(%rdx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x5a,0x7f,0x7b]
vpermilpd $0x7b, 1016(%rdx){1to8}, %zmm19
// CHECK: vpermilpd $123, 1024(%rdx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x9a,0x00,0x04,0x00,0x00,0x7b]
vpermilpd $0x7b, 1024(%rdx){1to8}, %zmm19
// CHECK: vpermilpd $123, -1024(%rdx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x5a,0x80,0x7b]
vpermilpd $0x7b, -1024(%rdx){1to8}, %zmm19
// CHECK: vpermilpd $123, -1032(%rdx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
vpermilpd $0x7b, -1032(%rdx){1to8}, %zmm19
// vpermilpd, zmm operands, control taken from a vector operand. Every
// expected encoding below has 0x0d as the byte following the four-byte 0x62
// prefix. Covered: register source unmasked, with {%k2}, and with {%k2} {z};
// full-width memory sources; {1to8} broadcast memory sources. The
// displacement pairs (8128/8192, -8192/-8256, 1016/1024, -1024/-1032)
// straddle the point where the expected bytes change from a one-byte to a
// four-byte displacement.
// CHECK: vpermilpd %zmm21, %zmm26, %zmm1
// CHECK: encoding: [0x62,0xb2,0xad,0x40,0x0d,0xcd]
vpermilpd %zmm21, %zmm26, %zmm1
// CHECK: vpermilpd %zmm21, %zmm26, %zmm1 {%k2}
// CHECK: encoding: [0x62,0xb2,0xad,0x42,0x0d,0xcd]
vpermilpd %zmm21, %zmm26, %zmm1 {%k2}
// CHECK: vpermilpd %zmm21, %zmm26, %zmm1 {%k2} {z}
// CHECK: encoding: [0x62,0xb2,0xad,0xc2,0x0d,0xcd]
vpermilpd %zmm21, %zmm26, %zmm1 {%k2} {z}
// CHECK: vpermilpd (%rcx), %zmm26, %zmm1
// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x09]
vpermilpd (%rcx), %zmm26, %zmm1
// CHECK: vpermilpd 291(%rax,%r14,8), %zmm26, %zmm1
// CHECK: encoding: [0x62,0xb2,0xad,0x40,0x0d,0x8c,0xf0,0x23,0x01,0x00,0x00]
vpermilpd 291(%rax,%r14,8), %zmm26, %zmm1
// CHECK: vpermilpd (%rcx){1to8}, %zmm26, %zmm1
// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x09]
vpermilpd (%rcx){1to8}, %zmm26, %zmm1
// CHECK: vpermilpd 8128(%rdx), %zmm26, %zmm1
// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x4a,0x7f]
vpermilpd 8128(%rdx), %zmm26, %zmm1
// CHECK: vpermilpd 8192(%rdx), %zmm26, %zmm1
// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x8a,0x00,0x20,0x00,0x00]
vpermilpd 8192(%rdx), %zmm26, %zmm1
// CHECK: vpermilpd -8192(%rdx), %zmm26, %zmm1
// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x4a,0x80]
vpermilpd -8192(%rdx), %zmm26, %zmm1
// CHECK: vpermilpd -8256(%rdx), %zmm26, %zmm1
// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x8a,0xc0,0xdf,0xff,0xff]
vpermilpd -8256(%rdx), %zmm26, %zmm1
// CHECK: vpermilpd 1016(%rdx){1to8}, %zmm26, %zmm1
// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x4a,0x7f]
vpermilpd 1016(%rdx){1to8}, %zmm26, %zmm1
// CHECK: vpermilpd 1024(%rdx){1to8}, %zmm26, %zmm1
// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x8a,0x00,0x04,0x00,0x00]
vpermilpd 1024(%rdx){1to8}, %zmm26, %zmm1
// CHECK: vpermilpd -1024(%rdx){1to8}, %zmm26, %zmm1
// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x4a,0x80]
vpermilpd -1024(%rdx){1to8}, %zmm26, %zmm1
// CHECK: vpermilpd -1032(%rdx){1to8}, %zmm26, %zmm1
// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x8a,0xf8,0xfb,0xff,0xff]
vpermilpd -1032(%rdx){1to8}, %zmm26, %zmm1
// CHECK: vcvtpd2dq %zmm15, %ymm24
// CHECK: encoding: [0x62,0x41,0xff,0x48,0xe6,0xc7]
vcvtpd2dq %zmm15, %ymm24

View File

@ -20523,6 +20523,470 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// CHECK: encoding: [0x62,0x22,0xfd,0x21,0xa2,0xb4,0xb9,0x00,0x04,0x00,0x00]
vscatterdpd %ymm30, 1024(%rcx, %xmm31,4) {%k1}
// vpermilps, xmm operands, control taken from an immediate. Every expected
// encoding below has 0x04 as the byte following the four-byte 0x62 prefix,
// and the immediate is the final byte. The inputs spell the immediate in hex
// ($0xab, $0x7b); the expected printed form is decimal ($171, $123).
// Covered: register source unmasked, with {%k4}, and with {%k4} {z};
// full-width memory sources; {1to4} broadcast memory sources. The
// displacement pairs (2032/2048, -2048/-2064, 508/512, -512/-516) straddle
// the point where the expected bytes change from a one-byte to a four-byte
// displacement.
// CHECK: vpermilps $171, %xmm28, %xmm20
// CHECK: encoding: [0x62,0x83,0x7d,0x08,0x04,0xe4,0xab]
vpermilps $0xab, %xmm28, %xmm20
// CHECK: vpermilps $171, %xmm28, %xmm20 {%k4}
// CHECK: encoding: [0x62,0x83,0x7d,0x0c,0x04,0xe4,0xab]
vpermilps $0xab, %xmm28, %xmm20 {%k4}
// CHECK: vpermilps $171, %xmm28, %xmm20 {%k4} {z}
// CHECK: encoding: [0x62,0x83,0x7d,0x8c,0x04,0xe4,0xab]
vpermilps $0xab, %xmm28, %xmm20 {%k4} {z}
// CHECK: vpermilps $123, %xmm28, %xmm20
// CHECK: encoding: [0x62,0x83,0x7d,0x08,0x04,0xe4,0x7b]
vpermilps $0x7b, %xmm28, %xmm20
// CHECK: vpermilps $123, (%rcx), %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0x21,0x7b]
vpermilps $0x7b, (%rcx), %xmm20
// CHECK: vpermilps $123, 291(%rax,%r14,8), %xmm20
// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x04,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpermilps $0x7b, 291(%rax,%r14,8), %xmm20
// CHECK: vpermilps $123, (%rcx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0x21,0x7b]
vpermilps $0x7b, (%rcx){1to4}, %xmm20
// CHECK: vpermilps $123, 2032(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0x62,0x7f,0x7b]
vpermilps $0x7b, 2032(%rdx), %xmm20
// CHECK: vpermilps $123, 2048(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0xa2,0x00,0x08,0x00,0x00,0x7b]
vpermilps $0x7b, 2048(%rdx), %xmm20
// CHECK: vpermilps $123, -2048(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0x62,0x80,0x7b]
vpermilps $0x7b, -2048(%rdx), %xmm20
// CHECK: vpermilps $123, -2064(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
vpermilps $0x7b, -2064(%rdx), %xmm20
// CHECK: vpermilps $123, 508(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0x62,0x7f,0x7b]
vpermilps $0x7b, 508(%rdx){1to4}, %xmm20
// CHECK: vpermilps $123, 512(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0xa2,0x00,0x02,0x00,0x00,0x7b]
vpermilps $0x7b, 512(%rdx){1to4}, %xmm20
// CHECK: vpermilps $123, -512(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0x62,0x80,0x7b]
vpermilps $0x7b, -512(%rdx){1to4}, %xmm20
// CHECK: vpermilps $123, -516(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
vpermilps $0x7b, -516(%rdx){1to4}, %xmm20
// vpermilps, ymm operands, control taken from an immediate. Every expected
// encoding below has 0x04 as the byte following the four-byte 0x62 prefix,
// and the immediate is the final byte. The inputs spell the immediate in hex
// ($0xab, $0x7b); the expected printed form is decimal ($171, $123).
// Covered: register source unmasked, with {%k5}, and with {%k5} {z};
// full-width memory sources; {1to8} broadcast memory sources. The
// displacement pairs (4064/4096, -4096/-4128, 508/512, -512/-516) straddle
// the point where the expected bytes change from a one-byte to a four-byte
// displacement.
// CHECK: vpermilps $171, %ymm17, %ymm30
// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x04,0xf1,0xab]
vpermilps $0xab, %ymm17, %ymm30
// CHECK: vpermilps $171, %ymm17, %ymm30 {%k5}
// CHECK: encoding: [0x62,0x23,0x7d,0x2d,0x04,0xf1,0xab]
vpermilps $0xab, %ymm17, %ymm30 {%k5}
// CHECK: vpermilps $171, %ymm17, %ymm30 {%k5} {z}
// CHECK: encoding: [0x62,0x23,0x7d,0xad,0x04,0xf1,0xab]
vpermilps $0xab, %ymm17, %ymm30 {%k5} {z}
// CHECK: vpermilps $123, %ymm17, %ymm30
// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x04,0xf1,0x7b]
vpermilps $0x7b, %ymm17, %ymm30
// CHECK: vpermilps $123, (%rcx), %ymm30
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0x31,0x7b]
vpermilps $0x7b, (%rcx), %ymm30
// CHECK: vpermilps $123, 291(%rax,%r14,8), %ymm30
// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x04,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpermilps $0x7b, 291(%rax,%r14,8), %ymm30
// CHECK: vpermilps $123, (%rcx){1to8}, %ymm30
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0x31,0x7b]
vpermilps $0x7b, (%rcx){1to8}, %ymm30
// CHECK: vpermilps $123, 4064(%rdx), %ymm30
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0x72,0x7f,0x7b]
vpermilps $0x7b, 4064(%rdx), %ymm30
// CHECK: vpermilps $123, 4096(%rdx), %ymm30
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0xb2,0x00,0x10,0x00,0x00,0x7b]
vpermilps $0x7b, 4096(%rdx), %ymm30
// CHECK: vpermilps $123, -4096(%rdx), %ymm30
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0x72,0x80,0x7b]
vpermilps $0x7b, -4096(%rdx), %ymm30
// CHECK: vpermilps $123, -4128(%rdx), %ymm30
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0xb2,0xe0,0xef,0xff,0xff,0x7b]
vpermilps $0x7b, -4128(%rdx), %ymm30
// CHECK: vpermilps $123, 508(%rdx){1to8}, %ymm30
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0x72,0x7f,0x7b]
vpermilps $0x7b, 508(%rdx){1to8}, %ymm30
// CHECK: vpermilps $123, 512(%rdx){1to8}, %ymm30
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0xb2,0x00,0x02,0x00,0x00,0x7b]
vpermilps $0x7b, 512(%rdx){1to8}, %ymm30
// CHECK: vpermilps $123, -512(%rdx){1to8}, %ymm30
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0x72,0x80,0x7b]
vpermilps $0x7b, -512(%rdx){1to8}, %ymm30
// CHECK: vpermilps $123, -516(%rdx){1to8}, %ymm30
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
vpermilps $0x7b, -516(%rdx){1to8}, %ymm30
// vpermilps, xmm operands, control taken from a vector operand. Every
// expected encoding below has 0x0c as the byte following the four-byte 0x62
// prefix. Covered: register source unmasked, with {%k6}, and with {%k6} {z};
// full-width memory sources; {1to4} broadcast memory sources. The
// displacement pairs (2032/2048, -2048/-2064, 508/512, -512/-516) straddle
// the point where the expected bytes change from a one-byte to a four-byte
// displacement.
// CHECK: vpermilps %xmm22, %xmm28, %xmm28
// CHECK: encoding: [0x62,0x22,0x1d,0x00,0x0c,0xe6]
vpermilps %xmm22, %xmm28, %xmm28
// CHECK: vpermilps %xmm22, %xmm28, %xmm28 {%k6}
// CHECK: encoding: [0x62,0x22,0x1d,0x06,0x0c,0xe6]
vpermilps %xmm22, %xmm28, %xmm28 {%k6}
// CHECK: vpermilps %xmm22, %xmm28, %xmm28 {%k6} {z}
// CHECK: encoding: [0x62,0x22,0x1d,0x86,0x0c,0xe6]
vpermilps %xmm22, %xmm28, %xmm28 {%k6} {z}
// CHECK: vpermilps (%rcx), %xmm28, %xmm28
// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0x21]
vpermilps (%rcx), %xmm28, %xmm28
// CHECK: vpermilps 291(%rax,%r14,8), %xmm28, %xmm28
// CHECK: encoding: [0x62,0x22,0x1d,0x00,0x0c,0xa4,0xf0,0x23,0x01,0x00,0x00]
vpermilps 291(%rax,%r14,8), %xmm28, %xmm28
// CHECK: vpermilps (%rcx){1to4}, %xmm28, %xmm28
// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0x21]
vpermilps (%rcx){1to4}, %xmm28, %xmm28
// CHECK: vpermilps 2032(%rdx), %xmm28, %xmm28
// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0x62,0x7f]
vpermilps 2032(%rdx), %xmm28, %xmm28
// CHECK: vpermilps 2048(%rdx), %xmm28, %xmm28
// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0xa2,0x00,0x08,0x00,0x00]
vpermilps 2048(%rdx), %xmm28, %xmm28
// CHECK: vpermilps -2048(%rdx), %xmm28, %xmm28
// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0x62,0x80]
vpermilps -2048(%rdx), %xmm28, %xmm28
// CHECK: vpermilps -2064(%rdx), %xmm28, %xmm28
// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0xa2,0xf0,0xf7,0xff,0xff]
vpermilps -2064(%rdx), %xmm28, %xmm28
// CHECK: vpermilps 508(%rdx){1to4}, %xmm28, %xmm28
// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0x62,0x7f]
vpermilps 508(%rdx){1to4}, %xmm28, %xmm28
// CHECK: vpermilps 512(%rdx){1to4}, %xmm28, %xmm28
// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0xa2,0x00,0x02,0x00,0x00]
vpermilps 512(%rdx){1to4}, %xmm28, %xmm28
// CHECK: vpermilps -512(%rdx){1to4}, %xmm28, %xmm28
// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0x62,0x80]
vpermilps -512(%rdx){1to4}, %xmm28, %xmm28
// CHECK: vpermilps -516(%rdx){1to4}, %xmm28, %xmm28
// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0xa2,0xfc,0xfd,0xff,0xff]
vpermilps -516(%rdx){1to4}, %xmm28, %xmm28
// vpermilps, ymm operands, control taken from a vector operand. Every
// expected encoding below has 0x0c as the byte following the four-byte 0x62
// prefix. Covered: register source unmasked, with {%k2}, and with {%k2} {z};
// full-width memory sources; {1to8} broadcast memory sources. The
// displacement pairs (4064/4096, -4096/-4128, 508/512, -512/-516) straddle
// the point where the expected bytes change from a one-byte to a four-byte
// displacement.
// CHECK: vpermilps %ymm21, %ymm28, %ymm29
// CHECK: encoding: [0x62,0x22,0x1d,0x20,0x0c,0xed]
vpermilps %ymm21, %ymm28, %ymm29
// CHECK: vpermilps %ymm21, %ymm28, %ymm29 {%k2}
// CHECK: encoding: [0x62,0x22,0x1d,0x22,0x0c,0xed]
vpermilps %ymm21, %ymm28, %ymm29 {%k2}
// CHECK: vpermilps %ymm21, %ymm28, %ymm29 {%k2} {z}
// CHECK: encoding: [0x62,0x22,0x1d,0xa2,0x0c,0xed]
vpermilps %ymm21, %ymm28, %ymm29 {%k2} {z}
// CHECK: vpermilps (%rcx), %ymm28, %ymm29
// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0x29]
vpermilps (%rcx), %ymm28, %ymm29
// CHECK: vpermilps 291(%rax,%r14,8), %ymm28, %ymm29
// CHECK: encoding: [0x62,0x22,0x1d,0x20,0x0c,0xac,0xf0,0x23,0x01,0x00,0x00]
vpermilps 291(%rax,%r14,8), %ymm28, %ymm29
// CHECK: vpermilps (%rcx){1to8}, %ymm28, %ymm29
// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0x29]
vpermilps (%rcx){1to8}, %ymm28, %ymm29
// CHECK: vpermilps 4064(%rdx), %ymm28, %ymm29
// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0x6a,0x7f]
vpermilps 4064(%rdx), %ymm28, %ymm29
// CHECK: vpermilps 4096(%rdx), %ymm28, %ymm29
// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0xaa,0x00,0x10,0x00,0x00]
vpermilps 4096(%rdx), %ymm28, %ymm29
// CHECK: vpermilps -4096(%rdx), %ymm28, %ymm29
// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0x6a,0x80]
vpermilps -4096(%rdx), %ymm28, %ymm29
// CHECK: vpermilps -4128(%rdx), %ymm28, %ymm29
// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0xaa,0xe0,0xef,0xff,0xff]
vpermilps -4128(%rdx), %ymm28, %ymm29
// CHECK: vpermilps 508(%rdx){1to8}, %ymm28, %ymm29
// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0x6a,0x7f]
vpermilps 508(%rdx){1to8}, %ymm28, %ymm29
// CHECK: vpermilps 512(%rdx){1to8}, %ymm28, %ymm29
// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0xaa,0x00,0x02,0x00,0x00]
vpermilps 512(%rdx){1to8}, %ymm28, %ymm29
// CHECK: vpermilps -512(%rdx){1to8}, %ymm28, %ymm29
// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0x6a,0x80]
vpermilps -512(%rdx){1to8}, %ymm28, %ymm29
// CHECK: vpermilps -516(%rdx){1to8}, %ymm28, %ymm29
// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0xaa,0xfc,0xfd,0xff,0xff]
vpermilps -516(%rdx){1to8}, %ymm28, %ymm29
// vpermilpd, xmm operands, control taken from an immediate. Every expected
// encoding below has 0x05 as the byte following the four-byte 0x62 prefix,
// and the immediate is the final byte. The inputs spell the immediate in hex
// ($0xab, $0x7b); the expected printed form is decimal ($171, $123).
// Covered: register source unmasked, with {%k7}, and with {%k7} {z};
// full-width memory sources; {1to2} broadcast memory sources. The
// displacement pairs (2032/2048, -2048/-2064, 1016/1024, -1024/-1032)
// straddle the point where the expected bytes change from a one-byte to a
// four-byte displacement.
// CHECK: vpermilpd $171, %xmm19, %xmm29
// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x05,0xeb,0xab]
vpermilpd $0xab, %xmm19, %xmm29
// CHECK: vpermilpd $171, %xmm19, %xmm29 {%k7}
// CHECK: encoding: [0x62,0x23,0xfd,0x0f,0x05,0xeb,0xab]
vpermilpd $0xab, %xmm19, %xmm29 {%k7}
// CHECK: vpermilpd $171, %xmm19, %xmm29 {%k7} {z}
// CHECK: encoding: [0x62,0x23,0xfd,0x8f,0x05,0xeb,0xab]
vpermilpd $0xab, %xmm19, %xmm29 {%k7} {z}
// CHECK: vpermilpd $123, %xmm19, %xmm29
// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x05,0xeb,0x7b]
vpermilpd $0x7b, %xmm19, %xmm29
// CHECK: vpermilpd $123, (%rcx), %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0x29,0x7b]
vpermilpd $0x7b, (%rcx), %xmm29
// CHECK: vpermilpd $123, 291(%rax,%r14,8), %xmm29
// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x05,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpermilpd $0x7b, 291(%rax,%r14,8), %xmm29
// CHECK: vpermilpd $123, (%rcx){1to2}, %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0x29,0x7b]
vpermilpd $0x7b, (%rcx){1to2}, %xmm29
// CHECK: vpermilpd $123, 2032(%rdx), %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0x6a,0x7f,0x7b]
vpermilpd $0x7b, 2032(%rdx), %xmm29
// CHECK: vpermilpd $123, 2048(%rdx), %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0xaa,0x00,0x08,0x00,0x00,0x7b]
vpermilpd $0x7b, 2048(%rdx), %xmm29
// CHECK: vpermilpd $123, -2048(%rdx), %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0x6a,0x80,0x7b]
vpermilpd $0x7b, -2048(%rdx), %xmm29
// CHECK: vpermilpd $123, -2064(%rdx), %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
vpermilpd $0x7b, -2064(%rdx), %xmm29
// CHECK: vpermilpd $123, 1016(%rdx){1to2}, %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0x6a,0x7f,0x7b]
vpermilpd $0x7b, 1016(%rdx){1to2}, %xmm29
// CHECK: vpermilpd $123, 1024(%rdx){1to2}, %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0xaa,0x00,0x04,0x00,0x00,0x7b]
vpermilpd $0x7b, 1024(%rdx){1to2}, %xmm29
// CHECK: vpermilpd $123, -1024(%rdx){1to2}, %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0x6a,0x80,0x7b]
vpermilpd $0x7b, -1024(%rdx){1to2}, %xmm29
// CHECK: vpermilpd $123, -1032(%rdx){1to2}, %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
vpermilpd $0x7b, -1032(%rdx){1to2}, %xmm29
// vpermilpd, ymm operands, control taken from an immediate. Every expected
// encoding below has 0x05 as the byte following the four-byte 0x62 prefix,
// and the immediate is the final byte. The inputs spell the immediate in hex
// ($0xab, $0x7b); the expected printed form is decimal ($171, $123).
// Covered: register source unmasked, with {%k6}, and with {%k6} {z};
// full-width memory sources; {1to4} broadcast memory sources. The
// displacement pairs (4064/4096, -4096/-4128, 1016/1024, -1024/-1032)
// straddle the point where the expected bytes change from a one-byte to a
// four-byte displacement.
// CHECK: vpermilpd $171, %ymm24, %ymm17
// CHECK: encoding: [0x62,0x83,0xfd,0x28,0x05,0xc8,0xab]
vpermilpd $0xab, %ymm24, %ymm17
// CHECK: vpermilpd $171, %ymm24, %ymm17 {%k6}
// CHECK: encoding: [0x62,0x83,0xfd,0x2e,0x05,0xc8,0xab]
vpermilpd $0xab, %ymm24, %ymm17 {%k6}
// CHECK: vpermilpd $171, %ymm24, %ymm17 {%k6} {z}
// CHECK: encoding: [0x62,0x83,0xfd,0xae,0x05,0xc8,0xab]
vpermilpd $0xab, %ymm24, %ymm17 {%k6} {z}
// CHECK: vpermilpd $123, %ymm24, %ymm17
// CHECK: encoding: [0x62,0x83,0xfd,0x28,0x05,0xc8,0x7b]
vpermilpd $0x7b, %ymm24, %ymm17
// CHECK: vpermilpd $123, (%rcx), %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x09,0x7b]
vpermilpd $0x7b, (%rcx), %ymm17
// CHECK: vpermilpd $123, 291(%rax,%r14,8), %ymm17
// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x05,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpermilpd $0x7b, 291(%rax,%r14,8), %ymm17
// CHECK: vpermilpd $123, (%rcx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x09,0x7b]
vpermilpd $0x7b, (%rcx){1to4}, %ymm17
// CHECK: vpermilpd $123, 4064(%rdx), %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x4a,0x7f,0x7b]
vpermilpd $0x7b, 4064(%rdx), %ymm17
// CHECK: vpermilpd $123, 4096(%rdx), %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x8a,0x00,0x10,0x00,0x00,0x7b]
vpermilpd $0x7b, 4096(%rdx), %ymm17
// CHECK: vpermilpd $123, -4096(%rdx), %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x4a,0x80,0x7b]
vpermilpd $0x7b, -4096(%rdx), %ymm17
// CHECK: vpermilpd $123, -4128(%rdx), %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x8a,0xe0,0xef,0xff,0xff,0x7b]
vpermilpd $0x7b, -4128(%rdx), %ymm17
// CHECK: vpermilpd $123, 1016(%rdx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x4a,0x7f,0x7b]
vpermilpd $0x7b, 1016(%rdx){1to4}, %ymm17
// CHECK: vpermilpd $123, 1024(%rdx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x8a,0x00,0x04,0x00,0x00,0x7b]
vpermilpd $0x7b, 1024(%rdx){1to4}, %ymm17
// CHECK: vpermilpd $123, -1024(%rdx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x4a,0x80,0x7b]
vpermilpd $0x7b, -1024(%rdx){1to4}, %ymm17
// CHECK: vpermilpd $123, -1032(%rdx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
vpermilpd $0x7b, -1032(%rdx){1to4}, %ymm17
// vpermilpd, xmm operands, control taken from a vector operand. Every
// expected encoding below has 0x0d as the byte following the four-byte 0x62
// prefix. Covered: register source unmasked, with {%k2}, and with {%k2} {z};
// full-width memory sources; {1to2} broadcast memory sources. The
// displacement pairs (2032/2048, -2048/-2064, 1016/1024, -1024/-1032)
// straddle the point where the expected bytes change from a one-byte to a
// four-byte displacement.
// CHECK: vpermilpd %xmm17, %xmm27, %xmm26
// CHECK: encoding: [0x62,0x22,0xa5,0x00,0x0d,0xd1]
vpermilpd %xmm17, %xmm27, %xmm26
// CHECK: vpermilpd %xmm17, %xmm27, %xmm26 {%k2}
// CHECK: encoding: [0x62,0x22,0xa5,0x02,0x0d,0xd1]
vpermilpd %xmm17, %xmm27, %xmm26 {%k2}
// CHECK: vpermilpd %xmm17, %xmm27, %xmm26 {%k2} {z}
// CHECK: encoding: [0x62,0x22,0xa5,0x82,0x0d,0xd1]
vpermilpd %xmm17, %xmm27, %xmm26 {%k2} {z}
// CHECK: vpermilpd (%rcx), %xmm27, %xmm26
// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x11]
vpermilpd (%rcx), %xmm27, %xmm26
// CHECK: vpermilpd 291(%rax,%r14,8), %xmm27, %xmm26
// CHECK: encoding: [0x62,0x22,0xa5,0x00,0x0d,0x94,0xf0,0x23,0x01,0x00,0x00]
vpermilpd 291(%rax,%r14,8), %xmm27, %xmm26
// CHECK: vpermilpd (%rcx){1to2}, %xmm27, %xmm26
// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x11]
vpermilpd (%rcx){1to2}, %xmm27, %xmm26
// CHECK: vpermilpd 2032(%rdx), %xmm27, %xmm26
// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x52,0x7f]
vpermilpd 2032(%rdx), %xmm27, %xmm26
// CHECK: vpermilpd 2048(%rdx), %xmm27, %xmm26
// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x92,0x00,0x08,0x00,0x00]
vpermilpd 2048(%rdx), %xmm27, %xmm26
// CHECK: vpermilpd -2048(%rdx), %xmm27, %xmm26
// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x52,0x80]
vpermilpd -2048(%rdx), %xmm27, %xmm26
// CHECK: vpermilpd -2064(%rdx), %xmm27, %xmm26
// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x92,0xf0,0xf7,0xff,0xff]
vpermilpd -2064(%rdx), %xmm27, %xmm26
// CHECK: vpermilpd 1016(%rdx){1to2}, %xmm27, %xmm26
// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x52,0x7f]
vpermilpd 1016(%rdx){1to2}, %xmm27, %xmm26
// CHECK: vpermilpd 1024(%rdx){1to2}, %xmm27, %xmm26
// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x92,0x00,0x04,0x00,0x00]
vpermilpd 1024(%rdx){1to2}, %xmm27, %xmm26
// CHECK: vpermilpd -1024(%rdx){1to2}, %xmm27, %xmm26
// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x52,0x80]
vpermilpd -1024(%rdx){1to2}, %xmm27, %xmm26
// CHECK: vpermilpd -1032(%rdx){1to2}, %xmm27, %xmm26
// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x92,0xf8,0xfb,0xff,0xff]
vpermilpd -1032(%rdx){1to2}, %xmm27, %xmm26
// vpermilpd, ymm operands, control taken from a vector operand. Every
// expected encoding below has 0x0d as the byte following the four-byte 0x62
// prefix. Covered: register source unmasked, with {%k5}, and with {%k5} {z};
// full-width memory sources; {1to4} broadcast memory sources. The
// displacement pairs (4064/4096, -4096/-4128, 1016/1024, -1024/-1032)
// straddle the point where the expected bytes change from a one-byte to a
// four-byte displacement.
// CHECK: vpermilpd %ymm24, %ymm26, %ymm26
// CHECK: encoding: [0x62,0x02,0xad,0x20,0x0d,0xd0]
vpermilpd %ymm24, %ymm26, %ymm26
// CHECK: vpermilpd %ymm24, %ymm26, %ymm26 {%k5}
// CHECK: encoding: [0x62,0x02,0xad,0x25,0x0d,0xd0]
vpermilpd %ymm24, %ymm26, %ymm26 {%k5}
// CHECK: vpermilpd %ymm24, %ymm26, %ymm26 {%k5} {z}
// CHECK: encoding: [0x62,0x02,0xad,0xa5,0x0d,0xd0]
vpermilpd %ymm24, %ymm26, %ymm26 {%k5} {z}
// CHECK: vpermilpd (%rcx), %ymm26, %ymm26
// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x11]
vpermilpd (%rcx), %ymm26, %ymm26
// CHECK: vpermilpd 291(%rax,%r14,8), %ymm26, %ymm26
// CHECK: encoding: [0x62,0x22,0xad,0x20,0x0d,0x94,0xf0,0x23,0x01,0x00,0x00]
vpermilpd 291(%rax,%r14,8), %ymm26, %ymm26
// CHECK: vpermilpd (%rcx){1to4}, %ymm26, %ymm26
// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x11]
vpermilpd (%rcx){1to4}, %ymm26, %ymm26
// CHECK: vpermilpd 4064(%rdx), %ymm26, %ymm26
// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x52,0x7f]
vpermilpd 4064(%rdx), %ymm26, %ymm26
// CHECK: vpermilpd 4096(%rdx), %ymm26, %ymm26
// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x92,0x00,0x10,0x00,0x00]
vpermilpd 4096(%rdx), %ymm26, %ymm26
// CHECK: vpermilpd -4096(%rdx), %ymm26, %ymm26
// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x52,0x80]
vpermilpd -4096(%rdx), %ymm26, %ymm26
// CHECK: vpermilpd -4128(%rdx), %ymm26, %ymm26
// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x92,0xe0,0xef,0xff,0xff]
vpermilpd -4128(%rdx), %ymm26, %ymm26
// CHECK: vpermilpd 1016(%rdx){1to4}, %ymm26, %ymm26
// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x52,0x7f]
vpermilpd 1016(%rdx){1to4}, %ymm26, %ymm26
// CHECK: vpermilpd 1024(%rdx){1to4}, %ymm26, %ymm26
// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x92,0x00,0x04,0x00,0x00]
vpermilpd 1024(%rdx){1to4}, %ymm26, %ymm26
// CHECK: vpermilpd -1024(%rdx){1to4}, %ymm26, %ymm26
// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x52,0x80]
vpermilpd -1024(%rdx){1to4}, %ymm26, %ymm26
// CHECK: vpermilpd -1032(%rdx){1to4}, %ymm26, %ymm26
// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x92,0xf8,0xfb,0xff,0xff]
vpermilpd -1032(%rdx){1to4}, %ymm26, %ymm26
// CHECK: vcvtpd2dq %xmm20, %xmm25
// CHECK: encoding: [0x62,0x21,0xff,0x08,0xe6,0xcc]
vcvtpd2dq %xmm20, %xmm25