mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-11 21:45:16 +00:00
AVX512: Implemented encoding and intrinsics for VPERMILPS/PD instructions.
Added tests for intrinsics and encoding. Differential Revision: http://reviews.llvm.org/D12690 llvm-svn: 249261
This commit is contained in:
parent
28f3a7787c
commit
38dd6d8710
@ -1406,6 +1406,78 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermil_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermilpd_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermil_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermilpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermil_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermilpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermil_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermilps_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermil_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermilps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermil_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermilps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermilvar_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermilvarpd256_mask">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermilvar_pd_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermilvarpd512_mask">,
|
||||
Intrinsic<[llvm_v8f64_ty],
|
||||
[llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermilvar_pd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermilvarpd_mask">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermilvar_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermilvarps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermilvar_ps_512 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermilvarps512_mask">,
|
||||
Intrinsic<[llvm_v16f32_ty],
|
||||
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_vpermilvar_ps_128 :
|
||||
GCCBuiltin<"__builtin_ia32_vpermilvarps_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_x86_avx512_mask_pshuf_b_128 :
|
||||
GCCBuiltin<"__builtin_ia32_pshufb128_mask">,
|
||||
Intrinsic<[llvm_v16i8_ty],
|
||||
|
@ -16003,11 +16003,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
|
||||
RoundingMode, Sae),
|
||||
Mask, Src0, Subtarget, DAG);
|
||||
}
|
||||
case INTR_TYPE_2OP_MASK: {
|
||||
case INTR_TYPE_2OP_MASK:
|
||||
case INTR_TYPE_2OP_IMM8_MASK: {
|
||||
SDValue Src1 = Op.getOperand(1);
|
||||
SDValue Src2 = Op.getOperand(2);
|
||||
SDValue PassThru = Op.getOperand(3);
|
||||
SDValue Mask = Op.getOperand(4);
|
||||
|
||||
if (IntrData->Type == INTR_TYPE_2OP_IMM8_MASK)
|
||||
Src2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src2);
|
||||
|
||||
// We specify 2 possible opcodes for intrinsics with rounding modes.
|
||||
// First, we check if the intrinsic may have non-default rounding mode,
|
||||
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
|
||||
|
@ -1109,62 +1109,6 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 - VPERM
|
||||
//
|
||||
// -- immediate form --
|
||||
multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86VectorVTInfo _> {
|
||||
let ExeDomain = _.ExeDomain in {
|
||||
def ri : AVX512AIi8<opc, MRMSrcReg, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set _.RC:$dst,
|
||||
(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
|
||||
EVEX;
|
||||
def mi : AVX512AIi8<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.MemOp:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set _.RC:$dst,
|
||||
(_.VT (OpNode (_.LdFrag addr:$src1),
|
||||
(i8 imm:$src2))))]>,
|
||||
EVEX, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_permil<bits<8> OpcImm, bits<8> OpcVar, X86VectorVTInfo _,
|
||||
X86VectorVTInfo Ctrl> :
|
||||
avx512_perm_imm<OpcImm, "vpermil" # _.Suffix, X86VPermilpi, _> {
|
||||
let ExeDomain = _.ExeDomain in {
|
||||
def rr : AVX5128I<OpcVar, MRMSrcReg, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2),
|
||||
!strconcat("vpermil" # _.Suffix,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set _.RC:$dst,
|
||||
(_.VT (X86VPermilpv _.RC:$src1,
|
||||
(Ctrl.VT Ctrl.RC:$src2))))]>,
|
||||
EVEX_4V;
|
||||
def rm : AVX5128I<OpcVar, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, Ctrl.MemOp:$src2),
|
||||
!strconcat("vpermil" # _.Suffix,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set _.RC:$dst,
|
||||
(_.VT (X86VPermilpv _.RC:$src1,
|
||||
(Ctrl.VT (Ctrl.LdFrag addr:$src2)))))]>,
|
||||
EVEX_4V;
|
||||
}
|
||||
}
|
||||
defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>,
|
||||
EVEX_V512;
|
||||
defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>,
|
||||
EVEX_V512, VEX_W;
|
||||
|
||||
def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
|
||||
(VPERMILPSZri VR512:$src1, imm:$imm)>;
|
||||
def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
|
||||
(VPERMILPDZri VR512:$src1, imm:$imm)>;
|
||||
|
||||
// -- VPERM2I - 3 source operands form --
|
||||
multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, X86VectorVTInfo _> {
|
||||
@ -4130,7 +4074,73 @@ defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
|
||||
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
|
||||
X86VPermi, avx512vl_f64_info>,
|
||||
EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 - VPERMIL
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Variable-control VPERMIL forms for a single vector type.
//
// Defines three AVX512_maskable instructions with opcode OpcVar. Each applies
// OpNode to the data vector $src1 (type _.VT) and a control vector of type
// Ctrl.VT that comes from a different place per form:
//   rr  - control vector in a register (Ctrl.RC);
//   rm  - control vector loaded from memory through Ctrl.LdFrag;
//   rmb - control vector produced by X86VBroadcast of a scalar load
//         (Ctrl.ScalarLdFrag), encoded with EVEX_B.
// All forms are T8PD, EVEX_4V; the two memory forms additionally carry
// EVEX_CD8<_.EltSize, CD8VF>.
// NOTE(review): AVX512_maskable is defined outside this hunk; presumably it is
// what provides the {%k} and {%k} {z} variants - confirm at its definition.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
|
||||
X86VectorVTInfo _, X86VectorVTInfo Ctrl> {
|
||||
defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (OpNode _.RC:$src1,
|
||||
(Ctrl.VT Ctrl.RC:$src2)))>,
|
||||
T8PD, EVEX_4V;
|
||||
let mayLoad = 1 in {
|
||||
defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
|
||||
"$src2, $src1", "$src1, $src2",
|
||||
(_.VT (OpNode
|
||||
_.RC:$src1,
|
||||
(Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
|
||||
T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
|
||||
"${src2}"##_.BroadcastStr##", $src1",
|
||||
"$src1, ${src2}"##_.BroadcastStr,
|
||||
(_.VT (OpNode
|
||||
_.RC:$src1,
|
||||
(Ctrl.VT (X86VBroadcast
|
||||
(Ctrl.ScalarLdFrag addr:$src2)))))>,
|
||||
T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
}//let mayLoad = 1
|
||||
}
|
||||
|
||||
// Instantiates avx512_permil_vec (with X86VPermilpv as the node) for every
// vector width described by the AVX512VLVectorVTInfo pair (_, Ctrl):
//   Z    - 512-bit, EVEX_V512, requires HasAVX512;
//   Z128 - 128-bit, EVEX_V128, requires HasAVX512 and HasVLX;
//   Z256 - 256-bit, EVEX_V256, requires HasAVX512 and HasVLX.
// "_" supplies the data vector types and Ctrl the control vector types.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
|
||||
AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
|
||||
let Predicates = [HasAVX512] in {
|
||||
defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info512,
|
||||
Ctrl.info512>, EVEX_V512;
|
||||
}
|
||||
let Predicates = [HasAVX512, HasVLX] in {
|
||||
defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info128,
|
||||
Ctrl.info128>, EVEX_V128;
|
||||
defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info256,
|
||||
Ctrl.info256>, EVEX_V256;
|
||||
}
|
||||
}
|
||||
|
||||
// Top-level VPERMIL multiclass: emits both encodings of one mnemonic.
//   - Variable-control form (opcode OpcVar) via avx512_permil_vec_common,
//     using X86VPermilpv with data types "_" and control types Ctrl.
//   - Immediate-control form (opcode OpcImm) via avx512_shift_rmi_sizes,
//     using X86VPermilpi on data types "_".
// A second, isCodeGenOnly set named NAME#_I repeats both forms with Ctrl
// substituted for the data type.
// NOTE(review): avx512_shift_rmi_sizes is defined outside this hunk; its
// per-width predicates and operand layout are not visible here.
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
|
||||
AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
|
||||
|
||||
defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, _, Ctrl>;
|
||||
defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
|
||||
X86VPermilpi, _>,
|
||||
EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
|
||||
|
||||
let isCodeGenOnly = 1 in {
|
||||
// Codegen-only duplicates whose data operand uses the control (Ctrl) vector
// types instead of "_", so the same nodes can be matched on those types.
|
||||
defm NAME#_I: avx512_permil_vec_common<OpcodeStr, OpcVar, Ctrl, Ctrl>;
|
||||
defm NAME#_I: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem,
|
||||
OpcodeStr, X86VPermilpi, Ctrl>,
|
||||
EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
|
||||
}
|
||||
}
|
||||
|
||||
// Concrete instantiations of avx512_permil.
//   VPERMILPS: immediate opcode 0x04, variable opcode 0x0C, f32 data vectors
//              with i32 control vectors.
//   VPERMILPD: immediate opcode 0x05, variable opcode 0x0D, f64 data vectors
//              with i64 control vectors, plus VEX_W.
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
|
||||
avx512vl_i32_info>;
|
||||
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
|
||||
avx512vl_i64_info>, VEX_W;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -8087,17 +8087,19 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
|
||||
(bitconvert (i_frag addr:$src2))))]>, VEX_4V,
|
||||
Sched<[WriteFShuffleLd, ReadAfterLd]>;
|
||||
|
||||
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst, (vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
|
||||
Sched<[WriteFShuffle]>;
|
||||
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
|
||||
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
|
||||
(ins x86memop_f:$src1, u8imm:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst,
|
||||
(vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
|
||||
Sched<[WriteFShuffleLd]>;
|
||||
}// Predicates = [HasAVX, NoVLX]
|
||||
}
|
||||
|
||||
let ExeDomain = SSEPackedSingle in {
|
||||
@ -8113,7 +8115,7 @@ let ExeDomain = SSEPackedDouble in {
|
||||
loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))),
|
||||
(VPERMILPSYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
|
||||
|
@ -22,7 +22,7 @@ enum IntrinsicType {
|
||||
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
|
||||
CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI,
|
||||
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
|
||||
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
|
||||
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_2OP_IMM8_MASK,
|
||||
INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
|
||||
FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
|
||||
VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
|
||||
@ -1407,6 +1407,30 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86ISD::VPERMIV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMIV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_128, INTR_TYPE_2OP_IMM8_MASK,
|
||||
X86ISD::VPERMILPI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_256, INTR_TYPE_2OP_IMM8_MASK,
|
||||
X86ISD::VPERMILPI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_512, INTR_TYPE_2OP_IMM8_MASK,
|
||||
X86ISD::VPERMILPI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_128, INTR_TYPE_2OP_IMM8_MASK,
|
||||
X86ISD::VPERMILPI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_256, INTR_TYPE_2OP_IMM8_MASK,
|
||||
X86ISD::VPERMILPI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_512, INTR_TYPE_2OP_IMM8_MASK,
|
||||
X86ISD::VPERMILPI, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_128, INTR_TYPE_2OP_MASK,
|
||||
X86ISD::VPERMILPV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_256, INTR_TYPE_2OP_MASK,
|
||||
X86ISD::VPERMILPV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_512, INTR_TYPE_2OP_MASK,
|
||||
X86ISD::VPERMILPV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_128, INTR_TYPE_2OP_MASK,
|
||||
X86ISD::VPERMILPV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_256, INTR_TYPE_2OP_MASK,
|
||||
X86ISD::VPERMILPV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_512, INTR_TYPE_2OP_MASK,
|
||||
X86ISD::VPERMILPV, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
|
||||
X86ISD::VPERMV3, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
|
||||
|
@ -4348,6 +4348,88 @@ define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x
|
||||
ret <16 x float> %res2
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)
|
||||
|
||||
define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm2 {%k1} {z}
|
||||
; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
|
||||
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
|
||||
%res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
|
||||
%res3 = fadd <8 x double> %res, %res1
|
||||
%res4 = fadd <8 x double> %res3, %res2
|
||||
ret <8 x double> %res4
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)
|
||||
|
||||
define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermilps $22, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vpermilps $22, %zmm0, %zmm2 {%k1} {z}
|
||||
; CHECK-NEXT: vpermilps $22, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
|
||||
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
|
||||
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
|
||||
%res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
|
||||
%res3 = fadd <16 x float> %res, %res1
|
||||
%res4 = fadd <16 x float> %res3, %res2
|
||||
ret <16 x float> %res4
|
||||
}
|
||||
|
||||
declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
|
||||
|
||||
define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
|
||||
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
|
||||
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
|
||||
%res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
|
||||
%res3 = fadd <8 x double> %res, %res1
|
||||
%res4 = fadd <8 x double> %res2, %res3
|
||||
ret <8 x double> %res4
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
|
||||
|
||||
define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1}
|
||||
; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z}
|
||||
; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
|
||||
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
|
||||
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
|
||||
%res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
|
||||
%res3 = fadd <16 x float> %res, %res1
|
||||
%res4 = fadd <16 x float> %res2, %res3
|
||||
ret <16 x float> %res4
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i8)
|
||||
|
||||
define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i8 %x4) {
|
||||
|
@ -4812,6 +4812,174 @@ define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64>
|
||||
ret <4 x i64> %res2
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)
|
||||
|
||||
define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1}
|
||||
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z}
|
||||
; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
|
||||
%res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
|
||||
%res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
|
||||
%res3 = fadd <4 x double> %res, %res1
|
||||
%res4 = fadd <4 x double> %res2, %res3
|
||||
ret <4 x double> %res4
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)
|
||||
|
||||
define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1}
|
||||
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z}
|
||||
; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
|
||||
%res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
|
||||
%res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
|
||||
%res3 = fadd <2 x double> %res, %res1
|
||||
%res4 = fadd <2 x double> %res3, %res2
|
||||
ret <2 x double> %res4
|
||||
}
|
||||
|
||||
declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)
|
||||
|
||||
define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1}
|
||||
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z}
|
||||
; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
|
||||
%res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
|
||||
%res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
|
||||
%res3 = fadd <8 x float> %res, %res1
|
||||
%res4 = fadd <8 x float> %res3, %res2
|
||||
ret <8 x float> %res4
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)
|
||||
|
||||
define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1}
|
||||
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z}
|
||||
; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
|
||||
%res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
|
||||
%res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
|
||||
%res3 = fadd <4 x float> %res, %res1
|
||||
%res4 = fadd <4 x float> %res2, %res3
|
||||
ret <4 x float> %res4
|
||||
}
|
||||
|
||||
declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
|
||||
|
||||
define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1}
|
||||
; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 {%k1} {z}
|
||||
; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1
|
||||
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
|
||||
%res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
|
||||
%res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
|
||||
%res3 = fadd <4 x double> %res, %res1
|
||||
%res4 = fadd <4 x double> %res2, %res3
|
||||
ret <4 x double> %res4
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
|
||||
|
||||
define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1}
|
||||
; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 {%k1} {z}
|
||||
; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm1
|
||||
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
|
||||
%res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3)
|
||||
%res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
|
||||
%res3 = fadd <2 x double> %res, %res1
|
||||
%res4 = fadd <2 x double> %res3, %res2
|
||||
ret <2 x double> %res4
|
||||
}
|
||||
|
||||
declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
|
||||
|
||||
define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1}
|
||||
; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm3 {%k1} {z}
|
||||
; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1
|
||||
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
|
||||
%res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
|
||||
%res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
|
||||
%res3 = fadd <8 x float> %res, %res1
|
||||
%res4 = fadd <8 x float> %res3, %res2
|
||||
ret <8 x float> %res4
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
|
||||
|
||||
define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
|
||||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1}
|
||||
; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm3 {%k1} {z}
|
||||
; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
|
||||
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
|
||||
%res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3)
|
||||
%res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
|
||||
%res3 = fadd <4 x float> %res, %res1
|
||||
%res4 = fadd <4 x float> %res2, %res3
|
||||
ret <4 x float> %res4
|
||||
}
|
||||
|
||||
declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8)
|
||||
|
||||
define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
|
||||
|
@ -4,6 +4,7 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
@ -135,6 +136,11 @@ define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; AVX-NEXT: retq
|
||||
|
||||
; AVX512VL-LABEL: shuffle_v2f64_10:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vpermilpd $1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
@ -191,6 +197,11 @@ define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
|
||||
; AVX-NEXT: retq
|
||||
|
||||
; AVX512VL-LABEL: shuffle_v2f64_32:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vpermilpd $1, %xmm1, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
@ -1167,6 +1178,11 @@ define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
|
||||
; AVX-NEXT: retq
|
||||
|
||||
; AVX512VL-LABEL: shuffle_mem_v2f64_10:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vpermilpd $1, (%rdi), %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
%a = load <2 x double>, <2 x double>* %ptr
|
||||
%shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
|
||||
ret <2 x double> %shuffle
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
|
||||
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
|
||||
; RUN: llc < %s -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL
|
||||
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
|
||||
@ -133,6 +134,11 @@ define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
|
||||
; ALL-NEXT: retq
|
||||
|
||||
; AVX512VL-LABEL: shuffle_v4f64_0023:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vpermilpd $8, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
@ -15926,6 +15926,238 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
|
||||
// CHECK: encoding: [0x62,0xa2,0xfd,0x41,0xa2,0x94,0x81,0x00,0x04,0x00,0x00]
|
||||
vscatterdpd %zmm18, 1024(%rcx, %ymm24,4) {%k1}
|
||||
|
||||
// CHECK: vpermilps $171, %zmm22, %zmm2
|
||||
// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x04,0xd6,0xab]
|
||||
vpermilps $0xab, %zmm22, %zmm2
|
||||
|
||||
// CHECK: vpermilps $171, %zmm22, %zmm2 {%k2}
|
||||
// CHECK: encoding: [0x62,0xb3,0x7d,0x4a,0x04,0xd6,0xab]
|
||||
vpermilps $0xab, %zmm22, %zmm2 {%k2}
|
||||
|
||||
// CHECK: vpermilps $171, %zmm22, %zmm2 {%k2} {z}
|
||||
// CHECK: encoding: [0x62,0xb3,0x7d,0xca,0x04,0xd6,0xab]
|
||||
vpermilps $0xab, %zmm22, %zmm2 {%k2} {z}
|
||||
|
||||
// CHECK: vpermilps $123, %zmm22, %zmm2
|
||||
// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x04,0xd6,0x7b]
|
||||
vpermilps $0x7b, %zmm22, %zmm2
|
||||
|
||||
// CHECK: vpermilps $123, (%rcx), %zmm2
|
||||
// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x11,0x7b]
|
||||
vpermilps $0x7b, (%rcx), %zmm2
|
||||
|
||||
// CHECK: vpermilps $123, 291(%rax,%r14,8), %zmm2
|
||||
// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x04,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
|
||||
vpermilps $0x7b, 291(%rax,%r14,8), %zmm2
|
||||
|
||||
// CHECK: vpermilps $123, (%rcx){1to16}, %zmm2
|
||||
// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x11,0x7b]
|
||||
vpermilps $0x7b, (%rcx){1to16}, %zmm2
|
||||
|
||||
// CHECK: vpermilps $123, 8128(%rdx), %zmm2
|
||||
// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x52,0x7f,0x7b]
|
||||
vpermilps $0x7b, 8128(%rdx), %zmm2
|
||||
|
||||
// CHECK: vpermilps $123, 8192(%rdx), %zmm2
|
||||
// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x92,0x00,0x20,0x00,0x00,0x7b]
|
||||
vpermilps $0x7b, 8192(%rdx), %zmm2
|
||||
|
||||
// CHECK: vpermilps $123, -8192(%rdx), %zmm2
|
||||
// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x52,0x80,0x7b]
|
||||
vpermilps $0x7b, -8192(%rdx), %zmm2
|
||||
|
||||
// CHECK: vpermilps $123, -8256(%rdx), %zmm2
|
||||
// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x92,0xc0,0xdf,0xff,0xff,0x7b]
|
||||
vpermilps $0x7b, -8256(%rdx), %zmm2
|
||||
|
||||
// CHECK: vpermilps $123, 508(%rdx){1to16}, %zmm2
|
||||
// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x52,0x7f,0x7b]
|
||||
vpermilps $0x7b, 508(%rdx){1to16}, %zmm2
|
||||
|
||||
// CHECK: vpermilps $123, 512(%rdx){1to16}, %zmm2
|
||||
// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x92,0x00,0x02,0x00,0x00,0x7b]
|
||||
vpermilps $0x7b, 512(%rdx){1to16}, %zmm2
|
||||
|
||||
// CHECK: vpermilps $123, -512(%rdx){1to16}, %zmm2
|
||||
// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x52,0x80,0x7b]
|
||||
vpermilps $0x7b, -512(%rdx){1to16}, %zmm2
|
||||
|
||||
// CHECK: vpermilps $123, -516(%rdx){1to16}, %zmm2
|
||||
// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x92,0xfc,0xfd,0xff,0xff,0x7b]
|
||||
vpermilps $0x7b, -516(%rdx){1to16}, %zmm2
|
||||
|
||||
// CHECK: vpermilps %zmm2, %zmm20, %zmm13
|
||||
// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0xea]
|
||||
vpermilps %zmm2, %zmm20, %zmm13
|
||||
|
||||
// CHECK: vpermilps %zmm2, %zmm20, %zmm13 {%k1}
|
||||
// CHECK: encoding: [0x62,0x72,0x5d,0x41,0x0c,0xea]
|
||||
vpermilps %zmm2, %zmm20, %zmm13 {%k1}
|
||||
|
||||
// CHECK: vpermilps %zmm2, %zmm20, %zmm13 {%k1} {z}
|
||||
// CHECK: encoding: [0x62,0x72,0x5d,0xc1,0x0c,0xea]
|
||||
vpermilps %zmm2, %zmm20, %zmm13 {%k1} {z}
|
||||
|
||||
// CHECK: vpermilps (%rcx), %zmm20, %zmm13
|
||||
// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0x29]
|
||||
vpermilps (%rcx), %zmm20, %zmm13
|
||||
|
||||
// CHECK: vpermilps 291(%rax,%r14,8), %zmm20, %zmm13
|
||||
// CHECK: encoding: [0x62,0x32,0x5d,0x40,0x0c,0xac,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpermilps 291(%rax,%r14,8), %zmm20, %zmm13
|
||||
|
||||
// CHECK: vpermilps (%rcx){1to16}, %zmm20, %zmm13
|
||||
// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0x29]
|
||||
vpermilps (%rcx){1to16}, %zmm20, %zmm13
|
||||
|
||||
// CHECK: vpermilps 8128(%rdx), %zmm20, %zmm13
|
||||
// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0x6a,0x7f]
|
||||
vpermilps 8128(%rdx), %zmm20, %zmm13
|
||||
|
||||
// CHECK: vpermilps 8192(%rdx), %zmm20, %zmm13
|
||||
// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0xaa,0x00,0x20,0x00,0x00]
|
||||
vpermilps 8192(%rdx), %zmm20, %zmm13
|
||||
|
||||
// CHECK: vpermilps -8192(%rdx), %zmm20, %zmm13
|
||||
// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0x6a,0x80]
|
||||
vpermilps -8192(%rdx), %zmm20, %zmm13
|
||||
|
||||
// CHECK: vpermilps -8256(%rdx), %zmm20, %zmm13
|
||||
// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0xaa,0xc0,0xdf,0xff,0xff]
|
||||
vpermilps -8256(%rdx), %zmm20, %zmm13
|
||||
|
||||
// CHECK: vpermilps 508(%rdx){1to16}, %zmm20, %zmm13
|
||||
// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0x6a,0x7f]
|
||||
vpermilps 508(%rdx){1to16}, %zmm20, %zmm13
|
||||
|
||||
// CHECK: vpermilps 512(%rdx){1to16}, %zmm20, %zmm13
|
||||
// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0xaa,0x00,0x02,0x00,0x00]
|
||||
vpermilps 512(%rdx){1to16}, %zmm20, %zmm13
|
||||
|
||||
// CHECK: vpermilps -512(%rdx){1to16}, %zmm20, %zmm13
|
||||
// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0x6a,0x80]
|
||||
vpermilps -512(%rdx){1to16}, %zmm20, %zmm13
|
||||
|
||||
// CHECK: vpermilps -516(%rdx){1to16}, %zmm20, %zmm13
|
||||
// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0xaa,0xfc,0xfd,0xff,0xff]
|
||||
vpermilps -516(%rdx){1to16}, %zmm20, %zmm13
|
||||
|
||||
// CHECK: vpermilpd $171, %zmm4, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0xdc,0xab]
|
||||
vpermilpd $0xab, %zmm4, %zmm19
|
||||
|
||||
// CHECK: vpermilpd $171, %zmm4, %zmm19 {%k1}
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x49,0x05,0xdc,0xab]
|
||||
vpermilpd $0xab, %zmm4, %zmm19 {%k1}
|
||||
|
||||
// CHECK: vpermilpd $171, %zmm4, %zmm19 {%k1} {z}
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0xc9,0x05,0xdc,0xab]
|
||||
vpermilpd $0xab, %zmm4, %zmm19 {%k1} {z}
|
||||
|
||||
// CHECK: vpermilpd $123, %zmm4, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0xdc,0x7b]
|
||||
vpermilpd $0x7b, %zmm4, %zmm19
|
||||
|
||||
// CHECK: vpermilpd $123, (%rcx), %zmm19
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x19,0x7b]
|
||||
vpermilpd $0x7b, (%rcx), %zmm19
|
||||
|
||||
// CHECK: vpermilpd $123, 291(%rax,%r14,8), %zmm19
|
||||
// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x05,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
|
||||
vpermilpd $0x7b, 291(%rax,%r14,8), %zmm19
|
||||
|
||||
// CHECK: vpermilpd $123, (%rcx){1to8}, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x19,0x7b]
|
||||
vpermilpd $0x7b, (%rcx){1to8}, %zmm19
|
||||
|
||||
// CHECK: vpermilpd $123, 8128(%rdx), %zmm19
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x5a,0x7f,0x7b]
|
||||
vpermilpd $0x7b, 8128(%rdx), %zmm19
|
||||
|
||||
// CHECK: vpermilpd $123, 8192(%rdx), %zmm19
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x9a,0x00,0x20,0x00,0x00,0x7b]
|
||||
vpermilpd $0x7b, 8192(%rdx), %zmm19
|
||||
|
||||
// CHECK: vpermilpd $123, -8192(%rdx), %zmm19
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x5a,0x80,0x7b]
|
||||
vpermilpd $0x7b, -8192(%rdx), %zmm19
|
||||
|
||||
// CHECK: vpermilpd $123, -8256(%rdx), %zmm19
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
|
||||
vpermilpd $0x7b, -8256(%rdx), %zmm19
|
||||
|
||||
// CHECK: vpermilpd $123, 1016(%rdx){1to8}, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x5a,0x7f,0x7b]
|
||||
vpermilpd $0x7b, 1016(%rdx){1to8}, %zmm19
|
||||
|
||||
// CHECK: vpermilpd $123, 1024(%rdx){1to8}, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x9a,0x00,0x04,0x00,0x00,0x7b]
|
||||
vpermilpd $0x7b, 1024(%rdx){1to8}, %zmm19
|
||||
|
||||
// CHECK: vpermilpd $123, -1024(%rdx){1to8}, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x5a,0x80,0x7b]
|
||||
vpermilpd $0x7b, -1024(%rdx){1to8}, %zmm19
|
||||
|
||||
// CHECK: vpermilpd $123, -1032(%rdx){1to8}, %zmm19
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
|
||||
vpermilpd $0x7b, -1032(%rdx){1to8}, %zmm19
|
||||
|
||||
// CHECK: vpermilpd %zmm21, %zmm26, %zmm1
|
||||
// CHECK: encoding: [0x62,0xb2,0xad,0x40,0x0d,0xcd]
|
||||
vpermilpd %zmm21, %zmm26, %zmm1
|
||||
|
||||
// CHECK: vpermilpd %zmm21, %zmm26, %zmm1 {%k2}
|
||||
// CHECK: encoding: [0x62,0xb2,0xad,0x42,0x0d,0xcd]
|
||||
vpermilpd %zmm21, %zmm26, %zmm1 {%k2}
|
||||
|
||||
// CHECK: vpermilpd %zmm21, %zmm26, %zmm1 {%k2} {z}
|
||||
// CHECK: encoding: [0x62,0xb2,0xad,0xc2,0x0d,0xcd]
|
||||
vpermilpd %zmm21, %zmm26, %zmm1 {%k2} {z}
|
||||
|
||||
// CHECK: vpermilpd (%rcx), %zmm26, %zmm1
|
||||
// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x09]
|
||||
vpermilpd (%rcx), %zmm26, %zmm1
|
||||
|
||||
// CHECK: vpermilpd 291(%rax,%r14,8), %zmm26, %zmm1
|
||||
// CHECK: encoding: [0x62,0xb2,0xad,0x40,0x0d,0x8c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpermilpd 291(%rax,%r14,8), %zmm26, %zmm1
|
||||
|
||||
// CHECK: vpermilpd (%rcx){1to8}, %zmm26, %zmm1
|
||||
// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x09]
|
||||
vpermilpd (%rcx){1to8}, %zmm26, %zmm1
|
||||
|
||||
// CHECK: vpermilpd 8128(%rdx), %zmm26, %zmm1
|
||||
// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x4a,0x7f]
|
||||
vpermilpd 8128(%rdx), %zmm26, %zmm1
|
||||
|
||||
// CHECK: vpermilpd 8192(%rdx), %zmm26, %zmm1
|
||||
// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x8a,0x00,0x20,0x00,0x00]
|
||||
vpermilpd 8192(%rdx), %zmm26, %zmm1
|
||||
|
||||
// CHECK: vpermilpd -8192(%rdx), %zmm26, %zmm1
|
||||
// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x4a,0x80]
|
||||
vpermilpd -8192(%rdx), %zmm26, %zmm1
|
||||
|
||||
// CHECK: vpermilpd -8256(%rdx), %zmm26, %zmm1
|
||||
// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x8a,0xc0,0xdf,0xff,0xff]
|
||||
vpermilpd -8256(%rdx), %zmm26, %zmm1
|
||||
|
||||
// CHECK: vpermilpd 1016(%rdx){1to8}, %zmm26, %zmm1
|
||||
// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x4a,0x7f]
|
||||
vpermilpd 1016(%rdx){1to8}, %zmm26, %zmm1
|
||||
|
||||
// CHECK: vpermilpd 1024(%rdx){1to8}, %zmm26, %zmm1
|
||||
// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x8a,0x00,0x04,0x00,0x00]
|
||||
vpermilpd 1024(%rdx){1to8}, %zmm26, %zmm1
|
||||
|
||||
// CHECK: vpermilpd -1024(%rdx){1to8}, %zmm26, %zmm1
|
||||
// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x4a,0x80]
|
||||
vpermilpd -1024(%rdx){1to8}, %zmm26, %zmm1
|
||||
|
||||
// CHECK: vpermilpd -1032(%rdx){1to8}, %zmm26, %zmm1
|
||||
// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x8a,0xf8,0xfb,0xff,0xff]
|
||||
vpermilpd -1032(%rdx){1to8}, %zmm26, %zmm1
|
||||
|
||||
// CHECK: vcvtpd2dq %zmm15, %ymm24
|
||||
// CHECK: encoding: [0x62,0x41,0xff,0x48,0xe6,0xc7]
|
||||
vcvtpd2dq %zmm15, %ymm24
|
||||
|
@ -20523,6 +20523,470 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
|
||||
// CHECK: encoding: [0x62,0x22,0xfd,0x21,0xa2,0xb4,0xb9,0x00,0x04,0x00,0x00]
|
||||
vscatterdpd %ymm30, 1024(%rcx, %xmm31,4) {%k1}
|
||||
|
||||
// CHECK: vpermilps $171, %xmm28, %xmm20
|
||||
// CHECK: encoding: [0x62,0x83,0x7d,0x08,0x04,0xe4,0xab]
|
||||
vpermilps $0xab, %xmm28, %xmm20
|
||||
|
||||
// CHECK: vpermilps $171, %xmm28, %xmm20 {%k4}
|
||||
// CHECK: encoding: [0x62,0x83,0x7d,0x0c,0x04,0xe4,0xab]
|
||||
vpermilps $0xab, %xmm28, %xmm20 {%k4}
|
||||
|
||||
// CHECK: vpermilps $171, %xmm28, %xmm20 {%k4} {z}
|
||||
// CHECK: encoding: [0x62,0x83,0x7d,0x8c,0x04,0xe4,0xab]
|
||||
vpermilps $0xab, %xmm28, %xmm20 {%k4} {z}
|
||||
|
||||
// CHECK: vpermilps $123, %xmm28, %xmm20
|
||||
// CHECK: encoding: [0x62,0x83,0x7d,0x08,0x04,0xe4,0x7b]
|
||||
vpermilps $0x7b, %xmm28, %xmm20
|
||||
|
||||
// CHECK: vpermilps $123, (%rcx), %xmm20
|
||||
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0x21,0x7b]
|
||||
vpermilps $0x7b, (%rcx), %xmm20
|
||||
|
||||
// CHECK: vpermilps $123, 291(%rax,%r14,8), %xmm20
|
||||
// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x04,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
|
||||
vpermilps $0x7b, 291(%rax,%r14,8), %xmm20
|
||||
|
||||
// CHECK: vpermilps $123, (%rcx){1to4}, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0x21,0x7b]
|
||||
vpermilps $0x7b, (%rcx){1to4}, %xmm20
|
||||
|
||||
// CHECK: vpermilps $123, 2032(%rdx), %xmm20
|
||||
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0x62,0x7f,0x7b]
|
||||
vpermilps $0x7b, 2032(%rdx), %xmm20
|
||||
|
||||
// CHECK: vpermilps $123, 2048(%rdx), %xmm20
|
||||
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0xa2,0x00,0x08,0x00,0x00,0x7b]
|
||||
vpermilps $0x7b, 2048(%rdx), %xmm20
|
||||
|
||||
// CHECK: vpermilps $123, -2048(%rdx), %xmm20
|
||||
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0x62,0x80,0x7b]
|
||||
vpermilps $0x7b, -2048(%rdx), %xmm20
|
||||
|
||||
// CHECK: vpermilps $123, -2064(%rdx), %xmm20
|
||||
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
|
||||
vpermilps $0x7b, -2064(%rdx), %xmm20
|
||||
|
||||
// CHECK: vpermilps $123, 508(%rdx){1to4}, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0x62,0x7f,0x7b]
|
||||
vpermilps $0x7b, 508(%rdx){1to4}, %xmm20
|
||||
|
||||
// CHECK: vpermilps $123, 512(%rdx){1to4}, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0xa2,0x00,0x02,0x00,0x00,0x7b]
|
||||
vpermilps $0x7b, 512(%rdx){1to4}, %xmm20
|
||||
|
||||
// CHECK: vpermilps $123, -512(%rdx){1to4}, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0x62,0x80,0x7b]
|
||||
vpermilps $0x7b, -512(%rdx){1to4}, %xmm20
|
||||
|
||||
// CHECK: vpermilps $123, -516(%rdx){1to4}, %xmm20
|
||||
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
|
||||
vpermilps $0x7b, -516(%rdx){1to4}, %xmm20
|
||||
|
||||
// CHECK: vpermilps $171, %ymm17, %ymm30
|
||||
// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x04,0xf1,0xab]
|
||||
vpermilps $0xab, %ymm17, %ymm30
|
||||
|
||||
// CHECK: vpermilps $171, %ymm17, %ymm30 {%k5}
|
||||
// CHECK: encoding: [0x62,0x23,0x7d,0x2d,0x04,0xf1,0xab]
|
||||
vpermilps $0xab, %ymm17, %ymm30 {%k5}
|
||||
|
||||
// CHECK: vpermilps $171, %ymm17, %ymm30 {%k5} {z}
|
||||
// CHECK: encoding: [0x62,0x23,0x7d,0xad,0x04,0xf1,0xab]
|
||||
vpermilps $0xab, %ymm17, %ymm30 {%k5} {z}
|
||||
|
||||
// CHECK: vpermilps $123, %ymm17, %ymm30
|
||||
// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x04,0xf1,0x7b]
|
||||
vpermilps $0x7b, %ymm17, %ymm30
|
||||
|
||||
// CHECK: vpermilps $123, (%rcx), %ymm30
|
||||
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0x31,0x7b]
|
||||
vpermilps $0x7b, (%rcx), %ymm30
|
||||
|
||||
// CHECK: vpermilps $123, 291(%rax,%r14,8), %ymm30
|
||||
// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x04,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
|
||||
vpermilps $0x7b, 291(%rax,%r14,8), %ymm30
|
||||
|
||||
// CHECK: vpermilps $123, (%rcx){1to8}, %ymm30
|
||||
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0x31,0x7b]
|
||||
vpermilps $0x7b, (%rcx){1to8}, %ymm30
|
||||
|
||||
// CHECK: vpermilps $123, 4064(%rdx), %ymm30
|
||||
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0x72,0x7f,0x7b]
|
||||
vpermilps $0x7b, 4064(%rdx), %ymm30
|
||||
|
||||
// CHECK: vpermilps $123, 4096(%rdx), %ymm30
|
||||
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0xb2,0x00,0x10,0x00,0x00,0x7b]
|
||||
vpermilps $0x7b, 4096(%rdx), %ymm30
|
||||
|
||||
// CHECK: vpermilps $123, -4096(%rdx), %ymm30
|
||||
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0x72,0x80,0x7b]
|
||||
vpermilps $0x7b, -4096(%rdx), %ymm30
|
||||
|
||||
// CHECK: vpermilps $123, -4128(%rdx), %ymm30
|
||||
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0xb2,0xe0,0xef,0xff,0xff,0x7b]
|
||||
vpermilps $0x7b, -4128(%rdx), %ymm30
|
||||
|
||||
// CHECK: vpermilps $123, 508(%rdx){1to8}, %ymm30
|
||||
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0x72,0x7f,0x7b]
|
||||
vpermilps $0x7b, 508(%rdx){1to8}, %ymm30
|
||||
|
||||
// CHECK: vpermilps $123, 512(%rdx){1to8}, %ymm30
|
||||
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0xb2,0x00,0x02,0x00,0x00,0x7b]
|
||||
vpermilps $0x7b, 512(%rdx){1to8}, %ymm30
|
||||
|
||||
// CHECK: vpermilps $123, -512(%rdx){1to8}, %ymm30
|
||||
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0x72,0x80,0x7b]
|
||||
vpermilps $0x7b, -512(%rdx){1to8}, %ymm30
|
||||
|
||||
// CHECK: vpermilps $123, -516(%rdx){1to8}, %ymm30
|
||||
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
|
||||
vpermilps $0x7b, -516(%rdx){1to8}, %ymm30
|
||||
|
||||
// CHECK: vpermilps %xmm22, %xmm28, %xmm28
|
||||
// CHECK: encoding: [0x62,0x22,0x1d,0x00,0x0c,0xe6]
|
||||
vpermilps %xmm22, %xmm28, %xmm28
|
||||
|
||||
// CHECK: vpermilps %xmm22, %xmm28, %xmm28 {%k6}
|
||||
// CHECK: encoding: [0x62,0x22,0x1d,0x06,0x0c,0xe6]
|
||||
vpermilps %xmm22, %xmm28, %xmm28 {%k6}
|
||||
|
||||
// CHECK: vpermilps %xmm22, %xmm28, %xmm28 {%k6} {z}
|
||||
// CHECK: encoding: [0x62,0x22,0x1d,0x86,0x0c,0xe6]
|
||||
vpermilps %xmm22, %xmm28, %xmm28 {%k6} {z}
|
||||
|
||||
// CHECK: vpermilps (%rcx), %xmm28, %xmm28
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0x21]
|
||||
vpermilps (%rcx), %xmm28, %xmm28
|
||||
|
||||
// CHECK: vpermilps 291(%rax,%r14,8), %xmm28, %xmm28
|
||||
// CHECK: encoding: [0x62,0x22,0x1d,0x00,0x0c,0xa4,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpermilps 291(%rax,%r14,8), %xmm28, %xmm28
|
||||
|
||||
// CHECK: vpermilps (%rcx){1to4}, %xmm28, %xmm28
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0x21]
|
||||
vpermilps (%rcx){1to4}, %xmm28, %xmm28
|
||||
|
||||
// CHECK: vpermilps 2032(%rdx), %xmm28, %xmm28
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0x62,0x7f]
|
||||
vpermilps 2032(%rdx), %xmm28, %xmm28
|
||||
|
||||
// CHECK: vpermilps 2048(%rdx), %xmm28, %xmm28
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0xa2,0x00,0x08,0x00,0x00]
|
||||
vpermilps 2048(%rdx), %xmm28, %xmm28
|
||||
|
||||
// CHECK: vpermilps -2048(%rdx), %xmm28, %xmm28
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0x62,0x80]
|
||||
vpermilps -2048(%rdx), %xmm28, %xmm28
|
||||
|
||||
// CHECK: vpermilps -2064(%rdx), %xmm28, %xmm28
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0xa2,0xf0,0xf7,0xff,0xff]
|
||||
vpermilps -2064(%rdx), %xmm28, %xmm28
|
||||
|
||||
// CHECK: vpermilps 508(%rdx){1to4}, %xmm28, %xmm28
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0x62,0x7f]
|
||||
vpermilps 508(%rdx){1to4}, %xmm28, %xmm28
|
||||
|
||||
// CHECK: vpermilps 512(%rdx){1to4}, %xmm28, %xmm28
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0xa2,0x00,0x02,0x00,0x00]
|
||||
vpermilps 512(%rdx){1to4}, %xmm28, %xmm28
|
||||
|
||||
// CHECK: vpermilps -512(%rdx){1to4}, %xmm28, %xmm28
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0x62,0x80]
|
||||
vpermilps -512(%rdx){1to4}, %xmm28, %xmm28
|
||||
|
||||
// CHECK: vpermilps -516(%rdx){1to4}, %xmm28, %xmm28
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0xa2,0xfc,0xfd,0xff,0xff]
|
||||
vpermilps -516(%rdx){1to4}, %xmm28, %xmm28
|
||||
|
||||
// CHECK: vpermilps %ymm21, %ymm28, %ymm29
|
||||
// CHECK: encoding: [0x62,0x22,0x1d,0x20,0x0c,0xed]
|
||||
vpermilps %ymm21, %ymm28, %ymm29
|
||||
|
||||
// CHECK: vpermilps %ymm21, %ymm28, %ymm29 {%k2}
|
||||
// CHECK: encoding: [0x62,0x22,0x1d,0x22,0x0c,0xed]
|
||||
vpermilps %ymm21, %ymm28, %ymm29 {%k2}
|
||||
|
||||
// CHECK: vpermilps %ymm21, %ymm28, %ymm29 {%k2} {z}
|
||||
// CHECK: encoding: [0x62,0x22,0x1d,0xa2,0x0c,0xed]
|
||||
vpermilps %ymm21, %ymm28, %ymm29 {%k2} {z}
|
||||
|
||||
// CHECK: vpermilps (%rcx), %ymm28, %ymm29
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0x29]
|
||||
vpermilps (%rcx), %ymm28, %ymm29
|
||||
|
||||
// CHECK: vpermilps 291(%rax,%r14,8), %ymm28, %ymm29
|
||||
// CHECK: encoding: [0x62,0x22,0x1d,0x20,0x0c,0xac,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpermilps 291(%rax,%r14,8), %ymm28, %ymm29
|
||||
|
||||
// CHECK: vpermilps (%rcx){1to8}, %ymm28, %ymm29
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0x29]
|
||||
vpermilps (%rcx){1to8}, %ymm28, %ymm29
|
||||
|
||||
// CHECK: vpermilps 4064(%rdx), %ymm28, %ymm29
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0x6a,0x7f]
|
||||
vpermilps 4064(%rdx), %ymm28, %ymm29
|
||||
|
||||
// CHECK: vpermilps 4096(%rdx), %ymm28, %ymm29
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0xaa,0x00,0x10,0x00,0x00]
|
||||
vpermilps 4096(%rdx), %ymm28, %ymm29
|
||||
|
||||
// CHECK: vpermilps -4096(%rdx), %ymm28, %ymm29
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0x6a,0x80]
|
||||
vpermilps -4096(%rdx), %ymm28, %ymm29
|
||||
|
||||
// CHECK: vpermilps -4128(%rdx), %ymm28, %ymm29
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0xaa,0xe0,0xef,0xff,0xff]
|
||||
vpermilps -4128(%rdx), %ymm28, %ymm29
|
||||
|
||||
// CHECK: vpermilps 508(%rdx){1to8}, %ymm28, %ymm29
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0x6a,0x7f]
|
||||
vpermilps 508(%rdx){1to8}, %ymm28, %ymm29
|
||||
|
||||
// CHECK: vpermilps 512(%rdx){1to8}, %ymm28, %ymm29
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0xaa,0x00,0x02,0x00,0x00]
|
||||
vpermilps 512(%rdx){1to8}, %ymm28, %ymm29
|
||||
|
||||
// CHECK: vpermilps -512(%rdx){1to8}, %ymm28, %ymm29
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0x6a,0x80]
|
||||
vpermilps -512(%rdx){1to8}, %ymm28, %ymm29
|
||||
|
||||
// CHECK: vpermilps -516(%rdx){1to8}, %ymm28, %ymm29
|
||||
// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0xaa,0xfc,0xfd,0xff,0xff]
|
||||
vpermilps -516(%rdx){1to8}, %ymm28, %ymm29
|
||||
|
||||
// CHECK: vpermilpd $171, %xmm19, %xmm29
|
||||
// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x05,0xeb,0xab]
|
||||
vpermilpd $0xab, %xmm19, %xmm29
|
||||
|
||||
// CHECK: vpermilpd $171, %xmm19, %xmm29 {%k7}
|
||||
// CHECK: encoding: [0x62,0x23,0xfd,0x0f,0x05,0xeb,0xab]
|
||||
vpermilpd $0xab, %xmm19, %xmm29 {%k7}
|
||||
|
||||
// CHECK: vpermilpd $171, %xmm19, %xmm29 {%k7} {z}
|
||||
// CHECK: encoding: [0x62,0x23,0xfd,0x8f,0x05,0xeb,0xab]
|
||||
vpermilpd $0xab, %xmm19, %xmm29 {%k7} {z}
|
||||
|
||||
// CHECK: vpermilpd $123, %xmm19, %xmm29
|
||||
// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x05,0xeb,0x7b]
|
||||
vpermilpd $0x7b, %xmm19, %xmm29
|
||||
|
||||
// CHECK: vpermilpd $123, (%rcx), %xmm29
|
||||
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0x29,0x7b]
|
||||
vpermilpd $0x7b, (%rcx), %xmm29
|
||||
|
||||
// CHECK: vpermilpd $123, 291(%rax,%r14,8), %xmm29
|
||||
// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x05,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
|
||||
vpermilpd $0x7b, 291(%rax,%r14,8), %xmm29
|
||||
|
||||
// CHECK: vpermilpd $123, (%rcx){1to2}, %xmm29
|
||||
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0x29,0x7b]
|
||||
vpermilpd $0x7b, (%rcx){1to2}, %xmm29
|
||||
|
||||
// CHECK: vpermilpd $123, 2032(%rdx), %xmm29
|
||||
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0x6a,0x7f,0x7b]
|
||||
vpermilpd $0x7b, 2032(%rdx), %xmm29
|
||||
|
||||
// CHECK: vpermilpd $123, 2048(%rdx), %xmm29
|
||||
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0xaa,0x00,0x08,0x00,0x00,0x7b]
|
||||
vpermilpd $0x7b, 2048(%rdx), %xmm29
|
||||
|
||||
// CHECK: vpermilpd $123, -2048(%rdx), %xmm29
|
||||
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0x6a,0x80,0x7b]
|
||||
vpermilpd $0x7b, -2048(%rdx), %xmm29
|
||||
|
||||
// CHECK: vpermilpd $123, -2064(%rdx), %xmm29
|
||||
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
|
||||
vpermilpd $0x7b, -2064(%rdx), %xmm29
|
||||
|
||||
// CHECK: vpermilpd $123, 1016(%rdx){1to2}, %xmm29
|
||||
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0x6a,0x7f,0x7b]
|
||||
vpermilpd $0x7b, 1016(%rdx){1to2}, %xmm29
|
||||
|
||||
// CHECK: vpermilpd $123, 1024(%rdx){1to2}, %xmm29
|
||||
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0xaa,0x00,0x04,0x00,0x00,0x7b]
|
||||
vpermilpd $0x7b, 1024(%rdx){1to2}, %xmm29
|
||||
|
||||
// CHECK: vpermilpd $123, -1024(%rdx){1to2}, %xmm29
|
||||
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0x6a,0x80,0x7b]
|
||||
vpermilpd $0x7b, -1024(%rdx){1to2}, %xmm29
|
||||
|
||||
// CHECK: vpermilpd $123, -1032(%rdx){1to2}, %xmm29
|
||||
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
|
||||
vpermilpd $0x7b, -1032(%rdx){1to2}, %xmm29
|
||||
|
||||
// CHECK: vpermilpd $171, %ymm24, %ymm17
|
||||
// CHECK: encoding: [0x62,0x83,0xfd,0x28,0x05,0xc8,0xab]
|
||||
vpermilpd $0xab, %ymm24, %ymm17
|
||||
|
||||
// CHECK: vpermilpd $171, %ymm24, %ymm17 {%k6}
|
||||
// CHECK: encoding: [0x62,0x83,0xfd,0x2e,0x05,0xc8,0xab]
|
||||
vpermilpd $0xab, %ymm24, %ymm17 {%k6}
|
||||
|
||||
// CHECK: vpermilpd $171, %ymm24, %ymm17 {%k6} {z}
|
||||
// CHECK: encoding: [0x62,0x83,0xfd,0xae,0x05,0xc8,0xab]
|
||||
vpermilpd $0xab, %ymm24, %ymm17 {%k6} {z}
|
||||
|
||||
// CHECK: vpermilpd $123, %ymm24, %ymm17
|
||||
// CHECK: encoding: [0x62,0x83,0xfd,0x28,0x05,0xc8,0x7b]
|
||||
vpermilpd $0x7b, %ymm24, %ymm17
|
||||
|
||||
// CHECK: vpermilpd $123, (%rcx), %ymm17
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x09,0x7b]
|
||||
vpermilpd $0x7b, (%rcx), %ymm17
|
||||
|
||||
// CHECK: vpermilpd $123, 291(%rax,%r14,8), %ymm17
|
||||
// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x05,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
|
||||
vpermilpd $0x7b, 291(%rax,%r14,8), %ymm17
|
||||
|
||||
// CHECK: vpermilpd $123, (%rcx){1to4}, %ymm17
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x09,0x7b]
|
||||
vpermilpd $0x7b, (%rcx){1to4}, %ymm17
|
||||
|
||||
// CHECK: vpermilpd $123, 4064(%rdx), %ymm17
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x4a,0x7f,0x7b]
|
||||
vpermilpd $0x7b, 4064(%rdx), %ymm17
|
||||
|
||||
// CHECK: vpermilpd $123, 4096(%rdx), %ymm17
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x8a,0x00,0x10,0x00,0x00,0x7b]
|
||||
vpermilpd $0x7b, 4096(%rdx), %ymm17
|
||||
|
||||
// CHECK: vpermilpd $123, -4096(%rdx), %ymm17
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x4a,0x80,0x7b]
|
||||
vpermilpd $0x7b, -4096(%rdx), %ymm17
|
||||
|
||||
// CHECK: vpermilpd $123, -4128(%rdx), %ymm17
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x8a,0xe0,0xef,0xff,0xff,0x7b]
|
||||
vpermilpd $0x7b, -4128(%rdx), %ymm17
|
||||
|
||||
// CHECK: vpermilpd $123, 1016(%rdx){1to4}, %ymm17
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x4a,0x7f,0x7b]
|
||||
vpermilpd $0x7b, 1016(%rdx){1to4}, %ymm17
|
||||
|
||||
// CHECK: vpermilpd $123, 1024(%rdx){1to4}, %ymm17
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x8a,0x00,0x04,0x00,0x00,0x7b]
|
||||
vpermilpd $0x7b, 1024(%rdx){1to4}, %ymm17
|
||||
|
||||
// CHECK: vpermilpd $123, -1024(%rdx){1to4}, %ymm17
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x4a,0x80,0x7b]
|
||||
vpermilpd $0x7b, -1024(%rdx){1to4}, %ymm17
|
||||
|
||||
// CHECK: vpermilpd $123, -1032(%rdx){1to4}, %ymm17
|
||||
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
|
||||
vpermilpd $0x7b, -1032(%rdx){1to4}, %ymm17
|
||||
|
||||
// CHECK: vpermilpd %xmm17, %xmm27, %xmm26
|
||||
// CHECK: encoding: [0x62,0x22,0xa5,0x00,0x0d,0xd1]
|
||||
vpermilpd %xmm17, %xmm27, %xmm26
|
||||
|
||||
// CHECK: vpermilpd %xmm17, %xmm27, %xmm26 {%k2}
|
||||
// CHECK: encoding: [0x62,0x22,0xa5,0x02,0x0d,0xd1]
|
||||
vpermilpd %xmm17, %xmm27, %xmm26 {%k2}
|
||||
|
||||
// CHECK: vpermilpd %xmm17, %xmm27, %xmm26 {%k2} {z}
|
||||
// CHECK: encoding: [0x62,0x22,0xa5,0x82,0x0d,0xd1]
|
||||
vpermilpd %xmm17, %xmm27, %xmm26 {%k2} {z}
|
||||
|
||||
// CHECK: vpermilpd (%rcx), %xmm27, %xmm26
|
||||
// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x11]
|
||||
vpermilpd (%rcx), %xmm27, %xmm26
|
||||
|
||||
// CHECK: vpermilpd 291(%rax,%r14,8), %xmm27, %xmm26
|
||||
// CHECK: encoding: [0x62,0x22,0xa5,0x00,0x0d,0x94,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpermilpd 291(%rax,%r14,8), %xmm27, %xmm26
|
||||
|
||||
// CHECK: vpermilpd (%rcx){1to2}, %xmm27, %xmm26
|
||||
// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x11]
|
||||
vpermilpd (%rcx){1to2}, %xmm27, %xmm26
|
||||
|
||||
// CHECK: vpermilpd 2032(%rdx), %xmm27, %xmm26
|
||||
// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x52,0x7f]
|
||||
vpermilpd 2032(%rdx), %xmm27, %xmm26
|
||||
|
||||
// CHECK: vpermilpd 2048(%rdx), %xmm27, %xmm26
|
||||
// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x92,0x00,0x08,0x00,0x00]
|
||||
vpermilpd 2048(%rdx), %xmm27, %xmm26
|
||||
|
||||
// CHECK: vpermilpd -2048(%rdx), %xmm27, %xmm26
|
||||
// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x52,0x80]
|
||||
vpermilpd -2048(%rdx), %xmm27, %xmm26
|
||||
|
||||
// CHECK: vpermilpd -2064(%rdx), %xmm27, %xmm26
|
||||
// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x92,0xf0,0xf7,0xff,0xff]
|
||||
vpermilpd -2064(%rdx), %xmm27, %xmm26
|
||||
|
||||
// CHECK: vpermilpd 1016(%rdx){1to2}, %xmm27, %xmm26
|
||||
// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x52,0x7f]
|
||||
vpermilpd 1016(%rdx){1to2}, %xmm27, %xmm26
|
||||
|
||||
// CHECK: vpermilpd 1024(%rdx){1to2}, %xmm27, %xmm26
|
||||
// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x92,0x00,0x04,0x00,0x00]
|
||||
vpermilpd 1024(%rdx){1to2}, %xmm27, %xmm26
|
||||
|
||||
// CHECK: vpermilpd -1024(%rdx){1to2}, %xmm27, %xmm26
|
||||
// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x52,0x80]
|
||||
vpermilpd -1024(%rdx){1to2}, %xmm27, %xmm26
|
||||
|
||||
// CHECK: vpermilpd -1032(%rdx){1to2}, %xmm27, %xmm26
|
||||
// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x92,0xf8,0xfb,0xff,0xff]
|
||||
vpermilpd -1032(%rdx){1to2}, %xmm27, %xmm26
|
||||
|
||||
// CHECK: vpermilpd %ymm24, %ymm26, %ymm26
|
||||
// CHECK: encoding: [0x62,0x02,0xad,0x20,0x0d,0xd0]
|
||||
vpermilpd %ymm24, %ymm26, %ymm26
|
||||
|
||||
// CHECK: vpermilpd %ymm24, %ymm26, %ymm26 {%k5}
|
||||
// CHECK: encoding: [0x62,0x02,0xad,0x25,0x0d,0xd0]
|
||||
vpermilpd %ymm24, %ymm26, %ymm26 {%k5}
|
||||
|
||||
// CHECK: vpermilpd %ymm24, %ymm26, %ymm26 {%k5} {z}
|
||||
// CHECK: encoding: [0x62,0x02,0xad,0xa5,0x0d,0xd0]
|
||||
vpermilpd %ymm24, %ymm26, %ymm26 {%k5} {z}
|
||||
|
||||
// CHECK: vpermilpd (%rcx), %ymm26, %ymm26
|
||||
// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x11]
|
||||
vpermilpd (%rcx), %ymm26, %ymm26
|
||||
|
||||
// CHECK: vpermilpd 291(%rax,%r14,8), %ymm26, %ymm26
|
||||
// CHECK: encoding: [0x62,0x22,0xad,0x20,0x0d,0x94,0xf0,0x23,0x01,0x00,0x00]
|
||||
vpermilpd 291(%rax,%r14,8), %ymm26, %ymm26
|
||||
|
||||
// CHECK: vpermilpd (%rcx){1to4}, %ymm26, %ymm26
|
||||
// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x11]
|
||||
vpermilpd (%rcx){1to4}, %ymm26, %ymm26
|
||||
|
||||
// CHECK: vpermilpd 4064(%rdx), %ymm26, %ymm26
|
||||
// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x52,0x7f]
|
||||
vpermilpd 4064(%rdx), %ymm26, %ymm26
|
||||
|
||||
// CHECK: vpermilpd 4096(%rdx), %ymm26, %ymm26
|
||||
// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x92,0x00,0x10,0x00,0x00]
|
||||
vpermilpd 4096(%rdx), %ymm26, %ymm26
|
||||
|
||||
// CHECK: vpermilpd -4096(%rdx), %ymm26, %ymm26
|
||||
// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x52,0x80]
|
||||
vpermilpd -4096(%rdx), %ymm26, %ymm26
|
||||
|
||||
// CHECK: vpermilpd -4128(%rdx), %ymm26, %ymm26
|
||||
// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x92,0xe0,0xef,0xff,0xff]
|
||||
vpermilpd -4128(%rdx), %ymm26, %ymm26
|
||||
|
||||
// CHECK: vpermilpd 1016(%rdx){1to4}, %ymm26, %ymm26
|
||||
// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x52,0x7f]
|
||||
vpermilpd 1016(%rdx){1to4}, %ymm26, %ymm26
|
||||
|
||||
// CHECK: vpermilpd 1024(%rdx){1to4}, %ymm26, %ymm26
|
||||
// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x92,0x00,0x04,0x00,0x00]
|
||||
vpermilpd 1024(%rdx){1to4}, %ymm26, %ymm26
|
||||
|
||||
// CHECK: vpermilpd -1024(%rdx){1to4}, %ymm26, %ymm26
|
||||
// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x52,0x80]
|
||||
vpermilpd -1024(%rdx){1to4}, %ymm26, %ymm26
|
||||
|
||||
// CHECK: vpermilpd -1032(%rdx){1to4}, %ymm26, %ymm26
|
||||
// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x92,0xf8,0xfb,0xff,0xff]
|
||||
vpermilpd -1032(%rdx){1to4}, %ymm26, %ymm26
|
||||
|
||||
// CHECK: vcvtpd2dq %xmm20, %xmm25
|
||||
// CHECK: encoding: [0x62,0x21,0xff,0x08,0xe6,0xcc]
|
||||
vcvtpd2dq %xmm20, %xmm25
|
||||
|
Loading…
Reference in New Issue
Block a user