[X86][AVX512VLBW] add support for byte shift and SAD

Add byte shift left/right (VPSLLDQ/VPSRLDQ).
Add SAD (VPSADBW) - compute the sum of absolute differences.

Differential Revision: http://reviews.llvm.org/D12479

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246654 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Asaf Badouh 2015-09-02 14:21:54 +00:00
parent f8f78cdf6d
commit 05859c7cbb
10 changed files with 596 additions and 8 deletions

View File

@ -2356,6 +2356,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
// 512-bit byte shift left / right intrinsics. Each takes a v8i64 source and an
// i32 shift amount and returns a v8i64; both are declared IntrNoMem and are
// exposed as the GCC builtins __builtin_ia32_pslldq512 / __builtin_ia32_psrldq512.
def int_x86_avx512_psll_dq_512 : GCCBuiltin<"__builtin_ia32_pslldq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_psrl_dq_512 : GCCBuiltin<"__builtin_ia32_psrldq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty],
[IntrNoMem]>;
}
// Gather ops
@ -4981,6 +4987,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
// 512-bit SAD (sum of absolute differences) intrinsic over two v64i8 operands,
// exposed as the GCC builtin __builtin_ia32_psadbw512.
// NOTE(review): the result is declared as v64i8 here; PSADBW is documented as
// producing 64-bit sums, so v8i64 may be the intended result type - confirm.
def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">,
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
[IntrNoMem]>;
}
// FP logical ops
let TargetPrefix = "x86" in {

View File

@ -15576,6 +15576,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
case INTR_TYPE_2OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
case INTR_TYPE_2OP_IMM8:
// Same shape as INTR_TYPE_2OP, except that the second intrinsic operand is
// wrapped in an ISD::TRUNCATE to MVT::i8 before the Opc0 node is built.
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(2)));
case INTR_TYPE_3OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));

View File

@ -6822,3 +6822,71 @@ multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//
// Byte shift by immediate for a single vector width described by the vector
// info `_`. Defines a register-source form (rr) and a memory-source form (rm);
// both take a u8imm as $src2 and select OpNode with an i8 immediate operand.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
Format MRMm, string OpcodeStr, X86VectorVTInfo _>{
// Register form: $dst = OpNode($src1, imm8), encoded with format MRMr.
def rr : AVX512<opc, MRMr,
(outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>;
// Memory form: the value to shift is loaded from $src1 through _.LdFrag,
// encoded with format MRMm.
let mayLoad = 1 in
def rm : AVX512<opc, MRMm,
(outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode
(_.LdFrag addr:$src1), (i8 imm:$src2))))]>;
}
// Instantiates avx512_shift_packed for three vector widths:
//   Z512 (v8i64_info, EVEX_V512) guarded by predicate `prd`;
//   Z256 (v4i64x_info, EVEX_V256) and Z128 (v2i64x_info, EVEX_V128) guarded
//   by `prd` together with HasVLX.
multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
Format MRMm, string OpcodeStr, Predicate prd>{
let Predicates = [prd] in
defm Z512 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
OpcodeStr, v8i64_info>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
OpcodeStr, v4i64x_info>, EVEX_V256;
defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
OpcodeStr, v2i64x_info>, EVEX_V128;
}
}
// EVEX byte shifts, available under HasBWI. Both share opcode 0x73: the left
// shift (X86vshldq) uses the MRM7r/MRM7m formats and the right shift
// (X86vshrdq) uses MRM3r/MRM3m.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
HasBWI>, AVX512PDIi8Base, EVEX_4V;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
HasBWI>, AVX512PDIi8Base, EVEX_4V;
// Two-source SAD instruction for a single vector width described by `_src`.
// Defines a register/register form (rr) and a register/memory form (rm).
// NOTE(review): source and destination both use _src (register class and
// value type); PSADBW is documented as producing 64-bit sums, so a separate
// destination vector info may be intended - confirm.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
string OpcodeStr, X86VectorVTInfo _src>{
// Register form: $dst = OpNode($src1, $src2).
def rr : AVX512BI<opc, MRMSrcReg,
(outs _src.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _src.RC:$dst,(_src.VT
(OpNode _src.RC:$src1, _src.RC:$src2)))]>;
// Memory form: the second operand is loaded via _src.LdFrag and bitconverted
// to _src.VT before being passed to OpNode.
let mayLoad = 1 in
def rm : AVX512BI<opc, MRMSrcMem,
(outs _src.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _src.RC:$dst,(_src.VT
(OpNode _src.RC:$src1,
(_src.VT (bitconvert
(_src.LdFrag addr:$src2))))))]>;
}
// Instantiates avx512_psadbw_packed for three vector widths:
//   Z512 (v64i8_info, EVEX_V512) guarded by predicate `prd`;
//   Z256 (v32i8x_info, EVEX_V256) and Z128 (v16i8x_info, EVEX_V128) guarded
//   by `prd` together with HasVLX.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
defm Z512 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v64i8_info>,
EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v32i8x_info>,
EVEX_V256;
defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v16i8x_info>,
EVEX_V128;
}
}
// EVEX vpsadbw (opcode 0xf6, node X86psadbw), available under HasBWI.
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
HasBWI>, EVEX_4V;

View File

@ -4137,8 +4137,10 @@ defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
} // Predicates = [HasAVX]
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] ,
Predicates = [HasAVX, NoVLX_Or_NoBWI]in {
// 128-bit logical shifts.
def VPSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
@ -4153,8 +4155,7 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
(v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))]>,
VEX_4V;
// PSRADQri doesn't exist in SSE[1-3].
}
} // Predicates = [HasAVX]
} // Predicates = [HasAVX, NoVLX_Or_NoBWI]
let Predicates = [HasAVX2, NoVLX] in {
defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
@ -4183,8 +4184,10 @@ defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
}// Predicates = [HasAVX2]
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 ,
Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
// 256-bit logical shifts.
def VPSLLDQYri : PDIi8<0x73, MRM7r,
(outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
@ -4199,8 +4202,7 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
(v4i64 (X86vshrdq VR256:$src1, (i8 imm:$src2))))]>,
VEX_4V, VEX_L;
// PSRADQYri doesn't exist in SSE[1-3].
}
} // Predicates = [HasAVX2]
} // Predicates = [HasAVX2, NoVLX_Or_NoBWI]
let Constraints = "$src1 = $dst" in {
defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,

View File

@ -19,7 +19,7 @@ namespace llvm {
enum IntrinsicType {
INTR_NO_TYPE,
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
@ -1426,6 +1426,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ,
X86ISD::VPERMV3, 0),
// SAD maps directly to X86ISD::PSADBW as a plain two-operand intrinsic; the
// byte shifts use INTR_TYPE_2OP_IMM8 and map to X86ISD::VSHLDQ / VSRLDQ.
X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(avx512_psll_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSHLDQ, 0),
X86_INTRINSIC_DATA(avx512_psrl_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSRLDQ, 0),
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),

View File

@ -261,4 +261,9 @@ define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
%shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
%bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
ret <2 x double> %bitcast64
}
}
; Shuffle of a zero vector with %a: result elements 7 and 15 take mask indices
; 16 and 24 (elements 0 and 8 of %a); every other result element takes mask
; index 0, i.e. an element of the zeroinitializer operand.
define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
%shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
ret <16 x i16> %shuffle
}

View File

@ -1221,3 +1221,41 @@ define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8>
%res4 = add <32 x i16> %res3, %res2
ret <32 x i16> %res4
}
; Calls the 512-bit byte-shift-left intrinsic twice on %x0 (shift amounts 8 and
; 4) and adds the results. The expectations below require two vpslldq
; instructions and no remaining call in the generated code.
; Despite "mask" in the test name, the intrinsic takes no mask operand.
declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_psll_dq_512
; CHECK-NOT: call
; CHECK: vpslldq
; CHECK: vpslldq
define <8 x i64>@test_int_x86_avx512_mask_psll_dq_512(<8 x i64> %x0) {
%res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
%res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
; Calls the 512-bit byte-shift-right intrinsic twice on %x0 (shift amounts 8
; and 4) and adds the results. The expectations below require two vpsrldq
; instructions and no remaining call in the generated code.
; Despite "mask" in the test name, the intrinsic takes no mask operand.
declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_psrl_dq_512
; CHECK-NOT: call
; CHECK: vpsrldq
; CHECK: vpsrldq
define <8 x i64>@test_int_x86_avx512_mask_psrl_dq_512(<8 x i64> %x0) {
%res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
%res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
; Calls the 512-bit SAD intrinsic on (%x0, %x1) and (%x0, %x2) and adds the two
; results. The expectations below require one vpsadbw with %zmm1 and one with
; %zmm2 as the first printed operand, and no remaining call.
; NOTE(review): the intrinsic is declared here as returning <64 x i8>; confirm
; this matches the intended SAD result type.
declare <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)
; CHECK-LABEL: @test_int_x86_avx512_mask_psadb_w_512
; CHECK-NOT: call
; CHECK: vpsadbw %zmm1
; CHECK: vpsadbw %zmm2
define <64 x i8>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
%res = call <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
%res1 = call <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
%res2 = add <64 x i8> %res, %res1
ret <64 x i8> %res2
}

View File

@ -4275,3 +4275,4 @@ define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8>
%res4 = add <16 x i16> %res3, %res2
ret <16 x i16> %res4
}

View File

@ -4216,3 +4216,94 @@
// CHECK: encoding: [0x62,0xe3,0x5d,0x40,0x42,0xaa,0xc0,0xdf,0xff,0xff,0x7b]
vdbpsadbw $123, -8256(%rdx), %zmm20, %zmm21
// vpslldq, 512-bit forms writing %zmm20: a register source with two different
// immediates, then memory sources. Displacements 8128 and -8192 are expected
// to encode in one displacement byte (0x7f / 0x80); 8192 and -8256 are
// expected to use a four-byte displacement.
// CHECK: vpslldq $171, %zmm28, %zmm20
// CHECK: encoding: [0x62,0x91,0x5d,0x40,0x73,0xfc,0xab]
vpslldq $171, %zmm28, %zmm20
// CHECK: vpslldq $123, %zmm28, %zmm20
// CHECK: encoding: [0x62,0x91,0x5d,0x40,0x73,0xfc,0x7b]
vpslldq $123, %zmm28, %zmm20
// CHECK: vpslldq $123, (%rcx), %zmm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x40,0x73,0x39,0x7b]
vpslldq $123, (%rcx), %zmm20
// CHECK: vpslldq $123, 291(%rax,%r14,8), %zmm20
// CHECK: encoding: [0x62,0xb1,0x5d,0x40,0x73,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpslldq $123, 291(%rax,%r14,8), %zmm20
// CHECK: vpslldq $123, 8128(%rdx), %zmm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x40,0x73,0x7a,0x7f,0x7b]
vpslldq $123, 8128(%rdx), %zmm20
// CHECK: vpslldq $123, 8192(%rdx), %zmm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x40,0x73,0xba,0x00,0x20,0x00,0x00,0x7b]
vpslldq $123, 8192(%rdx), %zmm20
// CHECK: vpslldq $123, -8192(%rdx), %zmm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x40,0x73,0x7a,0x80,0x7b]
vpslldq $123, -8192(%rdx), %zmm20
// CHECK: vpslldq $123, -8256(%rdx), %zmm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x40,0x73,0xba,0xc0,0xdf,0xff,0xff,0x7b]
vpslldq $123, -8256(%rdx), %zmm20
// vpsrldq, 512-bit forms writing %zmm18: a register source with two different
// immediates, then memory sources. Displacements 8128 and -8192 are expected
// to encode in one displacement byte (0x7f / 0x80); 8192 and -8256 are
// expected to use a four-byte displacement.
// CHECK: vpsrldq $171, %zmm26, %zmm18
// CHECK: encoding: [0x62,0x91,0x6d,0x40,0x73,0xda,0xab]
vpsrldq $171, %zmm26, %zmm18
// CHECK: vpsrldq $123, %zmm26, %zmm18
// CHECK: encoding: [0x62,0x91,0x6d,0x40,0x73,0xda,0x7b]
vpsrldq $123, %zmm26, %zmm18
// CHECK: vpsrldq $123, (%rcx), %zmm18
// CHECK: encoding: [0x62,0xf1,0x6d,0x40,0x73,0x19,0x7b]
vpsrldq $123, (%rcx), %zmm18
// CHECK: vpsrldq $123, 291(%rax,%r14,8), %zmm18
// CHECK: encoding: [0x62,0xb1,0x6d,0x40,0x73,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpsrldq $123, 291(%rax,%r14,8), %zmm18
// CHECK: vpsrldq $123, 8128(%rdx), %zmm18
// CHECK: encoding: [0x62,0xf1,0x6d,0x40,0x73,0x5a,0x7f,0x7b]
vpsrldq $123, 8128(%rdx), %zmm18
// CHECK: vpsrldq $123, 8192(%rdx), %zmm18
// CHECK: encoding: [0x62,0xf1,0x6d,0x40,0x73,0x9a,0x00,0x20,0x00,0x00,0x7b]
vpsrldq $123, 8192(%rdx), %zmm18
// CHECK: vpsrldq $123, -8192(%rdx), %zmm18
// CHECK: encoding: [0x62,0xf1,0x6d,0x40,0x73,0x5a,0x80,0x7b]
vpsrldq $123, -8192(%rdx), %zmm18
// CHECK: vpsrldq $123, -8256(%rdx), %zmm18
// CHECK: encoding: [0x62,0xf1,0x6d,0x40,0x73,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
vpsrldq $123, -8256(%rdx), %zmm18
// vpsadbw, 512-bit forms with %zmm25 as first source and %zmm28 as
// destination: a register second source, then memory second sources.
// Displacements 8128 and -8192 are expected to encode in one displacement
// byte (0x7f / 0x80); 8192 and -8256 are expected to use four bytes.
// CHECK: vpsadbw %zmm22, %zmm25, %zmm28
// CHECK: encoding: [0x62,0x21,0x35,0x40,0xf6,0xe6]
vpsadbw %zmm22, %zmm25, %zmm28
// CHECK: vpsadbw (%rcx), %zmm25, %zmm28
// CHECK: encoding: [0x62,0x61,0x35,0x40,0xf6,0x21]
vpsadbw (%rcx), %zmm25, %zmm28
// CHECK: vpsadbw 291(%rax,%r14,8), %zmm25, %zmm28
// CHECK: encoding: [0x62,0x21,0x35,0x40,0xf6,0xa4,0xf0,0x23,0x01,0x00,0x00]
vpsadbw 291(%rax,%r14,8), %zmm25, %zmm28
// CHECK: vpsadbw 8128(%rdx), %zmm25, %zmm28
// CHECK: encoding: [0x62,0x61,0x35,0x40,0xf6,0x62,0x7f]
vpsadbw 8128(%rdx), %zmm25, %zmm28
// CHECK: vpsadbw 8192(%rdx), %zmm25, %zmm28
// CHECK: encoding: [0x62,0x61,0x35,0x40,0xf6,0xa2,0x00,0x20,0x00,0x00]
vpsadbw 8192(%rdx), %zmm25, %zmm28
// CHECK: vpsadbw -8192(%rdx), %zmm25, %zmm28
// CHECK: encoding: [0x62,0x61,0x35,0x40,0xf6,0x62,0x80]
vpsadbw -8192(%rdx), %zmm25, %zmm28
// CHECK: vpsadbw -8256(%rdx), %zmm25, %zmm28
// CHECK: encoding: [0x62,0x61,0x35,0x40,0xf6,0xa2,0xc0,0xdf,0xff,0xff]
vpsadbw -8256(%rdx), %zmm25, %zmm28

View File

@ -8399,6 +8399,7 @@
// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0xa2,0xe0,0xef,0xff,0xff]
vpunpckhwd -4128(%rdx), %ymm25, %ymm28
// CHECK: vpalignr $171, %xmm21, %xmm26, %xmm19
// CHECK: encoding: [0x62,0xa3,0x2d,0x00,0x0f,0xdd,0xab]
vpalignr $171, %xmm21, %xmm26, %xmm19
@ -8718,3 +8719,370 @@
// CHECK: vdbpsadbw $123, -4128(%rdx), %ymm19, %ymm17
// CHECK: encoding: [0x62,0xe3,0x65,0x20,0x42,0x8a,0xe0,0xef,0xff,0xff,0x7b]
vdbpsadbw $123, -4128(%rdx), %ymm19, %ymm17
// vpslldq, 128-bit EVEX forms writing %xmm20: a register source with two
// different immediates, then memory sources. Displacements 2032 and -2048 are
// expected to encode in one displacement byte (0x7f / 0x80); 2048 and -2064
// are expected to use a four-byte displacement.
// CHECK: vpslldq $171, %xmm24, %xmm20
// CHECK: encoding: [0x62,0x91,0x5d,0x00,0x73,0xf8,0xab]
vpslldq $171, %xmm24, %xmm20
// CHECK: vpslldq $123, %xmm24, %xmm20
// CHECK: encoding: [0x62,0x91,0x5d,0x00,0x73,0xf8,0x7b]
vpslldq $123, %xmm24, %xmm20
// CHECK: vpslldq $123, (%rcx), %xmm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x00,0x73,0x39,0x7b]
vpslldq $123, (%rcx), %xmm20
// CHECK: vpslldq $123, 291(%rax,%r14,8), %xmm20
// CHECK: encoding: [0x62,0xb1,0x5d,0x00,0x73,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpslldq $123, 291(%rax,%r14,8), %xmm20
// CHECK: vpslldq $123, 2032(%rdx), %xmm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x00,0x73,0x7a,0x7f,0x7b]
vpslldq $123, 2032(%rdx), %xmm20
// CHECK: vpslldq $123, 2048(%rdx), %xmm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x00,0x73,0xba,0x00,0x08,0x00,0x00,0x7b]
vpslldq $123, 2048(%rdx), %xmm20
// CHECK: vpslldq $123, -2048(%rdx), %xmm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x00,0x73,0x7a,0x80,0x7b]
vpslldq $123, -2048(%rdx), %xmm20
// CHECK: vpslldq $123, -2064(%rdx), %xmm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x00,0x73,0xba,0xf0,0xf7,0xff,0xff,0x7b]
vpslldq $123, -2064(%rdx), %xmm20
// vpslldq, 256-bit EVEX forms writing %ymm26: a register source with two
// different immediates, then memory sources. Displacements 4064 and -4096 are
// expected to encode in one displacement byte (0x7f / 0x80); 4096 and -4128
// are expected to use a four-byte displacement.
// CHECK: vpslldq $171, %ymm25, %ymm26
// CHECK: encoding: [0x62,0x91,0x2d,0x20,0x73,0xf9,0xab]
vpslldq $171, %ymm25, %ymm26
// CHECK: vpslldq $123, %ymm25, %ymm26
// CHECK: encoding: [0x62,0x91,0x2d,0x20,0x73,0xf9,0x7b]
vpslldq $123, %ymm25, %ymm26
// CHECK: vpslldq $123, (%rcx), %ymm26
// CHECK: encoding: [0x62,0xf1,0x2d,0x20,0x73,0x39,0x7b]
vpslldq $123, (%rcx), %ymm26
// CHECK: vpslldq $123, 291(%rax,%r14,8), %ymm26
// CHECK: encoding: [0x62,0xb1,0x2d,0x20,0x73,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpslldq $123, 291(%rax,%r14,8), %ymm26
// CHECK: vpslldq $123, 4064(%rdx), %ymm26
// CHECK: encoding: [0x62,0xf1,0x2d,0x20,0x73,0x7a,0x7f,0x7b]
vpslldq $123, 4064(%rdx), %ymm26
// CHECK: vpslldq $123, 4096(%rdx), %ymm26
// CHECK: encoding: [0x62,0xf1,0x2d,0x20,0x73,0xba,0x00,0x10,0x00,0x00,0x7b]
vpslldq $123, 4096(%rdx), %ymm26
// CHECK: vpslldq $123, -4096(%rdx), %ymm26
// CHECK: encoding: [0x62,0xf1,0x2d,0x20,0x73,0x7a,0x80,0x7b]
vpslldq $123, -4096(%rdx), %ymm26
// CHECK: vpslldq $123, -4128(%rdx), %ymm26
// CHECK: encoding: [0x62,0xf1,0x2d,0x20,0x73,0xba,0xe0,0xef,0xff,0xff,0x7b]
vpslldq $123, -4128(%rdx), %ymm26
// vpslldq, 128-bit EVEX forms writing %xmm23, with the immediates written in
// hexadecimal in the input ($0xab / $0x7b) and expected back in decimal
// ($171 / $123). Same displacement boundaries as the other 128-bit group:
// 2032 / -2048 in one byte, 2048 / -2064 in four bytes.
// CHECK: vpslldq $171, %xmm19, %xmm23
// CHECK: encoding: [0x62,0xb1,0x45,0x00,0x73,0xfb,0xab]
vpslldq $0xab, %xmm19, %xmm23
// CHECK: vpslldq $123, %xmm19, %xmm23
// CHECK: encoding: [0x62,0xb1,0x45,0x00,0x73,0xfb,0x7b]
vpslldq $0x7b, %xmm19, %xmm23
// CHECK: vpslldq $123, (%rcx), %xmm23
// CHECK: encoding: [0x62,0xf1,0x45,0x00,0x73,0x39,0x7b]
vpslldq $0x7b,(%rcx), %xmm23
// CHECK: vpslldq $123, 4660(%rax,%r14,8), %xmm23
// CHECK: encoding: [0x62,0xb1,0x45,0x00,0x73,0xbc,0xf0,0x34,0x12,0x00,0x00,0x7b]
vpslldq $0x7b,4660(%rax,%r14,8), %xmm23
// CHECK: vpslldq $123, 2032(%rdx), %xmm23
// CHECK: encoding: [0x62,0xf1,0x45,0x00,0x73,0x7a,0x7f,0x7b]
vpslldq $0x7b,2032(%rdx), %xmm23
// CHECK: vpslldq $123, 2048(%rdx), %xmm23
// CHECK: encoding: [0x62,0xf1,0x45,0x00,0x73,0xba,0x00,0x08,0x00,0x00,0x7b]
vpslldq $0x7b,2048(%rdx), %xmm23
// CHECK: vpslldq $123, -2048(%rdx), %xmm23
// CHECK: encoding: [0x62,0xf1,0x45,0x00,0x73,0x7a,0x80,0x7b]
vpslldq $0x7b,-2048(%rdx), %xmm23
// CHECK: vpslldq $123, -2064(%rdx), %xmm23
// CHECK: encoding: [0x62,0xf1,0x45,0x00,0x73,0xba,0xf0,0xf7,0xff,0xff,0x7b]
vpslldq $0x7b,-2064(%rdx), %xmm23
// vpslldq, 256-bit EVEX forms writing %ymm29, with the immediates written in
// hexadecimal in the input ($0xab / $0x7b) and expected back in decimal
// ($171 / $123). Same displacement boundaries as the other 256-bit group:
// 4064 / -4096 in one byte, 4096 / -4128 in four bytes.
// CHECK: vpslldq $171, %ymm25, %ymm29
// CHECK: encoding: [0x62,0x91,0x15,0x20,0x73,0xf9,0xab]
vpslldq $0xab, %ymm25, %ymm29
// CHECK: vpslldq $123, %ymm25, %ymm29
// CHECK: encoding: [0x62,0x91,0x15,0x20,0x73,0xf9,0x7b]
vpslldq $0x7b, %ymm25, %ymm29
// CHECK: vpslldq $123, (%rcx), %ymm29
// CHECK: encoding: [0x62,0xf1,0x15,0x20,0x73,0x39,0x7b]
vpslldq $0x7b,(%rcx), %ymm29
// CHECK: vpslldq $123, 4660(%rax,%r14,8), %ymm29
// CHECK: encoding: [0x62,0xb1,0x15,0x20,0x73,0xbc,0xf0,0x34,0x12,0x00,0x00,0x7b]
vpslldq $0x7b,4660(%rax,%r14,8), %ymm29
// CHECK: vpslldq $123, 4064(%rdx), %ymm29
// CHECK: encoding: [0x62,0xf1,0x15,0x20,0x73,0x7a,0x7f,0x7b]
vpslldq $0x7b,4064(%rdx), %ymm29
// CHECK: vpslldq $123, 4096(%rdx), %ymm29
// CHECK: encoding: [0x62,0xf1,0x15,0x20,0x73,0xba,0x00,0x10,0x00,0x00,0x7b]
vpslldq $0x7b,4096(%rdx), %ymm29
// CHECK: vpslldq $123, -4096(%rdx), %ymm29
// CHECK: encoding: [0x62,0xf1,0x15,0x20,0x73,0x7a,0x80,0x7b]
vpslldq $0x7b,-4096(%rdx), %ymm29
// CHECK: vpslldq $123, -4128(%rdx), %ymm29
// CHECK: encoding: [0x62,0xf1,0x15,0x20,0x73,0xba,0xe0,0xef,0xff,0xff,0x7b]
vpslldq $0x7b,-4128(%rdx), %ymm29
// vpsrldq, 128-bit EVEX forms writing %xmm24: a register source with two
// different immediates, then memory sources. Displacements 2032 and -2048 are
// expected to encode in one displacement byte (0x7f / 0x80); 2048 and -2064
// are expected to use a four-byte displacement.
// CHECK: vpsrldq $171, %xmm21, %xmm24
// CHECK: encoding: [0x62,0xb1,0x3d,0x00,0x73,0xdd,0xab]
vpsrldq $171, %xmm21, %xmm24
// CHECK: vpsrldq $123, %xmm21, %xmm24
// CHECK: encoding: [0x62,0xb1,0x3d,0x00,0x73,0xdd,0x7b]
vpsrldq $123, %xmm21, %xmm24
// CHECK: vpsrldq $123, (%rcx), %xmm24
// CHECK: encoding: [0x62,0xf1,0x3d,0x00,0x73,0x19,0x7b]
vpsrldq $123, (%rcx), %xmm24
// CHECK: vpsrldq $123, 291(%rax,%r14,8), %xmm24
// CHECK: encoding: [0x62,0xb1,0x3d,0x00,0x73,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpsrldq $123, 291(%rax,%r14,8), %xmm24
// CHECK: vpsrldq $123, 2032(%rdx), %xmm24
// CHECK: encoding: [0x62,0xf1,0x3d,0x00,0x73,0x5a,0x7f,0x7b]
vpsrldq $123, 2032(%rdx), %xmm24
// CHECK: vpsrldq $123, 2048(%rdx), %xmm24
// CHECK: encoding: [0x62,0xf1,0x3d,0x00,0x73,0x9a,0x00,0x08,0x00,0x00,0x7b]
vpsrldq $123, 2048(%rdx), %xmm24
// CHECK: vpsrldq $123, -2048(%rdx), %xmm24
// CHECK: encoding: [0x62,0xf1,0x3d,0x00,0x73,0x5a,0x80,0x7b]
vpsrldq $123, -2048(%rdx), %xmm24
// CHECK: vpsrldq $123, -2064(%rdx), %xmm24
// CHECK: encoding: [0x62,0xf1,0x3d,0x00,0x73,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
vpsrldq $123, -2064(%rdx), %xmm24
// vpsrldq, 256-bit EVEX forms writing %ymm24: a register source with two
// different immediates, then memory sources. Displacements 4064 and -4096 are
// expected to encode in one displacement byte (0x7f / 0x80); 4096 and -4128
// are expected to use a four-byte displacement.
// CHECK: vpsrldq $171, %ymm25, %ymm24
// CHECK: encoding: [0x62,0x91,0x3d,0x20,0x73,0xd9,0xab]
vpsrldq $171, %ymm25, %ymm24
// CHECK: vpsrldq $123, %ymm25, %ymm24
// CHECK: encoding: [0x62,0x91,0x3d,0x20,0x73,0xd9,0x7b]
vpsrldq $123, %ymm25, %ymm24
// CHECK: vpsrldq $123, (%rcx), %ymm24
// CHECK: encoding: [0x62,0xf1,0x3d,0x20,0x73,0x19,0x7b]
vpsrldq $123, (%rcx), %ymm24
// CHECK: vpsrldq $123, 291(%rax,%r14,8), %ymm24
// CHECK: encoding: [0x62,0xb1,0x3d,0x20,0x73,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vpsrldq $123, 291(%rax,%r14,8), %ymm24
// CHECK: vpsrldq $123, 4064(%rdx), %ymm24
// CHECK: encoding: [0x62,0xf1,0x3d,0x20,0x73,0x5a,0x7f,0x7b]
vpsrldq $123, 4064(%rdx), %ymm24
// CHECK: vpsrldq $123, 4096(%rdx), %ymm24
// CHECK: encoding: [0x62,0xf1,0x3d,0x20,0x73,0x9a,0x00,0x10,0x00,0x00,0x7b]
vpsrldq $123, 4096(%rdx), %ymm24
// CHECK: vpsrldq $123, -4096(%rdx), %ymm24
// CHECK: encoding: [0x62,0xf1,0x3d,0x20,0x73,0x5a,0x80,0x7b]
vpsrldq $123, -4096(%rdx), %ymm24
// CHECK: vpsrldq $123, -4128(%rdx), %ymm24
// CHECK: encoding: [0x62,0xf1,0x3d,0x20,0x73,0x9a,0xe0,0xef,0xff,0xff,0x7b]
vpsrldq $123, -4128(%rdx), %ymm24
// vpsrldq, 128-bit EVEX forms writing %xmm18, with the immediates written in
// hexadecimal in the input ($0xab / $0x7b) and expected back in decimal
// ($171 / $123). Same displacement boundaries as the other 128-bit group:
// 2032 / -2048 in one byte, 2048 / -2064 in four bytes.
// CHECK: vpsrldq $171, %xmm17, %xmm18
// CHECK: encoding: [0x62,0xb1,0x6d,0x00,0x73,0xd9,0xab]
vpsrldq $0xab, %xmm17, %xmm18
// CHECK: vpsrldq $123, %xmm17, %xmm18
// CHECK: encoding: [0x62,0xb1,0x6d,0x00,0x73,0xd9,0x7b]
vpsrldq $0x7b, %xmm17, %xmm18
// CHECK: vpsrldq $123, (%rcx), %xmm18
// CHECK: encoding: [0x62,0xf1,0x6d,0x00,0x73,0x19,0x7b]
vpsrldq $0x7b,(%rcx), %xmm18
// CHECK: vpsrldq $123, 4660(%rax,%r14,8), %xmm18
// CHECK: encoding: [0x62,0xb1,0x6d,0x00,0x73,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
vpsrldq $0x7b,4660(%rax,%r14,8), %xmm18
// CHECK: vpsrldq $123, 2032(%rdx), %xmm18
// CHECK: encoding: [0x62,0xf1,0x6d,0x00,0x73,0x5a,0x7f,0x7b]
vpsrldq $0x7b,2032(%rdx), %xmm18
// CHECK: vpsrldq $123, 2048(%rdx), %xmm18
// CHECK: encoding: [0x62,0xf1,0x6d,0x00,0x73,0x9a,0x00,0x08,0x00,0x00,0x7b]
vpsrldq $0x7b,2048(%rdx), %xmm18
// CHECK: vpsrldq $123, -2048(%rdx), %xmm18
// CHECK: encoding: [0x62,0xf1,0x6d,0x00,0x73,0x5a,0x80,0x7b]
vpsrldq $0x7b,-2048(%rdx), %xmm18
// CHECK: vpsrldq $123, -2064(%rdx), %xmm18
// CHECK: encoding: [0x62,0xf1,0x6d,0x00,0x73,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
vpsrldq $0x7b,-2064(%rdx), %xmm18
// vpsrldq, 256-bit EVEX forms writing %ymm20, with the immediates written in
// hexadecimal in the input ($0xab / $0x7b) and expected back in decimal
// ($171 / $123). Same displacement boundaries as the other 256-bit group:
// 4064 / -4096 in one byte, 4096 / -4128 in four bytes.
// CHECK: vpsrldq $171, %ymm28, %ymm20
// CHECK: encoding: [0x62,0x91,0x5d,0x20,0x73,0xdc,0xab]
vpsrldq $0xab, %ymm28, %ymm20
// CHECK: vpsrldq $123, %ymm28, %ymm20
// CHECK: encoding: [0x62,0x91,0x5d,0x20,0x73,0xdc,0x7b]
vpsrldq $0x7b, %ymm28, %ymm20
// CHECK: vpsrldq $123, (%rcx), %ymm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x20,0x73,0x19,0x7b]
vpsrldq $0x7b,(%rcx), %ymm20
// CHECK: vpsrldq $123, 4660(%rax,%r14,8), %ymm20
// CHECK: encoding: [0x62,0xb1,0x5d,0x20,0x73,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
vpsrldq $0x7b,4660(%rax,%r14,8), %ymm20
// CHECK: vpsrldq $123, 4064(%rdx), %ymm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x20,0x73,0x5a,0x7f,0x7b]
vpsrldq $0x7b,4064(%rdx), %ymm20
// CHECK: vpsrldq $123, 4096(%rdx), %ymm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x20,0x73,0x9a,0x00,0x10,0x00,0x00,0x7b]
vpsrldq $0x7b,4096(%rdx), %ymm20
// CHECK: vpsrldq $123, -4096(%rdx), %ymm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x20,0x73,0x5a,0x80,0x7b]
vpsrldq $0x7b,-4096(%rdx), %ymm20
// CHECK: vpsrldq $123, -4128(%rdx), %ymm20
// CHECK: encoding: [0x62,0xf1,0x5d,0x20,0x73,0x9a,0xe0,0xef,0xff,0xff,0x7b]
vpsrldq $0x7b,-4128(%rdx), %ymm20
// vpsadbw, 128-bit EVEX forms with %xmm24 as first source and %xmm17 as
// destination: a register second source, then memory second sources.
// Displacements 2032 and -2048 are expected to encode in one displacement
// byte (0x7f / 0x80); 2048 and -2064 are expected to use four bytes.
// CHECK: vpsadbw %xmm24, %xmm24, %xmm17
// CHECK: encoding: [0x62,0x81,0x3d,0x00,0xf6,0xc8]
vpsadbw %xmm24, %xmm24, %xmm17
// CHECK: vpsadbw (%rcx), %xmm24, %xmm17
// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x09]
vpsadbw (%rcx), %xmm24, %xmm17
// CHECK: vpsadbw 291(%rax,%r14,8), %xmm24, %xmm17
// CHECK: encoding: [0x62,0xa1,0x3d,0x00,0xf6,0x8c,0xf0,0x23,0x01,0x00,0x00]
vpsadbw 291(%rax,%r14,8), %xmm24, %xmm17
// CHECK: vpsadbw 2032(%rdx), %xmm24, %xmm17
// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x4a,0x7f]
vpsadbw 2032(%rdx), %xmm24, %xmm17
// CHECK: vpsadbw 2048(%rdx), %xmm24, %xmm17
// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x8a,0x00,0x08,0x00,0x00]
vpsadbw 2048(%rdx), %xmm24, %xmm17
// CHECK: vpsadbw -2048(%rdx), %xmm24, %xmm17
// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x4a,0x80]
vpsadbw -2048(%rdx), %xmm24, %xmm17
// CHECK: vpsadbw -2064(%rdx), %xmm24, %xmm17
// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x8a,0xf0,0xf7,0xff,0xff]
vpsadbw -2064(%rdx), %xmm24, %xmm17
// vpsadbw, 256-bit EVEX forms with %ymm27 as first source and %ymm19 as
// destination: a register second source, then memory second sources.
// Displacements 4064 and -4096 are expected to encode in one displacement
// byte (0x7f / 0x80); 4096 and -4128 are expected to use four bytes.
// CHECK: vpsadbw %ymm24, %ymm27, %ymm19
// CHECK: encoding: [0x62,0x81,0x25,0x20,0xf6,0xd8]
vpsadbw %ymm24, %ymm27, %ymm19
// CHECK: vpsadbw (%rcx), %ymm27, %ymm19
// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xf6,0x19]
vpsadbw (%rcx), %ymm27, %ymm19
// CHECK: vpsadbw 291(%rax,%r14,8), %ymm27, %ymm19
// CHECK: encoding: [0x62,0xa1,0x25,0x20,0xf6,0x9c,0xf0,0x23,0x01,0x00,0x00]
vpsadbw 291(%rax,%r14,8), %ymm27, %ymm19
// CHECK: vpsadbw 4064(%rdx), %ymm27, %ymm19
// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xf6,0x5a,0x7f]
vpsadbw 4064(%rdx), %ymm27, %ymm19
// CHECK: vpsadbw 4096(%rdx), %ymm27, %ymm19
// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xf6,0x9a,0x00,0x10,0x00,0x00]
vpsadbw 4096(%rdx), %ymm27, %ymm19
// CHECK: vpsadbw -4096(%rdx), %ymm27, %ymm19
// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xf6,0x5a,0x80]
vpsadbw -4096(%rdx), %ymm27, %ymm19
// CHECK: vpsadbw -4128(%rdx), %ymm27, %ymm19
// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xf6,0x9a,0xe0,0xef,0xff,0xff]
vpsadbw -4128(%rdx), %ymm27, %ymm19
// Second 128-bit vpsadbw group, with %xmm19 as first source and %xmm30 as
// destination, and 4660 as the scaled-index displacement. Same displacement
// boundaries as the other 128-bit group: 2032 / -2048 in one byte,
// 2048 / -2064 in four bytes.
// CHECK: vpsadbw %xmm21, %xmm19, %xmm30
// CHECK: encoding: [0x62,0x21,0x65,0x00,0xf6,0xf5]
vpsadbw %xmm21, %xmm19, %xmm30
// CHECK: vpsadbw (%rcx), %xmm19, %xmm30
// CHECK: encoding: [0x62,0x61,0x65,0x00,0xf6,0x31]
vpsadbw (%rcx), %xmm19, %xmm30
// CHECK: vpsadbw 4660(%rax,%r14,8), %xmm19, %xmm30
// CHECK: encoding: [0x62,0x21,0x65,0x00,0xf6,0xb4,0xf0,0x34,0x12,0x00,0x00]
vpsadbw 4660(%rax,%r14,8), %xmm19, %xmm30
// CHECK: vpsadbw 2032(%rdx), %xmm19, %xmm30
// CHECK: encoding: [0x62,0x61,0x65,0x00,0xf6,0x72,0x7f]
vpsadbw 2032(%rdx), %xmm19, %xmm30
// CHECK: vpsadbw 2048(%rdx), %xmm19, %xmm30
// CHECK: encoding: [0x62,0x61,0x65,0x00,0xf6,0xb2,0x00,0x08,0x00,0x00]
vpsadbw 2048(%rdx), %xmm19, %xmm30
// CHECK: vpsadbw -2048(%rdx), %xmm19, %xmm30
// CHECK: encoding: [0x62,0x61,0x65,0x00,0xf6,0x72,0x80]
vpsadbw -2048(%rdx), %xmm19, %xmm30
// CHECK: vpsadbw -2064(%rdx), %xmm19, %xmm30
// CHECK: encoding: [0x62,0x61,0x65,0x00,0xf6,0xb2,0xf0,0xf7,0xff,0xff]
vpsadbw -2064(%rdx), %xmm19, %xmm30
// Second 256-bit vpsadbw group, with %ymm26 as first source and %ymm20 as
// destination, and 4660 as the scaled-index displacement. Same displacement
// boundaries as the other 256-bit group: 4064 / -4096 in one byte,
// 4096 / -4128 in four bytes.
// CHECK: vpsadbw %ymm27, %ymm26, %ymm20
// CHECK: encoding: [0x62,0x81,0x2d,0x20,0xf6,0xe3]
vpsadbw %ymm27, %ymm26, %ymm20
// CHECK: vpsadbw (%rcx), %ymm26, %ymm20
// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xf6,0x21]
vpsadbw (%rcx), %ymm26, %ymm20
// CHECK: vpsadbw 4660(%rax,%r14,8), %ymm26, %ymm20
// CHECK: encoding: [0x62,0xa1,0x2d,0x20,0xf6,0xa4,0xf0,0x34,0x12,0x00,0x00]
vpsadbw 4660(%rax,%r14,8), %ymm26, %ymm20
// CHECK: vpsadbw 4064(%rdx), %ymm26, %ymm20
// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xf6,0x62,0x7f]
vpsadbw 4064(%rdx), %ymm26, %ymm20
// CHECK: vpsadbw 4096(%rdx), %ymm26, %ymm20
// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xf6,0xa2,0x00,0x10,0x00,0x00]
vpsadbw 4096(%rdx), %ymm26, %ymm20
// CHECK: vpsadbw -4096(%rdx), %ymm26, %ymm20
// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xf6,0x62,0x80]
vpsadbw -4096(%rdx), %ymm26, %ymm20
// CHECK: vpsadbw -4128(%rdx), %ymm26, %ymm20
// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xf6,0xa2,0xe0,0xef,0xff,0xff]
vpsadbw -4128(%rdx), %ymm26, %ymm20