AVX-512: Added all SKX forms of GATHER instructions.

Added intrinsics.
Added encoding and tests.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240905 91177308-0d34-0410-b5e6-96231b3b80d8
Elena Demikhovsky 2015-06-28 10:53:29 +00:00
parent 1e7ecc8e57
commit b23b2fbd3a
11 changed files with 935 additions and 120 deletions

include/llvm/IR/IntrinsicsX86.td

@@ -4264,6 +4264,102 @@ let TargetPrefix = "x86" in {
llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3div2_df :
GCCBuiltin<"__builtin_ia32_gather3div2df">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3div2_di :
GCCBuiltin<"__builtin_ia32_gather3div2di">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3div4_df :
GCCBuiltin<"__builtin_ia32_gather3div4df">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3div4_di :
GCCBuiltin<"__builtin_ia32_gather3div4di">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3div4_sf :
GCCBuiltin<"__builtin_ia32_gather3div4sf">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3div4_si :
GCCBuiltin<"__builtin_ia32_gather3div4si">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3div8_sf :
GCCBuiltin<"__builtin_ia32_gather3div8sf">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3div8_si :
GCCBuiltin<"__builtin_ia32_gather3div8si">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3siv2_df :
GCCBuiltin<"__builtin_ia32_gather3siv2df">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3siv2_di :
GCCBuiltin<"__builtin_ia32_gather3siv2di">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3siv4_df :
GCCBuiltin<"__builtin_ia32_gather3siv4df">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3siv4_di :
GCCBuiltin<"__builtin_ia32_gather3siv4di">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3siv4_sf :
GCCBuiltin<"__builtin_ia32_gather3siv4sf">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3siv4_si :
GCCBuiltin<"__builtin_ia32_gather3siv4si">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3siv8_sf :
GCCBuiltin<"__builtin_ia32_gather3siv8sf">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
def int_x86_avx512_gather3siv8_si :
GCCBuiltin<"__builtin_ia32_gather3siv8si">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadArgMem]>;
// scatter
def int_x86_avx512_scatter_dpd_512 : GCCBuiltin<"__builtin_ia32_scattersiv8df">,
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
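As a sketch of the operand order these definitions encode (pass-through vector, base pointer, index vector, mask, scale), here is a minimal IR use of one of the new intrinsics; the function name is hypothetical, while the signature matches the test file further below:

declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)

define <8 x float> @sample_gather(<8 x float> %src, i8* %base, <8 x i32> %ind, i8 %mask) {
  ; %src supplies the masked-off lanes, %ind holds eight dword indices,
  ; %mask is an 8-bit lane mask, and 4 is the scale applied to each index.
  %res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %src, i8* %base, <8 x i32> %ind, i8 %mask, i32 4)
  ret <8 x float> %res
}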

lib/Target/X86/AsmParser/X86Operand.h

@@ -238,18 +238,34 @@ struct X86Operand : public MCParsedAsmOperand {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
}
bool isMemVX32X() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM31;
}
bool isMemVY32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
}
bool isMemVY32X() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM31;
}
bool isMemVX64() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
}
bool isMemVX64X() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM31;
}
bool isMemVY64() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
}
bool isMemVY64X() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM31;
}
bool isMemVZ32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;

lib/Target/X86/X86ISelLowering.cpp

@@ -15424,7 +15424,12 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
const X86Subtarget * Subtarget) {
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
assert(C && "Invalid scale type");
if (!C)
llvm_unreachable("Invalid scale type");
unsigned ScaleVal = C->getZExtValue();
if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8)
llvm_unreachable("Valid scale values are 1, 2, 4, 8");
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
@@ -15432,8 +15437,16 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
else
MaskInReg = DAG.getBitcast(MaskVT, Mask);
else {
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
// When MaskVT is v2i1 or v4i1, the low 2 or 4 elements are
// extracted with EXTRACT_SUBVECTOR.
MaskInReg = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));
}
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
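A rough IR-level analogue of the new mask handling above, assuming shufflevector as the IR counterpart of EXTRACT_SUBVECTOR (the DAG code operates on SDValues, not IR):

define <2 x i1> @mask_low_two(i8 %mask) {
  ; Reinterpret the 8-bit mask as eight i1 lanes, then keep only the
  ; low 2 lanes, matching a v2i1 MaskVT for a 2-element gather.
  %m8 = bitcast i8 %mask to <8 x i1>
  %m2 = shufflevector <8 x i1> %m8, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i1> %m2
}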

lib/Target/X86/X86InstrAVX512.td

@@ -5440,10 +5440,11 @@ defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, PatFrag GatherNode> {
let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
ExeDomain = _.ExeDomain in
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
(ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
!strconcat(OpcodeStr,
!strconcat(OpcodeStr#_.Suffix,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
[(set _.RC:$dst, _.KRCWM:$mask_wb,
(GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask,
@@ -5451,29 +5452,48 @@ multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
EVEX_CD8<_.EltSize, CD8VT1>;
}
let ExeDomain = SSEPackedDouble in {
defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", v8f64_info, vy64xmem,
mgatherv8i32>, EVEX_V512, VEX_W;
defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", v8f64_info, vz64mem,
mgatherv8i64>, EVEX_V512, VEX_W;
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
vy32xmem, mgatherv8i32>, EVEX_V512, VEX_W;
defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
vz64mem, mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
vx32xmem, mgatherv4i32>, EVEX_V256, VEX_W;
defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
vy64xmem, mgatherv4i64>, EVEX_V256, VEX_W;
defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
vx32xmem, mgatherv4i32>, EVEX_V128, VEX_W;
defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
vx64xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}
let ExeDomain = SSEPackedSingle in {
defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", v16f32_info, vz32mem,
mgatherv16i32>, EVEX_V512;
defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", v8f32x_info, vz64mem,
mgatherv8i64>, EVEX_V512;
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz32mem,
mgatherv16i32>, EVEX_V512;
defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz64mem,
mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
vy32xmem, mgatherv8i32>, EVEX_V256;
defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
vy64xmem, mgatherv4i64>, EVEX_V256;
defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
vx32xmem, mgatherv4i32>, EVEX_V128;
defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
vx64xmem, mgatherv2i64>, EVEX_V128;
}
}
defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", v8i64_info, vy64xmem,
mgatherv8i32>, EVEX_V512, VEX_W;
defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", v16i32_info, vz32mem,
mgatherv16i32>, EVEX_V512;
defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", v8i64_info, vz64mem,
mgatherv8i64>, EVEX_V512, VEX_W;
defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", v8i32x_info, vz64mem,
mgatherv8i64>, EVEX_V512;
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, PatFrag ScatterNode> {
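The concatenated defm names expand to the usual mnemonics, e.g. NAME##Q##SUFF##Z256 under VGATHER with SUFF=PS yields VGATHERQPSZ256. A minimal IR sketch of one such VL form follows (hypothetical function name; the intrinsic signature is taken from the tests further below):

declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)

define <4 x float> @vl_qps_256(<4 x float> %src, i8* %base, <4 x i64> %ind, i8 %mask) {
  ; Four qword indices in a ymm, four float results in an xmm:
  ; the pattern the Z256 instantiation above covers.
  %res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %src, i8* %base, <4 x i64> %ind, i8 %mask, i32 4)
  ret <4 x float> %res
}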

lib/Target/X86/X86InstrFragmentsSIMD.td

@@ -560,6 +560,14 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
return false;
}]>;
def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
return (Mgt->getIndex().getValueType() == MVT::v4i32 ||
Mgt->getBasePtr().getValueType() == MVT::v4i32);
return false;
}]>;
def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
@@ -568,6 +576,20 @@ def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
return false;
}]>;
def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
return (Mgt->getIndex().getValueType() == MVT::v2i64 ||
Mgt->getBasePtr().getValueType() == MVT::v2i64);
return false;
}]>;
def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
return (Mgt->getIndex().getValueType() == MVT::v4i64 ||
Mgt->getBasePtr().getValueType() == MVT::v4i64);
return false;
}]>;
def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
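These PatFrags match the generic masked gather node; a sketch of IR that produces one with a v2i64 index type, using the masked.gather overload naming of this period:

declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*>, i32, <2 x i1>, <2 x double>)

define <2 x double> @generic_gather(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %passthru) {
  ; Lowers to a MaskedGatherSDNode whose index/pointer type lets
  ; mgatherv2i64 select the 128-bit qword-indexed instructions.
  %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 8, <2 x i1> %mask, <2 x double> %passthru)
  ret <2 x double> %res
}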

lib/Target/X86/X86InstrInfo.td

@@ -282,6 +282,10 @@ let RenderMethod = "addMemOperands" in {
def X86MemVX64Operand : AsmOperandClass { let Name = "MemVX64"; }
def X86MemVY64Operand : AsmOperandClass { let Name = "MemVY64"; }
def X86MemVZ64Operand : AsmOperandClass { let Name = "MemVZ64"; }
def X86MemVX32XOperand : AsmOperandClass { let Name = "MemVX32X"; }
def X86MemVY32XOperand : AsmOperandClass { let Name = "MemVY32X"; }
def X86MemVX64XOperand : AsmOperandClass { let Name = "MemVX64X"; }
def X86MemVY64XOperand : AsmOperandClass { let Name = "MemVY64X"; }
}
def X86AbsMemAsmOperand : AsmOperandClass {
@@ -332,7 +336,11 @@ def vx32mem : X86VMemOperand<VR128, "printi32mem", X86MemVX32Operand>;
def vy32mem : X86VMemOperand<VR256, "printi32mem", X86MemVY32Operand>;
def vx64mem : X86VMemOperand<VR128, "printi64mem", X86MemVX64Operand>;
def vy64mem : X86VMemOperand<VR256, "printi64mem", X86MemVY64Operand>;
def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64Operand>;
def vx32xmem : X86VMemOperand<VR128X, "printi32mem", X86MemVX32XOperand>;
def vx64xmem : X86VMemOperand<VR128X, "printi32mem", X86MemVX64XOperand>;
def vy32xmem : X86VMemOperand<VR256X, "printi32mem", X86MemVY32XOperand>;
def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64XOperand>;
def vz32mem : X86VMemOperand<VR512, "printi32mem", X86MemVZ32Operand>;
def vz64mem : X86VMemOperand<VR512, "printi64mem", X86MemVZ64Operand>;

lib/Target/X86/X86IntrinsicsInfo.h

@@ -56,6 +56,22 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
X86_INTRINSIC_DATA(avx512_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
X86_INTRINSIC_DATA(avx512_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
X86_INTRINSIC_DATA(avx512_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
X86_INTRINSIC_DATA(avx512_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
X86_INTRINSIC_DATA(avx512_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
X86_INTRINSIC_DATA(avx512_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
X86_INTRINSIC_DATA(avx512_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
X86_INTRINSIC_DATA(avx512_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
X86_INTRINSIC_DATA(avx512_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
X86_INTRINSIC_DATA(avx512_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
X86_INTRINSIC_DATA(avx512_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
X86_INTRINSIC_DATA(avx512_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
X86_INTRINSIC_DATA(avx512_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
X86_INTRINSIC_DATA(avx512_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
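End to end, each table entry ties an intrinsic to the machine instruction selected for it; a sketch for the first new entry (function name hypothetical):

declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)

define <2 x double> @maps_to_vgatherqpd(<2 x double> %src, i8* %base, <2 x i64> %ind, i8 %mask) {
  ; The avx512_gather3div2_df entry above routes this call to
  ; X86::VGATHERQPDZ128rm, a masked 128-bit vgatherqpd.
  %res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %src, i8* %base, <2 x i64> %ind, i8 %mask, i32 8)
  ret <2 x double> %res
}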

test/CodeGen/X86/avx512-gather-scatter-intrin.ll

@@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
@@ -10,52 +10,60 @@ declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
;CHECK-LABEL: gather_mask_dps
;CHECK: kmovw
;CHECK: vgatherdps
;CHECK: vpadd
;CHECK: vscatterdps
;CHECK: ret
define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
%x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
%ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
ret void
}
;CHECK-LABEL: gather_mask_dpd
;CHECK: kmovw
;CHECK: vgatherdpd
;CHECK: vpadd
;CHECK: vscatterdpd
;CHECK: ret
define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dpd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT: retq
%x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
%ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
ret void
}
;CHECK-LABEL: gather_mask_qps
;CHECK: kmovw
;CHECK: vgatherqps
;CHECK: vpadd
;CHECK: vscatterqps
;CHECK: ret
define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
%x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
ret void
}
;CHECK-LABEL: gather_mask_qpd
;CHECK: kmovw
;CHECK: vgatherqpd
;CHECK: vpadd
;CHECK: vscatterqpd
;CHECK: ret
define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qpd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
%x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
@@ -74,162 +82,469 @@ declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
;CHECK-LABEL: gather_mask_dd
;CHECK: kmovw
;CHECK: vpgatherdd
;CHECK: vpadd
;CHECK: vpscatterdd
;CHECK: ret
define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
%x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
%ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
ret void
}
;CHECK-LABEL: gather_mask_qd
;CHECK: kmovw
;CHECK: vpgatherqd
;CHECK: vpadd
;CHECK: vpscatterqd
;CHECK: ret
define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
%x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
ret void
}
;CHECK-LABEL: gather_mask_qq
;CHECK: kmovw
;CHECK: vpgatherqq
;CHECK: vpadd
;CHECK: vpscatterqq
;CHECK: ret
define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qq:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
%x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
ret void
}
;CHECK-LABEL: gather_mask_dq
;CHECK: kmovw
;CHECK: vpgatherdq
;CHECK: vpadd
;CHECK: vpscatterdq
;CHECK: ret
define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dq:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT: retq
%x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
%ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
ret void
}
;CHECK-LABEL: gather_mask_dpd_execdomain
;CHECK: vgatherdpd
;CHECK: vmovapd
;CHECK: ret
define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_dpd_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, (%rdx)
; CHECK-NEXT: retq
%x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
store <8 x double> %x, <8 x double>* %stbuf
ret void
}
;CHECK-LABEL: gather_mask_qpd_execdomain
;CHECK: vgatherqpd
;CHECK: vmovapd
;CHECK: ret
define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_qpd_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, (%rdx)
; CHECK-NEXT: retq
%x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
store <8 x double> %x, <8 x double>* %stbuf
ret void
}
;CHECK-LABEL: gather_mask_dps_execdomain
;CHECK: vgatherdps
;CHECK: vmovaps
;CHECK: ret
define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_dps_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
ret <16 x float> %res;
}
;CHECK-LABEL: gather_mask_qps_execdomain
;CHECK: vgatherqps
;CHECK: vmovaps
;CHECK: ret
define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_qps_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
ret <8 x float> %res;
}
;CHECK-LABEL: scatter_mask_dpd_execdomain
;CHECK: vmovapd
;CHECK: vscatterdpd
;CHECK: ret
define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
%x = load <8 x double>, <8 x double>* %src, align 64
; CHECK-LABEL: scatter_mask_dpd_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %zmm1
; CHECK-NEXT: vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
; CHECK-NEXT: retq
%x = load <8 x double>, <8 x double>* %src, align 64
call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
ret void
}
;CHECK-LABEL: scatter_mask_qpd_execdomain
;CHECK: vmovapd
;CHECK: vscatterqpd
;CHECK: ret
define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_qpd_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %zmm1
; CHECK-NEXT: vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
%x = load <8 x double>, <8 x double>* %src, align 64
call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
ret void
}
;CHECK-LABEL: scatter_mask_dps_execdomain
;CHECK: vmovaps
;CHECK: vscatterdps
;CHECK: ret
define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_dps_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %zmm1
; CHECK-NEXT: vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
%x = load <16 x float>, <16 x float>* %src, align 64
call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
ret void
}
;CHECK-LABEL: scatter_mask_qps_execdomain
;CHECK: vmovaps
;CHECK: vscatterqps
;CHECK: ret
define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
%x = load <8 x float>, <8 x float>* %src, align 32
; CHECK-LABEL: scatter_mask_qps_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %ymm1
; CHECK-NEXT: vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
%x = load <8 x float>, <8 x float>* %src, align 32
call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
ret void
}
;CHECK-LABEL: gather_qps
;CHECK: kxnorw
;CHECK: vgatherqps
;CHECK: vpadd
;CHECK: vscatterqps
;CHECK: ret
define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_qps:
; CHECK: ## BB#0:
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
; CHECK-NEXT: retq
%x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 -1, i32 4)
%ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4)
ret void
}
;CHECK-LABEL: prefetch
;CHECK: gatherpf0
;CHECK: gatherpf1
;CHECK: scatterpf0
;CHECK: scatterpf1
;CHECK: ret
declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
define void @prefetch(<8 x i64> %ind, i8* %base) {
; CHECK-LABEL: prefetch:
; CHECK: ## BB#0:
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
ret void
}
declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)
define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,0), %xmm0 {%k1}
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 0)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}
declare <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)
define <4 x i32>@test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
%res1 = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}
declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)
define <4 x double>@test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,0), %ymm0 {%k1}
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
%res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 0)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}
declare <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)
define <8 x i32>@test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 8)
%res1 = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 8)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}
declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)
define <4 x float>@test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherqps (%rdi,%xmm1,0), %xmm0 {%k1}
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 0)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}
declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)
define <4 x i32>@test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 4)
%res1 = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}
declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)
define <4 x float>@test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherqps (%rdi,%ymm1,0), %xmm0 {%k1}
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 0)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}
declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)
define <4 x i32>@test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
%res1 = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 2)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}
declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)
define <2 x double>@test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,0), %xmm0 {%k1}
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}
declare <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)
define <4 x i32>@test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
%res1 = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}
declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)
define <4 x double>@test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,0), %ymm0 {%k1}
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
%res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}
declare <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)
define <8 x i32>@test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
; CHECK-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
%res1 = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}
declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)
define <4 x float>@test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherdps (%rdi,%xmm1,0), %xmm0 {%k1}
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}
declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)
define <4 x i32>@test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,0), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 4)
%res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 0)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}
declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)
define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,0), %ymm0 {%k1}
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
%res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 -1, i32 0)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}
declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)
define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,0), %ymm0 {%k1}
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
%res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 0)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}

test/MC/X86/avx512-encodings.s

@@ -9793,3 +9793,115 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: vpabsq -1032(%rdx){1to8}, %zmm5
// CHECK: encoding: [0x62,0xf2,0xfd,0x58,0x1f,0xaa,0xf8,0xfb,0xff,0xff]
vpabsq -1032(%rdx){1to8}, %zmm5
// CHECK: vpgatherdd 123(%r14,%zmm11,8), %zmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x49,0x90,0x8c,0xde,0x7b,0x00,0x00,0x00]
vpgatherdd 123(%r14, %zmm11,8), %zmm17 {%k1}
// CHECK: vpgatherdd 256(%r9,%zmm11), %zmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x49,0x90,0x4c,0x19,0x40]
vpgatherdd 256(%r9,%zmm11), %zmm17 {%k1}
// CHECK: vpgatherdd 1024(%rcx,%zmm11,4), %zmm17 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x49,0x90,0x8c,0x99,0x00,0x04,0x00,0x00]
vpgatherdd 1024(%rcx, %zmm11,4), %zmm17 {%k1}
// CHECK: vpgatherdq 123(%r14,%ymm14,8), %zmm8 {%k1}
// CHECK: encoding: [0x62,0x12,0xfd,0x49,0x90,0x84,0xf6,0x7b,0x00,0x00,0x00]
vpgatherdq 123(%r14, %ymm14,8), %zmm8 {%k1}
// CHECK: vpgatherdq 256(%r9,%ymm14), %zmm8 {%k1}
// CHECK: encoding: [0x62,0x12,0xfd,0x49,0x90,0x44,0x31,0x20]
vpgatherdq 256(%r9, %ymm14), %zmm8 {%k1}
// CHECK: vpgatherdq 1024(%rcx,%ymm14,4), %zmm8 {%k1}
// CHECK: encoding: [0x62,0x32,0xfd,0x49,0x90,0x84,0xb1,0x00,0x04,0x00,0x00]
vpgatherdq 1024(%rcx, %ymm14,4), %zmm8 {%k1}
// CHECK: vpgatherqd 123(%r14,%zmm17,8), %ymm3 {%k1}
// CHECK: encoding: [0x62,0xd2,0x7d,0x41,0x91,0x9c,0xce,0x7b,0x00,0x00,0x00]
vpgatherqd 123(%r14, %zmm17,8), %ymm3 {%k1}
// CHECK: vpgatherqd 256(%r9,%zmm17), %ymm3 {%k1}
// CHECK: encoding: [0x62,0xd2,0x7d,0x41,0x91,0x5c,0x09,0x40]
vpgatherqd 256(%r9,%zmm17), %ymm3 {%k1}
// CHECK: vpgatherqd 1024(%rcx,%zmm17,4), %ymm3 {%k1}
// CHECK: encoding: [0x62,0xf2,0x7d,0x41,0x91,0x9c,0x89,0x00,0x04,0x00,0x00]
vpgatherqd 1024(%rcx, %zmm17,4), %ymm3 {%k1}
// CHECK: vpgatherqq 123(%r14,%zmm21,8), %zmm17 {%k1}
// CHECK: encoding: [0x62,0xc2,0xfd,0x41,0x91,0x8c,0xee,0x7b,0x00,0x00,0x00]
vpgatherqq 123(%r14, %zmm21,8), %zmm17 {%k1}
// CHECK: vpgatherqq 256(%r9,%zmm21), %zmm17 {%k1}
// CHECK: encoding: [0x62,0xc2,0xfd,0x41,0x91,0x4c,0x29,0x20]
vpgatherqq 256(%r9,%zmm21), %zmm17 {%k1}
// CHECK: vpgatherqq 1024(%rcx,%zmm21,4), %zmm17 {%k1}
// CHECK: encoding: [0x62,0xe2,0xfd,0x41,0x91,0x8c,0xa9,0x00,0x04,0x00,0x00]
vpgatherqq 1024(%rcx, %zmm21,4), %zmm17 {%k1}
// CHECK: vpscatterdd %zmm19, 123(%r14,%zmm16,8) {%k1}
// CHECK: encoding: [0x62,0xc2,0x7d,0x41,0xa0,0x9c,0xc6,0x7b,0x00,0x00,0x00]
vpscatterdd %zmm19, 123(%r14,%zmm16,8) {%k1}
// CHECK: vpscatterdd %zmm19, 123(%r14,%zmm16,8) {%k1}
// CHECK: encoding: [0x62,0xc2,0x7d,0x41,0xa0,0x9c,0xc6,0x7b,0x00,0x00,0x00]
vpscatterdd %zmm19, 123(%r14,%zmm16,8) {%k1}
// CHECK: vpscatterdd %zmm19, 256(%r9,%zmm16) {%k1}
// CHECK: encoding: [0x62,0xc2,0x7d,0x41,0xa0,0x5c,0x01,0x40]
vpscatterdd %zmm19, 256(%r9,%zmm16) {%k1}
// CHECK: vpscatterdd %zmm19, 1024(%rcx,%zmm16,4) {%k1}
// CHECK: encoding: [0x62,0xe2,0x7d,0x41,0xa0,0x9c,0x81,0x00,0x04,0x00,0x00]
vpscatterdd %zmm19, 1024(%rcx,%zmm16,4) {%k1}
// CHECK: vpscatterdq %zmm5, 123(%r14,%ymm6,8) {%k1}
// CHECK: encoding: [0x62,0xd2,0xfd,0x49,0xa0,0xac,0xf6,0x7b,0x00,0x00,0x00]
vpscatterdq %zmm5, 123(%r14,%ymm6,8) {%k1}
// CHECK: vpscatterdq %zmm5, 123(%r14,%ymm6,8) {%k1}
// CHECK: encoding: [0x62,0xd2,0xfd,0x49,0xa0,0xac,0xf6,0x7b,0x00,0x00,0x00]
vpscatterdq %zmm5, 123(%r14,%ymm6,8) {%k1}
// CHECK: vpscatterdq %zmm5, 256(%r9,%ymm6) {%k1}
// CHECK: encoding: [0x62,0xd2,0xfd,0x49,0xa0,0x6c,0x31,0x20]
vpscatterdq %zmm5, 256(%r9,%ymm6) {%k1}
// CHECK: vpscatterdq %zmm5, 1024(%rcx,%ymm6,4) {%k1}
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xa0,0xac,0xb1,0x00,0x04,0x00,0x00]
vpscatterdq %zmm5, 1024(%rcx,%ymm6,4) {%k1}
// CHECK: vpscatterqd %ymm20, 123(%r14,%zmm2,8) {%k1}
// CHECK: encoding: [0x62,0xc2,0x7d,0x49,0xa1,0xa4,0xd6,0x7b,0x00,0x00,0x00]
vpscatterqd %ymm20, 123(%r14,%zmm2,8) {%k1}
// CHECK: vpscatterqd %ymm20, 123(%r14,%zmm2,8) {%k1}
// CHECK: encoding: [0x62,0xc2,0x7d,0x49,0xa1,0xa4,0xd6,0x7b,0x00,0x00,0x00]
vpscatterqd %ymm20, 123(%r14,%zmm2,8) {%k1}
// CHECK: vpscatterqd %ymm20, 256(%r9,%zmm2) {%k1}
// CHECK: encoding: [0x62,0xc2,0x7d,0x49,0xa1,0x64,0x11,0x40]
vpscatterqd %ymm20, 256(%r9,%zmm2) {%k1}
// CHECK: vpscatterqd %ymm20, 1024(%rcx,%zmm2,4) {%k1}
// CHECK: encoding: [0x62,0xe2,0x7d,0x49,0xa1,0xa4,0x91,0x00,0x04,0x00,0x00]
vpscatterqd %ymm20, 1024(%rcx,%zmm2,4) {%k1}
// CHECK: vpscatterqq %zmm14, 123(%r14,%zmm20,8) {%k1}
// CHECK: encoding: [0x62,0x52,0xfd,0x41,0xa1,0xb4,0xe6,0x7b,0x00,0x00,0x00]
vpscatterqq %zmm14, 123(%r14,%zmm20,8) {%k1}
// CHECK: vpscatterqq %zmm14, 123(%r14,%zmm20,8) {%k1}
// CHECK: encoding: [0x62,0x52,0xfd,0x41,0xa1,0xb4,0xe6,0x7b,0x00,0x00,0x00]
vpscatterqq %zmm14, 123(%r14,%zmm20,8) {%k1}
// CHECK: vpscatterqq %zmm14, 256(%r9,%zmm20) {%k1}
// CHECK: encoding: [0x62,0x52,0xfd,0x41,0xa1,0x74,0x21,0x20]
vpscatterqq %zmm14, 256(%r9,%zmm20) {%k1}
// CHECK: vpscatterqq %zmm14, 1024(%rcx,%zmm20,4) {%k1}
// CHECK: encoding: [0x62,0x72,0xfd,0x41,0xa1,0xb4,0xa1,0x00,0x04,0x00,0x00]
vpscatterqq %zmm14, 1024(%rcx,%zmm20,4) {%k1}

test/MC/X86/x86-64-avx512f_vl.s

@@ -1452,3 +1452,194 @@
// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x1f,0xb2,0xf8,0xfb,0xff,0xff]
vpabsq -1032(%rdx){1to4}, %ymm22
// CHECK: vpgatherdd 123(%r14,%xmm31,8), %xmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x90,0x8c,0xfe,0x7b,0x00,0x00,0x00]
vpgatherdd 123(%r14,%xmm31,8), %xmm17 {%k1}
// CHECK: vpgatherdd 256(%r9,%xmm31), %xmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x90,0x4c,0x39,0x40]
vpgatherdd 256(%r9,%xmm31), %xmm17 {%k1}
// CHECK: vpgatherdd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x01,0x90,0x8c,0xb9,0x00,0x04,0x00,0x00]
vpgatherdd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
// CHECK: vpgatherdd 123(%r14,%ymm31,8), %ymm19 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x21,0x90,0x9c,0xfe,0x7b,0x00,0x00,0x00]
vpgatherdd 123(%r14,%ymm31,8), %ymm19 {%k1}
// CHECK: vpgatherdd 256(%r9,%ymm31), %ymm19 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x21,0x90,0x5c,0x39,0x40]
vpgatherdd 256(%r9,%ymm31), %ymm19 {%k1}
// CHECK: vpgatherdd 1024(%rcx,%ymm31,4), %ymm19 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x21,0x90,0x9c,0xb9,0x00,0x04,0x00,0x00]
vpgatherdd 1024(%rcx,%ymm31,4), %ymm19 {%k1}
// CHECK: vpgatherdq 123(%r14,%xmm31,8), %xmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x90,0x8c,0xfe,0x7b,0x00,0x00,0x00]
vpgatherdq 123(%r14,%xmm31,8), %xmm17 {%k1}
// CHECK: vpgatherdq 256(%r9,%xmm31), %xmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x90,0x4c,0x39,0x20]
vpgatherdq 256(%r9,%xmm31), %xmm17 {%k1}
// CHECK: vpgatherdq 1024(%rcx,%xmm31,4), %xmm17 {%k1}
// CHECK: encoding: [0x62,0xa2,0xfd,0x01,0x90,0x8c,0xb9,0x00,0x04,0x00,0x00]
vpgatherdq 1024(%rcx,%xmm31,4), %xmm17 {%k1}
// CHECK: vpgatherdq 123(%r14,%xmm31,8), %ymm26 {%k1}
// CHECK: encoding: [0x62,0x02,0xfd,0x21,0x90,0x94,0xfe,0x7b,0x00,0x00,0x00]
vpgatherdq 123(%r14,%xmm31,8), %ymm26 {%k1}
// CHECK: vpgatherdq 256(%r9,%xmm31), %ymm26 {%k1}
// CHECK: encoding: [0x62,0x02,0xfd,0x21,0x90,0x54,0x39,0x20]
vpgatherdq 256(%r9,%xmm31), %ymm26 {%k1}
// CHECK: vpgatherdq 1024(%rcx,%xmm31,4), %ymm26 {%k1}
// CHECK: encoding: [0x62,0x22,0xfd,0x21,0x90,0x94,0xb9,0x00,0x04,0x00,0x00]
vpgatherdq 1024(%rcx,%xmm31,4), %ymm26 {%k1}
// CHECK: vpgatherqd 123(%r14,%xmm31,8), %xmm21 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x91,0xac,0xfe,0x7b,0x00,0x00,0x00]
vpgatherqd 123(%r14,%xmm31,8), %xmm21 {%k1}
// CHECK: vpgatherqd 256(%r9,%xmm31), %xmm21 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x91,0x6c,0x39,0x40]
vpgatherqd 256(%r9,%xmm31), %xmm21 {%k1}
// CHECK: vpgatherqd 1024(%rcx,%xmm31,4), %xmm21 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x01,0x91,0xac,0xb9,0x00,0x04,0x00,0x00]
vpgatherqd 1024(%rcx,%xmm31,4), %xmm21 {%k1}
// CHECK: vpgatherqd 123(%r14,%ymm31,8), %xmm25 {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x21,0x91,0x8c,0xfe,0x7b,0x00,0x00,0x00]
vpgatherqd 123(%r14,%ymm31,8), %xmm25 {%k1}
// CHECK: vpgatherqd 256(%r9,%ymm31), %xmm25 {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x21,0x91,0x4c,0x39,0x40]
vpgatherqd 256(%r9,%ymm31), %xmm25 {%k1}
// CHECK: vpgatherqd 1024(%rcx,%ymm31,4), %xmm25 {%k1}
// CHECK: encoding: [0x62,0x22,0x7d,0x21,0x91,0x8c,0xb9,0x00,0x04,0x00,0x00]
vpgatherqd 1024(%rcx,%ymm31,4), %xmm25 {%k1}
// CHECK: vpgatherqq 123(%r14,%xmm31,8), %xmm18 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x91,0x94,0xfe,0x7b,0x00,0x00,0x00]
vpgatherqq 123(%r14,%xmm31,8), %xmm18 {%k1}
// CHECK: vpgatherqq 256(%r9,%xmm31), %xmm18 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x91,0x54,0x39,0x20]
vpgatherqq 256(%r9,%xmm31), %xmm18 {%k1}
// CHECK: vpgatherqq 1024(%rcx,%xmm31,4), %xmm18 {%k1}
// CHECK: encoding: [0x62,0xa2,0xfd,0x01,0x91,0x94,0xb9,0x00,0x04,0x00,0x00]
vpgatherqq 1024(%rcx,%xmm31,4), %xmm18 {%k1}
// CHECK: vpgatherqq 123(%r14,%ymm31,8), %ymm19 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x21,0x91,0x9c,0xfe,0x7b,0x00,0x00,0x00]
vpgatherqq 123(%r14,%ymm31,8), %ymm19 {%k1}
// CHECK: vpgatherqq 256(%r9,%ymm31), %ymm19 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x21,0x91,0x5c,0x39,0x20]
vpgatherqq 256(%r9,%ymm31), %ymm19 {%k1}
// CHECK: vpgatherqq 1024(%rcx,%ymm31,4), %ymm19 {%k1}
// CHECK: encoding: [0x62,0xa2,0xfd,0x21,0x91,0x9c,0xb9,0x00,0x04,0x00,0x00]
vpgatherqq 1024(%rcx,%ymm31,4), %ymm19 {%k1}
// CHECK: vgatherdpd 123(%r14,%xmm31,8), %xmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x92,0x8c,0xfe,0x7b,0x00,0x00,0x00]
vgatherdpd 123(%r14,%xmm31,8), %xmm17 {%k1}
// CHECK: vgatherdpd 256(%r9,%xmm31), %xmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x92,0x4c,0x39,0x20]
vgatherdpd 256(%r9,%xmm31), %xmm17 {%k1}
// CHECK: vgatherdpd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
// CHECK: encoding: [0x62,0xa2,0xfd,0x01,0x92,0x8c,0xb9,0x00,0x04,0x00,0x00]
vgatherdpd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
// CHECK: vgatherdpd 123(%r14,%xmm31,8), %ymm23 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x21,0x92,0xbc,0xfe,0x7b,0x00,0x00,0x00]
vgatherdpd 123(%r14,%xmm31,8), %ymm23 {%k1}
// CHECK: vgatherdpd 256(%r9,%xmm31), %ymm23 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x21,0x92,0x7c,0x39,0x20]
vgatherdpd 256(%r9,%xmm31), %ymm23 {%k1}
// CHECK: vgatherdpd 1024(%rcx,%xmm31,4), %ymm23 {%k1}
// CHECK: encoding: [0x62,0xa2,0xfd,0x21,0x92,0xbc,0xb9,0x00,0x04,0x00,0x00]
vgatherdpd 1024(%rcx,%xmm31,4), %ymm23 {%k1}
// CHECK: vgatherdps 123(%r14,%xmm31,8), %xmm18 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x92,0x94,0xfe,0x7b,0x00,0x00,0x00]
vgatherdps 123(%r14,%xmm31,8), %xmm18 {%k1}
// CHECK: vgatherdps 256(%r9,%xmm31), %xmm18 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x92,0x54,0x39,0x40]
vgatherdps 256(%r9,%xmm31), %xmm18 {%k1}
// CHECK: vgatherdps 1024(%rcx,%xmm31,4), %xmm18 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x01,0x92,0x94,0xb9,0x00,0x04,0x00,0x00]
vgatherdps 1024(%rcx,%xmm31,4), %xmm18 {%k1}
// CHECK: vgatherdps 123(%r14,%ymm31,8), %ymm27 {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x21,0x92,0x9c,0xfe,0x7b,0x00,0x00,0x00]
vgatherdps 123(%r14,%ymm31,8), %ymm27 {%k1}
// CHECK: vgatherdps 256(%r9,%ymm31), %ymm27 {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x21,0x92,0x5c,0x39,0x40]
vgatherdps 256(%r9,%ymm31), %ymm27 {%k1}
// CHECK: vgatherdps 1024(%rcx,%ymm31,4), %ymm27 {%k1}
// CHECK: encoding: [0x62,0x22,0x7d,0x21,0x92,0x9c,0xb9,0x00,0x04,0x00,0x00]
vgatherdps 1024(%rcx,%ymm31,4), %ymm27 {%k1}
// CHECK: vgatherqpd 123(%r14,%xmm31,8), %xmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x93,0x8c,0xfe,0x7b,0x00,0x00,0x00]
vgatherqpd 123(%r14,%xmm31,8), %xmm17 {%k1}
// CHECK: vgatherqpd 256(%r9,%xmm31), %xmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x93,0x4c,0x39,0x20]
vgatherqpd 256(%r9,%xmm31), %xmm17 {%k1}
// CHECK: vgatherqpd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
// CHECK: encoding: [0x62,0xa2,0xfd,0x01,0x93,0x8c,0xb9,0x00,0x04,0x00,0x00]
vgatherqpd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
// CHECK: vgatherqpd 123(%r14,%ymm31,8), %ymm29 {%k1}
// CHECK: encoding: [0x62,0x02,0xfd,0x21,0x93,0xac,0xfe,0x7b,0x00,0x00,0x00]
vgatherqpd 123(%r14,%ymm31,8), %ymm29 {%k1}
// CHECK: vgatherqpd 256(%r9,%ymm31), %ymm29 {%k1}
// CHECK: encoding: [0x62,0x02,0xfd,0x21,0x93,0x6c,0x39,0x20]
vgatherqpd 256(%r9,%ymm31), %ymm29 {%k1}
// CHECK: vgatherqpd 1024(%rcx,%ymm31,4), %ymm29 {%k1}
// CHECK: encoding: [0x62,0x22,0xfd,0x21,0x93,0xac,0xb9,0x00,0x04,0x00,0x00]
vgatherqpd 1024(%rcx,%ymm31,4), %ymm29 {%k1}
// CHECK: vgatherqps 123(%r14,%xmm31,8), %xmm21 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x93,0xac,0xfe,0x7b,0x00,0x00,0x00]
vgatherqps 123(%r14,%xmm31,8), %xmm21 {%k1}
// CHECK: vgatherqps 256(%r9,%xmm31), %xmm21 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x93,0x6c,0x39,0x40]
vgatherqps 256(%r9,%xmm31), %xmm21 {%k1}
// CHECK: vgatherqps 1024(%rcx,%xmm31,4), %xmm21 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x01,0x93,0xac,0xb9,0x00,0x04,0x00,0x00]
vgatherqps 1024(%rcx,%xmm31,4), %xmm21 {%k1}
// CHECK: vgatherqps 123(%r14,%ymm31,8), %xmm19 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x21,0x93,0x9c,0xfe,0x7b,0x00,0x00,0x00]
vgatherqps 123(%r14,%ymm31,8), %xmm19 {%k1}
// CHECK: vgatherqps 256(%r9,%ymm31), %xmm19 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x21,0x93,0x5c,0x39,0x40]
vgatherqps 256(%r9,%ymm31), %xmm19 {%k1}
// CHECK: vgatherqps 1024(%rcx,%ymm31,4), %xmm19 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x21,0x93,0x9c,0xb9,0x00,0x04,0x00,0x00]
vgatherqps 1024(%rcx,%ymm31,4), %xmm19 {%k1}

utils/TableGen/X86RecognizableInstr.cpp

@@ -1027,9 +1027,12 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("GR32_NOAX", TYPE_Rv)
TYPE("GR64_NOAX", TYPE_R64)
TYPE("vx32mem", TYPE_M32)
TYPE("vx32xmem", TYPE_M32)
TYPE("vy32mem", TYPE_M32)
TYPE("vy32xmem", TYPE_M32)
TYPE("vz32mem", TYPE_M32)
TYPE("vx64mem", TYPE_M64)
TYPE("vx64xmem", TYPE_M64)
TYPE("vy64mem", TYPE_M64)
TYPE("vy64xmem", TYPE_M64)
TYPE("vz64mem", TYPE_M64)
@@ -1213,9 +1216,12 @@ RecognizableInstr::memoryEncodingFromString(const std::string &s,
ENCODING("opaque80mem", ENCODING_RM)
ENCODING("opaque512mem", ENCODING_RM)
ENCODING("vx32mem", ENCODING_RM)
ENCODING("vx32xmem", ENCODING_RM)
ENCODING("vy32mem", ENCODING_RM)
ENCODING("vy32xmem", ENCODING_RM)
ENCODING("vz32mem", ENCODING_RM)
ENCODING("vx64mem", ENCODING_RM)
ENCODING("vx64xmem", ENCODING_RM)
ENCODING("vy64mem", ENCODING_RM)
ENCODING("vy64xmem", ENCODING_RM)
ENCODING("vz64mem", ENCODING_RM)