Mirror of https://github.com/RPCS3/llvm.git (synced 2024-12-14 07:31:53 +00:00)

AVX-512: Added all SKX forms of GATHER instructions.

Added intrinsics. Added encoding and tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240905 91177308-0d34-0410-b5e6-96231b3b80d8

This commit is contained in:
  parent 1e7ecc8e57
  commit b23b2fbd3a
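For context, each new "gather3" intrinsic takes a pass-through vector, a base pointer, an index vector, an 8-bit mask, and an immediate scale, mirroring the existing 512-bit gather intrinsics. A minimal IR usage sketch, built from the declarations this commit adds to the tests (the function name here is illustrative only):

  declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)

  ; Gather eight floats from %base + 4*index under an 8-bit element mask;
  ; masked-off lanes keep the value of the pass-through operand %src.
  define <8 x float> @gather_ps_256(<8 x float> %src, i8* %base, <8 x i32> %index, i8 %mask) {
    %v = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %src, i8* %base, <8 x i32> %index, i8 %mask, i32 4)
    ret <8 x float> %v
  }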
@@ -4264,6 +4264,102 @@ let TargetPrefix = "x86" in {
          llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3div2_df :
          GCCBuiltin<"__builtin_ia32_gather3div2df">,
          Intrinsic<[llvm_v2f64_ty],
          [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3div2_di :
          GCCBuiltin<"__builtin_ia32_gather3div2di">,
          Intrinsic<[llvm_v4i32_ty],
          [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3div4_df :
          GCCBuiltin<"__builtin_ia32_gather3div4df">,
          Intrinsic<[llvm_v4f64_ty],
          [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3div4_di :
          GCCBuiltin<"__builtin_ia32_gather3div4di">,
          Intrinsic<[llvm_v8i32_ty],
          [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3div4_sf :
          GCCBuiltin<"__builtin_ia32_gather3div4sf">,
          Intrinsic<[llvm_v4f32_ty],
          [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3div4_si :
          GCCBuiltin<"__builtin_ia32_gather3div4si">,
          Intrinsic<[llvm_v4i32_ty],
          [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3div8_sf :
          GCCBuiltin<"__builtin_ia32_gather3div8sf">,
          Intrinsic<[llvm_v4f32_ty],
          [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3div8_si :
          GCCBuiltin<"__builtin_ia32_gather3div8si">,
          Intrinsic<[llvm_v4i32_ty],
          [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3siv2_df :
          GCCBuiltin<"__builtin_ia32_gather3siv2df">,
          Intrinsic<[llvm_v2f64_ty],
          [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3siv2_di :
          GCCBuiltin<"__builtin_ia32_gather3siv2di">,
          Intrinsic<[llvm_v4i32_ty],
          [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3siv4_df :
          GCCBuiltin<"__builtin_ia32_gather3siv4df">,
          Intrinsic<[llvm_v4f64_ty],
          [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3siv4_di :
          GCCBuiltin<"__builtin_ia32_gather3siv4di">,
          Intrinsic<[llvm_v8i32_ty],
          [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3siv4_sf :
          GCCBuiltin<"__builtin_ia32_gather3siv4sf">,
          Intrinsic<[llvm_v4f32_ty],
          [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3siv4_si :
          GCCBuiltin<"__builtin_ia32_gather3siv4si">,
          Intrinsic<[llvm_v4i32_ty],
          [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3siv8_sf :
          GCCBuiltin<"__builtin_ia32_gather3siv8sf">,
          Intrinsic<[llvm_v8f32_ty],
          [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

  def int_x86_avx512_gather3siv8_si :
          GCCBuiltin<"__builtin_ia32_gather3siv8si">,
          Intrinsic<[llvm_v8i32_ty],
          [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
          [IntrReadArgMem]>;

// scatter
  def int_x86_avx512_scatter_dpd_512 : GCCBuiltin<"__builtin_ia32_scattersiv8df">,
          Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
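In these names, the "div" forms take a qword (v*i64) index vector and the "siv" forms take a dword (v*i32) index vector; they map to the Q- and D-indexed instruction variants, respectively, in the X86IntrinsicsInfo.h table further down. The corresponding IR-level declarations, taken from the test file in this commit, make the difference visible:

  ; qword indices -> VGATHERQPD* forms
  declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)
  ; dword indices -> VGATHERDPD* forms
  declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)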
@@ -238,18 +238,34 @@ struct X86Operand : public MCParsedAsmOperand {
    return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
      getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
  }
  bool isMemVX32X() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
      getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM31;
  }
  bool isMemVY32() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
      getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
  }
  bool isMemVY32X() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
      getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM31;
  }
  bool isMemVX64() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
      getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
  }
  bool isMemVX64X() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
      getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM31;
  }
  bool isMemVY64() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
      getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
  }
  bool isMemVY64X() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
      getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM31;
  }
  bool isMemVZ32() const {
    return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
      getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;
@@ -15424,7 +15424,12 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
                             const X86Subtarget * Subtarget) {
  SDLoc dl(Op);
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
  assert(C && "Invalid scale type");
  if (!C)
    llvm_unreachable("Invalid scale type");
  unsigned ScaleVal = C->getZExtValue();
  if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8)
    llvm_unreachable("Valid scale values are 1, 2, 4, 8");

  SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
  EVT MaskVT = MVT::getVectorVT(MVT::i1,
                             Index.getSimpleValueType().getVectorNumElements());
@@ -15432,8 +15437,16 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
  if (MaskC)
    MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
  else
    MaskInReg = DAG.getBitcast(MaskVT, Mask);
  else {
    EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                     Mask.getValueType().getSizeInBits());

    // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
    // are extracted by EXTRACT_SUBVECTOR.
    MaskInReg = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
                            DAG.getBitcast(BitcastVT, Mask),
                            DAG.getIntPtrConstant(0, dl));
  }
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
  SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
  SDValue Segment = DAG.getRegister(0, MVT::i32);
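For the new narrow forms the i8 mask operand covers more lanes than a 2- or 4-element gather uses, so when the mask is not a constant it is first reinterpreted as a vector of i1 bits and only the low lanes are kept via EXTRACT_SUBVECTOR. Roughly, in IR terms, the DAG transformation corresponds to this sketch (illustrative, not code from this commit):

  ; an i8 mask reinterpreted as 8 mask bits ...
  %bits = bitcast i8 %mask to <8 x i1>
  ; ... of which only the low 2 are used as the v2i1 gather mask
  %m2 = shufflevector <8 x i1> %bits, <8 x i1> undef, <2 x i32> <i32 0, i32 1>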
@@ -5440,10 +5440,11 @@ defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;

multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
            (ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
            !strconcat(OpcodeStr,
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            [(set _.RC:$dst, _.KRCWM:$mask_wb,
              (GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask,
@@ -5451,29 +5452,48 @@ multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
            EVEX_CD8<_.EltSize, CD8VT1>;
}

let ExeDomain = SSEPackedDouble in {
defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", v8f64_info, vy64xmem,
                                 mgatherv8i32>, EVEX_V512, VEX_W;
defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", v8f64_info, vz64mem,
                                 mgatherv8i64>, EVEX_V512, VEX_W;
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                       vy32xmem, mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                       vz64mem, mgatherv8i64>, EVEX_V512, VEX_W;
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                            vx32xmem, mgatherv4i32>, EVEX_V256, VEX_W;
    defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                                            vy64xmem, mgatherv4i64>, EVEX_V256, VEX_W;
    defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                            vx32xmem, mgatherv4i32>, EVEX_V128, VEX_W;
    defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                            vx64xmem, mgatherv2i64>, EVEX_V128, VEX_W;
  }
}

let ExeDomain = SSEPackedSingle in {
defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", v16f32_info, vz32mem,
                                 mgatherv16i32>, EVEX_V512;
defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", v8f32x_info, vz64mem,
                                 mgatherv8i64>, EVEX_V512;
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz32mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz64mem,
                                       mgatherv8i64>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                            vy32xmem, mgatherv8i32>, EVEX_V256;
    defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                            vy64xmem, mgatherv4i64>, EVEX_V256;
    defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                            vx32xmem, mgatherv4i32>, EVEX_V128;
    defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                            vx64xmem, mgatherv2i64>, EVEX_V128;
  }
}

defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", v8i64_info, vy64xmem,
                                 mgatherv8i32>, EVEX_V512, VEX_W;
defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", v16i32_info, vz32mem,
                                 mgatherv16i32>, EVEX_V512;

defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", v8i64_info, vz64mem,
                                 mgatherv8i64>, EVEX_V512, VEX_W;
defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", v8i32x_info, vz64mem,
                                 mgatherv8i64>, EVEX_V512;
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;

multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode> {
@@ -560,6 +560,14 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
  return false;
}]>;

def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                  (masked_gather node:$src1, node:$src2, node:$src3) , [{
  if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
    return (Mgt->getIndex().getValueType() == MVT::v4i32 ||
            Mgt->getBasePtr().getValueType() == MVT::v4i32);
  return false;
}]>;

def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                  (masked_gather node:$src1, node:$src2, node:$src3) , [{
  if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
@@ -568,6 +576,20 @@ def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
  return false;
}]>;

def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                  (masked_gather node:$src1, node:$src2, node:$src3) , [{
  if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
    return (Mgt->getIndex().getValueType() == MVT::v2i64 ||
            Mgt->getBasePtr().getValueType() == MVT::v2i64);
  return false;
}]>;
def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                  (masked_gather node:$src1, node:$src2, node:$src3) , [{
  if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
    return (Mgt->getIndex().getValueType() == MVT::v4i64 ||
            Mgt->getBasePtr().getValueType() == MVT::v4i64);
  return false;
}]>;
def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                  (masked_gather node:$src1, node:$src2, node:$src3) , [{
  if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
@@ -282,6 +282,10 @@ let RenderMethod = "addMemOperands" in {
  def X86MemVX64Operand : AsmOperandClass { let Name = "MemVX64"; }
  def X86MemVY64Operand : AsmOperandClass { let Name = "MemVY64"; }
  def X86MemVZ64Operand : AsmOperandClass { let Name = "MemVZ64"; }
  def X86MemVX32XOperand : AsmOperandClass { let Name = "MemVX32X"; }
  def X86MemVY32XOperand : AsmOperandClass { let Name = "MemVY32X"; }
  def X86MemVX64XOperand : AsmOperandClass { let Name = "MemVX64X"; }
  def X86MemVY64XOperand : AsmOperandClass { let Name = "MemVY64X"; }
}

def X86AbsMemAsmOperand : AsmOperandClass {
@@ -332,7 +336,11 @@ def vx32mem : X86VMemOperand<VR128, "printi32mem", X86MemVX32Operand>;
def vy32mem : X86VMemOperand<VR256, "printi32mem", X86MemVY32Operand>;
def vx64mem : X86VMemOperand<VR128, "printi64mem", X86MemVX64Operand>;
def vy64mem : X86VMemOperand<VR256, "printi64mem", X86MemVY64Operand>;
def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64Operand>;

def vx32xmem : X86VMemOperand<VR128X, "printi32mem", X86MemVX32XOperand>;
def vx64xmem : X86VMemOperand<VR128X, "printi32mem", X86MemVX64XOperand>;
def vy32xmem : X86VMemOperand<VR256X, "printi32mem", X86MemVY32XOperand>;
def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64XOperand>;
def vz32mem : X86VMemOperand<VR512, "printi32mem", X86MemVZ32Operand>;
def vz64mem : X86VMemOperand<VR512, "printi64mem", X86MemVZ64Operand>;
@@ -56,6 +56,22 @@ static const IntrinsicData IntrinsicsWithChain[] = {
  X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
  X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),

  X86_INTRINSIC_DATA(avx512_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
  X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
  X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
  X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
  X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
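Each entry above ties one of the new intrinsics to a concrete VL gather instruction; gather3div2_df, for instance, selects VGATHERQPDZ128rm, i.e. a vgatherqpd with an xmm index. A sketch matching the tests below (the function name is illustrative):

  define <2 x double> @qpd_128(<2 x double> %src, i8* %base, <2 x i64> %index, i8 %mask) {
    ; expected to select: vgatherqpd (base,xmm_index,4), xmm {k}
    %v = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %src, i8* %base, <2 x i64> %index, i8 %mask, i32 4)
    ret <2 x double> %v
  }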
@@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s

declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
@@ -10,52 +10,60 @@ declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>,
declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32)

;CHECK-LABEL: gather_mask_dps
;CHECK: kmovw
;CHECK: vgatherdps
;CHECK: vpadd
;CHECK: vscatterdps
;CHECK: ret
define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
  %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
  ret void
}

;CHECK-LABEL: gather_mask_dpd
;CHECK: kmovw
;CHECK: vgatherdpd
;CHECK: vpadd
;CHECK: vscatterdpd
;CHECK: ret
define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dpd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT: retq
  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
  ret void
}

;CHECK-LABEL: gather_mask_qps
;CHECK: kmovw
;CHECK: vgatherqps
;CHECK: vpadd
;CHECK: vscatterqps
;CHECK: ret
define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
  ret void
}

;CHECK-LABEL: gather_mask_qpd
;CHECK: kmovw
;CHECK: vgatherqpd
;CHECK: vpadd
;CHECK: vscatterqpd
;CHECK: ret
define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qpd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
@@ -74,162 +82,469 @@ declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i3
declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)

;CHECK-LABEL: gather_mask_dd
;CHECK: kmovw
;CHECK: vpgatherdd
;CHECK: vpadd
;CHECK: vpscatterdd
;CHECK: ret
define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
  %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
  ret void
}

;CHECK-LABEL: gather_mask_qd
;CHECK: kmovw
;CHECK: vpgatherqd
;CHECK: vpadd
;CHECK: vpscatterqd
;CHECK: ret
define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
  %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
  ret void
}

;CHECK-LABEL: gather_mask_qq
;CHECK: kmovw
;CHECK: vpgatherqq
;CHECK: vpadd
;CHECK: vpscatterqq
;CHECK: ret
define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qq:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
  %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
  ret void
}

;CHECK-LABEL: gather_mask_dq
;CHECK: kmovw
;CHECK: vpgatherdq
;CHECK: vpadd
;CHECK: vpscatterdq
;CHECK: ret
define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dq:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT: retq
  %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
  ret void
}


;CHECK-LABEL: gather_mask_dpd_execdomain
;CHECK: vgatherdpd
;CHECK: vmovapd
;CHECK: ret
define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_dpd_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, (%rdx)
; CHECK-NEXT: retq
  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
  store <8 x double> %x, <8 x double>* %stbuf
  ret void
}

;CHECK-LABEL: gather_mask_qpd_execdomain
;CHECK: vgatherqpd
;CHECK: vmovapd
;CHECK: ret
define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_qpd_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, (%rdx)
; CHECK-NEXT: retq
  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  store <8 x double> %x, <8 x double>* %stbuf
  ret void
}

;CHECK-LABEL: gather_mask_dps_execdomain
;CHECK: vgatherdps
;CHECK: vmovaps
;CHECK: ret
define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_dps_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
  ret <16 x float> %res;
}

;CHECK-LABEL: gather_mask_qps_execdomain
;CHECK: vgatherqps
;CHECK: vmovaps
;CHECK: ret
define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_qps_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  ret <8 x float> %res;
}

;CHECK-LABEL: scatter_mask_dpd_execdomain
;CHECK: vmovapd
;CHECK: vscatterdpd
;CHECK: ret
define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
  %x = load <8 x double>, <8 x double>* %src, align 64
; CHECK-LABEL: scatter_mask_dpd_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %zmm1
; CHECK-NEXT: vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
; CHECK-NEXT: retq
  %x = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
  ret void
}

;CHECK-LABEL: scatter_mask_qpd_execdomain
;CHECK: vmovapd
;CHECK: vscatterqpd
;CHECK: ret
define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_qpd_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %zmm1
; CHECK-NEXT: vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
  %x = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
  ret void
}

;CHECK-LABEL: scatter_mask_dps_execdomain
;CHECK: vmovaps
;CHECK: vscatterdps
;CHECK: ret
define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_dps_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %zmm1
; CHECK-NEXT: vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
  %x = load <16 x float>, <16 x float>* %src, align 64
  call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
  ret void
}

;CHECK-LABEL: scatter_mask_qps_execdomain
;CHECK: vmovaps
;CHECK: vscatterqps
;CHECK: ret
define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
  %x = load <8 x float>, <8 x float>* %src, align 32
; CHECK-LABEL: scatter_mask_qps_execdomain:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %ymm1
; CHECK-NEXT: vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT: retq
  %x = load <8 x float>, <8 x float>* %src, align 32
  call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
  ret void
}

;CHECK-LABEL: gather_qps
;CHECK: kxnorw
;CHECK: vgatherqps
;CHECK: vpadd
;CHECK: vscatterqps
;CHECK: ret
define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_qps:
; CHECK: ## BB#0:
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
; CHECK-NEXT: retq
  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 -1, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4)
  ret void
}

;CHECK-LABEL: prefetch
;CHECK: gatherpf0
;CHECK: gatherpf1
;CHECK: scatterpf0
;CHECK: scatterpf1
;CHECK: ret
declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
define void @prefetch(<8 x i64> %ind, i8* %base) {
; CHECK-LABEL: prefetch:
; CHECK: ## BB#0:
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
  ret void
}


declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)

define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,0), %xmm0 {%k1}
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 0)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)

define <4 x i32>@test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)

define <4 x double>@test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,0), %ymm0 {%k1}
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 0)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)

define <8 x i32>@test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 8)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 8)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)

define <4 x float>@test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherqps (%rdi,%xmm1,0), %xmm0 {%k1}
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 0)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)

define <4 x i32>@test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)

define <4 x float>@test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherqps (%rdi,%ymm1,0), %xmm0 {%k1}
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 0)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)

define <4 x i32>@test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)

define <2 x double>@test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,0), %xmm0 {%k1}
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)

define <4 x i32>@test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)

define <4 x double>@test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,0), %ymm0 {%k1}
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
  %res = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)

define <8 x i32>@test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
; CHECK-NEXT: vpaddd %ymm0, %ymm0, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)

define <4 x float>@test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherdps (%rdi,%xmm1,0), %xmm0 {%k1}
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)

define <4 x i32>@test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,0), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 0)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)

define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,0), %ymm0 {%k1}
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)

define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,0), %ymm0 {%k1}
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 0)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
@ -9793,3 +9793,115 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
|
||||
// CHECK: vpabsq -1032(%rdx){1to8}, %zmm5
|
||||
// CHECK: encoding: [0x62,0xf2,0xfd,0x58,0x1f,0xaa,0xf8,0xfb,0xff,0xff]
|
||||
vpabsq -1032(%rdx){1to8}, %zmm5
|
||||
|
||||
// CHECK: vpgatherdd 123(%r14,%zmm11,8), %zmm17 {%k1}
|
||||
// CHECK: encoding: [0x62,0x82,0x7d,0x49,0x90,0x8c,0xde,0x7b,0x00,0x00,0x00]
|
||||
vpgatherdd 123(%r14, %zmm11,8), %zmm17 {%k1}
|
||||
|
||||
// CHECK: vpgatherdd 256(%r9,%zmm11), %zmm17 {%k1}
|
||||
// CHECK: encoding: [0x62,0x82,0x7d,0x49,0x90,0x4c,0x19,0x40]
|
||||
vpgatherdd 256(%r9,%zmm11), %zmm17 {%k1}
|
||||
|
||||
// CHECK: vpgatherdd 1024(%rcx,%zmm11,4), %zmm17 {%k1}
|
||||
// CHECK: encoding: [0x62,0xa2,0x7d,0x49,0x90,0x8c,0x99,0x00,0x04,0x00,0x00]
|
||||
vpgatherdd 1024(%rcx, %zmm11,4), %zmm17 {%k1}
|
||||
|
||||
// CHECK: vpgatherdq 123(%r14,%ymm14,8), %zmm8 {%k1}
|
||||
// CHECK: encoding: [0x62,0x12,0xfd,0x49,0x90,0x84,0xf6,0x7b,0x00,0x00,0x00]
|
||||
vpgatherdq 123(%r14, %ymm14,8), %zmm8 {%k1}
|
||||
|
||||
// CHECK: vpgatherdq 256(%r9,%ymm14), %zmm8 {%k1}
|
||||
// CHECK: encoding: [0x62,0x12,0xfd,0x49,0x90,0x44,0x31,0x20]
|
||||
vpgatherdq 256(%r9, %ymm14), %zmm8 {%k1}
|
||||
|
||||
// CHECK: vpgatherdq 1024(%rcx,%ymm14,4), %zmm8 {%k1}
|
||||
// CHECK: encoding: [0x62,0x32,0xfd,0x49,0x90,0x84,0xb1,0x00,0x04,0x00,0x00]
|
||||
vpgatherdq 1024(%rcx, %ymm14,4), %zmm8 {%k1}
|
||||
|
||||
// CHECK: vpgatherqd 123(%r14,%zmm17,8), %ymm3 {%k1}
|
||||
// CHECK: encoding: [0x62,0xd2,0x7d,0x41,0x91,0x9c,0xce,0x7b,0x00,0x00,0x00]
|
||||
vpgatherqd 123(%r14, %zmm17,8), %ymm3 {%k1}
|
||||
|
||||
// CHECK: vpgatherqd 256(%r9,%zmm17), %ymm3 {%k1}
|
||||
// CHECK: encoding: [0x62,0xd2,0x7d,0x41,0x91,0x5c,0x09,0x40]
|
||||
vpgatherqd 256(%r9,%zmm17), %ymm3 {%k1}
|
||||
|
||||
// CHECK: vpgatherqd 1024(%rcx,%zmm17,4), %ymm3 {%k1}
|
||||
// CHECK: encoding: [0x62,0xf2,0x7d,0x41,0x91,0x9c,0x89,0x00,0x04,0x00,0x00]
|
||||
vpgatherqd 1024(%rcx, %zmm17,4), %ymm3 {%k1}
|
||||
|
||||
// CHECK: vpgatherqq 123(%r14,%zmm21,8), %zmm17 {%k1}
|
||||
// CHECK: encoding: [0x62,0xc2,0xfd,0x41,0x91,0x8c,0xee,0x7b,0x00,0x00,0x00]
|
||||
vpgatherqq 123(%r14, %zmm21,8), %zmm17 {%k1}
|
||||
|
||||
// CHECK: vpgatherqq 256(%r9,%zmm21), %zmm17 {%k1}
|
||||
// CHECK: encoding: [0x62,0xc2,0xfd,0x41,0x91,0x4c,0x29,0x20]
|
||||
vpgatherqq 256(%r9,%zmm21), %zmm17 {%k1}
|
||||
|
||||
// CHECK: vpgatherqq 1024(%rcx,%zmm21,4), %zmm17 {%k1}
|
||||
// CHECK: encoding: [0x62,0xe2,0xfd,0x41,0x91,0x8c,0xa9,0x00,0x04,0x00,0x00]
|
||||
vpgatherqq 1024(%rcx, %zmm21,4), %zmm17 {%k1}
|
||||
|
||||
// CHECK: vpscatterdd %zmm19, 123(%r14,%zmm16,8) {%k1}
|
||||
// CHECK: encoding: [0x62,0xc2,0x7d,0x41,0xa0,0x9c,0xc6,0x7b,0x00,0x00,0x00]
|
||||
vpscatterdd %zmm19, 123(%r14,%zmm16,8) {%k1}
|
||||
|
||||
// CHECK: vpscatterdd %zmm19, 123(%r14,%zmm16,8) {%k1}
|
||||
// CHECK: encoding: [0x62,0xc2,0x7d,0x41,0xa0,0x9c,0xc6,0x7b,0x00,0x00,0x00]
|
||||
vpscatterdd %zmm19, 123(%r14,%zmm16,8) {%k1}
|
||||
|
||||
// CHECK: vpscatterdd %zmm19, 256(%r9,%zmm16) {%k1}
|
||||
// CHECK: encoding: [0x62,0xc2,0x7d,0x41,0xa0,0x5c,0x01,0x40]
|
||||
vpscatterdd %zmm19, 256(%r9,%zmm16) {%k1}
|
||||
|
||||
// CHECK: vpscatterdd %zmm19, 1024(%rcx,%zmm16,4) {%k1}
|
||||
// CHECK: encoding: [0x62,0xe2,0x7d,0x41,0xa0,0x9c,0x81,0x00,0x04,0x00,0x00]
|
||||
vpscatterdd %zmm19, 1024(%rcx,%zmm16,4) {%k1}
|
||||
|
||||
// CHECK: vpscatterdq %zmm5, 123(%r14,%ymm6,8) {%k1}
|
||||
// CHECK: encoding: [0x62,0xd2,0xfd,0x49,0xa0,0xac,0xf6,0x7b,0x00,0x00,0x00]
|
||||
vpscatterdq %zmm5, 123(%r14,%ymm6,8) {%k1}
|
||||
|
||||
// CHECK: vpscatterdq %zmm5, 123(%r14,%ymm6,8) {%k1}
|
||||
// CHECK: encoding: [0x62,0xd2,0xfd,0x49,0xa0,0xac,0xf6,0x7b,0x00,0x00,0x00]
|
||||
vpscatterdq %zmm5, 123(%r14,%ymm6,8) {%k1}
|
||||
|
||||
// CHECK: vpscatterdq %zmm5, 256(%r9,%ymm6) {%k1}
|
||||
// CHECK: encoding: [0x62,0xd2,0xfd,0x49,0xa0,0x6c,0x31,0x20]
|
||||
vpscatterdq %zmm5, 256(%r9,%ymm6) {%k1}
|
||||
|
||||
// CHECK: vpscatterdq %zmm5, 1024(%rcx,%ymm6,4) {%k1}
|
||||
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xa0,0xac,0xb1,0x00,0x04,0x00,0x00]
|
||||
vpscatterdq %zmm5, 1024(%rcx,%ymm6,4) {%k1}
|
||||
|
||||
// CHECK: vpscatterqd %ymm20, 123(%r14,%zmm2,8) {%k1}
|
||||
// CHECK: encoding: [0x62,0xc2,0x7d,0x49,0xa1,0xa4,0xd6,0x7b,0x00,0x00,0x00]
|
||||
vpscatterqd %ymm20, 123(%r14,%zmm2,8) {%k1}
|
||||
|
||||
// CHECK: vpscatterqd %ymm20, 123(%r14,%zmm2,8) {%k1}
|
||||
// CHECK: encoding: [0x62,0xc2,0x7d,0x49,0xa1,0xa4,0xd6,0x7b,0x00,0x00,0x00]
|
||||
vpscatterqd %ymm20, 123(%r14,%zmm2,8) {%k1}
|
||||
|
||||
// CHECK: vpscatterqd %ymm20, 256(%r9,%zmm2) {%k1}
|
||||
// CHECK: encoding: [0x62,0xc2,0x7d,0x49,0xa1,0x64,0x11,0x40]
|
||||
vpscatterqd %ymm20, 256(%r9,%zmm2) {%k1}
|
||||
|
||||
// CHECK: vpscatterqd %ymm20, 1024(%rcx,%zmm2,4) {%k1}
|
||||
// CHECK: encoding: [0x62,0xe2,0x7d,0x49,0xa1,0xa4,0x91,0x00,0x04,0x00,0x00]
|
||||
vpscatterqd %ymm20, 1024(%rcx,%zmm2,4) {%k1}
|
||||
|
||||
// CHECK: vpscatterqq %zmm14, 123(%r14,%zmm20,8) {%k1}
|
||||
// CHECK: encoding: [0x62,0x52,0xfd,0x41,0xa1,0xb4,0xe6,0x7b,0x00,0x00,0x00]
|
||||
vpscatterqq %zmm14, 123(%r14,%zmm20,8) {%k1}
|
||||
|
||||
// CHECK: vpscatterqq %zmm14, 123(%r14,%zmm20,8) {%k1}
|
||||
// CHECK: encoding: [0x62,0x52,0xfd,0x41,0xa1,0xb4,0xe6,0x7b,0x00,0x00,0x00]
|
||||
vpscatterqq %zmm14, 123(%r14,%zmm20,8) {%k1}
|
||||
|
||||
// CHECK: vpscatterqq %zmm14, 256(%r9,%zmm20) {%k1}
|
||||
// CHECK: encoding: [0x62,0x52,0xfd,0x41,0xa1,0x74,0x21,0x20]
|
||||
vpscatterqq %zmm14, 256(%r9,%zmm20) {%k1}
|
||||
|
||||
// CHECK: vpscatterqq %zmm14, 1024(%rcx,%zmm20,4) {%k1}
|
||||
// CHECK: encoding: [0x62,0x72,0xfd,0x41,0xa1,0xb4,0xa1,0x00,0x04,0x00,0x00]
|
||||
vpscatterqq %zmm14, 1024(%rcx,%zmm20,4) {%k1}
|
||||
|
@ -1452,3 +1452,194 @@
|
||||
// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x1f,0xb2,0xf8,0xfb,0xff,0xff]
|
||||
vpabsq -1032(%rdx){1to4}, %ymm22
|
||||
|
||||
// CHECK: vpgatherdd 123(%r14,%xmm31,8), %xmm17 {%k1}
|
||||
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x90,0x8c,0xfe,0x7b,0x00,0x00,0x00]
|
||||
vpgatherdd 123(%r14,%xmm31,8), %xmm17 {%k1}
|
||||
|
||||
// CHECK: vpgatherdd 256(%r9,%xmm31), %xmm17 {%k1}
|
||||
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x90,0x4c,0x39,0x40]
|
||||
vpgatherdd 256(%r9,%xmm31), %xmm17 {%k1}

// CHECK: vpgatherdd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x01,0x90,0x8c,0xb9,0x00,0x04,0x00,0x00]
vpgatherdd 1024(%rcx,%xmm31,4), %xmm17 {%k1}

// CHECK: vpgatherdd 123(%r14,%ymm31,8), %ymm19 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x21,0x90,0x9c,0xfe,0x7b,0x00,0x00,0x00]
vpgatherdd 123(%r14,%ymm31,8), %ymm19 {%k1}

// CHECK: vpgatherdd 256(%r9,%ymm31), %ymm19 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x21,0x90,0x5c,0x39,0x40]
vpgatherdd 256(%r9,%ymm31), %ymm19 {%k1}

// CHECK: vpgatherdd 1024(%rcx,%ymm31,4), %ymm19 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x21,0x90,0x9c,0xb9,0x00,0x04,0x00,0x00]
vpgatherdd 1024(%rcx,%ymm31,4), %ymm19 {%k1}

// CHECK: vpgatherdq 123(%r14,%xmm31,8), %xmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x90,0x8c,0xfe,0x7b,0x00,0x00,0x00]
vpgatherdq 123(%r14,%xmm31,8), %xmm17 {%k1}

// CHECK: vpgatherdq 256(%r9,%xmm31), %xmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x90,0x4c,0x39,0x20]
vpgatherdq 256(%r9,%xmm31), %xmm17 {%k1}

// CHECK: vpgatherdq 1024(%rcx,%xmm31,4), %xmm17 {%k1}
// CHECK: encoding: [0x62,0xa2,0xfd,0x01,0x90,0x8c,0xb9,0x00,0x04,0x00,0x00]
vpgatherdq 1024(%rcx,%xmm31,4), %xmm17 {%k1}

// CHECK: vpgatherdq 123(%r14,%xmm31,8), %ymm26 {%k1}
// CHECK: encoding: [0x62,0x02,0xfd,0x21,0x90,0x94,0xfe,0x7b,0x00,0x00,0x00]
vpgatherdq 123(%r14,%xmm31,8), %ymm26 {%k1}

// CHECK: vpgatherdq 256(%r9,%xmm31), %ymm26 {%k1}
// CHECK: encoding: [0x62,0x02,0xfd,0x21,0x90,0x54,0x39,0x20]
vpgatherdq 256(%r9,%xmm31), %ymm26 {%k1}

// CHECK: vpgatherdq 1024(%rcx,%xmm31,4), %ymm26 {%k1}
// CHECK: encoding: [0x62,0x22,0xfd,0x21,0x90,0x94,0xb9,0x00,0x04,0x00,0x00]
vpgatherdq 1024(%rcx,%xmm31,4), %ymm26 {%k1}

// CHECK: vpgatherqd 123(%r14,%xmm31,8), %xmm21 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x91,0xac,0xfe,0x7b,0x00,0x00,0x00]
vpgatherqd 123(%r14,%xmm31,8), %xmm21 {%k1}

// CHECK: vpgatherqd 256(%r9,%xmm31), %xmm21 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x91,0x6c,0x39,0x40]
vpgatherqd 256(%r9,%xmm31), %xmm21 {%k1}

// CHECK: vpgatherqd 1024(%rcx,%xmm31,4), %xmm21 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x01,0x91,0xac,0xb9,0x00,0x04,0x00,0x00]
vpgatherqd 1024(%rcx,%xmm31,4), %xmm21 {%k1}

// CHECK: vpgatherqd 123(%r14,%ymm31,8), %xmm25 {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x21,0x91,0x8c,0xfe,0x7b,0x00,0x00,0x00]
vpgatherqd 123(%r14,%ymm31,8), %xmm25 {%k1}

// CHECK: vpgatherqd 256(%r9,%ymm31), %xmm25 {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x21,0x91,0x4c,0x39,0x40]
vpgatherqd 256(%r9,%ymm31), %xmm25 {%k1}

// CHECK: vpgatherqd 1024(%rcx,%ymm31,4), %xmm25 {%k1}
// CHECK: encoding: [0x62,0x22,0x7d,0x21,0x91,0x8c,0xb9,0x00,0x04,0x00,0x00]
vpgatherqd 1024(%rcx,%ymm31,4), %xmm25 {%k1}
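
// Note the register pairing above: a 256-bit ymm index holds only four 64-bit
// indices, which gather four 32-bit elements, so vpgatherqd (and vgatherqps
// below) pair a ymm index with an xmm destination.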

// CHECK: vpgatherqq 123(%r14,%xmm31,8), %xmm18 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x91,0x94,0xfe,0x7b,0x00,0x00,0x00]
vpgatherqq 123(%r14,%xmm31,8), %xmm18 {%k1}

// CHECK: vpgatherqq 256(%r9,%xmm31), %xmm18 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x91,0x54,0x39,0x20]
vpgatherqq 256(%r9,%xmm31), %xmm18 {%k1}

// CHECK: vpgatherqq 1024(%rcx,%xmm31,4), %xmm18 {%k1}
// CHECK: encoding: [0x62,0xa2,0xfd,0x01,0x91,0x94,0xb9,0x00,0x04,0x00,0x00]
vpgatherqq 1024(%rcx,%xmm31,4), %xmm18 {%k1}

// CHECK: vpgatherqq 123(%r14,%ymm31,8), %ymm19 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x21,0x91,0x9c,0xfe,0x7b,0x00,0x00,0x00]
vpgatherqq 123(%r14,%ymm31,8), %ymm19 {%k1}

// CHECK: vpgatherqq 256(%r9,%ymm31), %ymm19 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x21,0x91,0x5c,0x39,0x20]
vpgatherqq 256(%r9,%ymm31), %ymm19 {%k1}

// CHECK: vpgatherqq 1024(%rcx,%ymm31,4), %ymm19 {%k1}
// CHECK: encoding: [0x62,0xa2,0xfd,0x21,0x91,0x9c,0xb9,0x00,0x04,0x00,0x00]
vpgatherqq 1024(%rcx,%ymm31,4), %ymm19 {%k1}

// CHECK: vgatherdpd 123(%r14,%xmm31,8), %xmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x92,0x8c,0xfe,0x7b,0x00,0x00,0x00]
vgatherdpd 123(%r14,%xmm31,8), %xmm17 {%k1}

// CHECK: vgatherdpd 256(%r9,%xmm31), %xmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x92,0x4c,0x39,0x20]
vgatherdpd 256(%r9,%xmm31), %xmm17 {%k1}

// CHECK: vgatherdpd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
// CHECK: encoding: [0x62,0xa2,0xfd,0x01,0x92,0x8c,0xb9,0x00,0x04,0x00,0x00]
vgatherdpd 1024(%rcx,%xmm31,4), %xmm17 {%k1}

// CHECK: vgatherdpd 123(%r14,%xmm31,8), %ymm23 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x21,0x92,0xbc,0xfe,0x7b,0x00,0x00,0x00]
vgatherdpd 123(%r14,%xmm31,8), %ymm23 {%k1}

// CHECK: vgatherdpd 256(%r9,%xmm31), %ymm23 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x21,0x92,0x7c,0x39,0x20]
vgatherdpd 256(%r9,%xmm31), %ymm23 {%k1}

// CHECK: vgatherdpd 1024(%rcx,%xmm31,4), %ymm23 {%k1}
// CHECK: encoding: [0x62,0xa2,0xfd,0x21,0x92,0xbc,0xb9,0x00,0x04,0x00,0x00]
vgatherdpd 1024(%rcx,%xmm31,4), %ymm23 {%k1}

// CHECK: vgatherdps 123(%r14,%xmm31,8), %xmm18 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x92,0x94,0xfe,0x7b,0x00,0x00,0x00]
vgatherdps 123(%r14,%xmm31,8), %xmm18 {%k1}

// CHECK: vgatherdps 256(%r9,%xmm31), %xmm18 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x92,0x54,0x39,0x40]
vgatherdps 256(%r9,%xmm31), %xmm18 {%k1}

// CHECK: vgatherdps 1024(%rcx,%xmm31,4), %xmm18 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x01,0x92,0x94,0xb9,0x00,0x04,0x00,0x00]
vgatherdps 1024(%rcx,%xmm31,4), %xmm18 {%k1}

// CHECK: vgatherdps 123(%r14,%ymm31,8), %ymm27 {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x21,0x92,0x9c,0xfe,0x7b,0x00,0x00,0x00]
vgatherdps 123(%r14,%ymm31,8), %ymm27 {%k1}

// CHECK: vgatherdps 256(%r9,%ymm31), %ymm27 {%k1}
// CHECK: encoding: [0x62,0x02,0x7d,0x21,0x92,0x5c,0x39,0x40]
vgatherdps 256(%r9,%ymm31), %ymm27 {%k1}

// CHECK: vgatherdps 1024(%rcx,%ymm31,4), %ymm27 {%k1}
// CHECK: encoding: [0x62,0x22,0x7d,0x21,0x92,0x9c,0xb9,0x00,0x04,0x00,0x00]
vgatherdps 1024(%rcx,%ymm31,4), %ymm27 {%k1}

// CHECK: vgatherqpd 123(%r14,%xmm31,8), %xmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x93,0x8c,0xfe,0x7b,0x00,0x00,0x00]
vgatherqpd 123(%r14,%xmm31,8), %xmm17 {%k1}

// CHECK: vgatherqpd 256(%r9,%xmm31), %xmm17 {%k1}
// CHECK: encoding: [0x62,0x82,0xfd,0x01,0x93,0x4c,0x39,0x20]
vgatherqpd 256(%r9,%xmm31), %xmm17 {%k1}

// CHECK: vgatherqpd 1024(%rcx,%xmm31,4), %xmm17 {%k1}
// CHECK: encoding: [0x62,0xa2,0xfd,0x01,0x93,0x8c,0xb9,0x00,0x04,0x00,0x00]
vgatherqpd 1024(%rcx,%xmm31,4), %xmm17 {%k1}

// CHECK: vgatherqpd 123(%r14,%ymm31,8), %ymm29 {%k1}
// CHECK: encoding: [0x62,0x02,0xfd,0x21,0x93,0xac,0xfe,0x7b,0x00,0x00,0x00]
vgatherqpd 123(%r14,%ymm31,8), %ymm29 {%k1}

// CHECK: vgatherqpd 256(%r9,%ymm31), %ymm29 {%k1}
// CHECK: encoding: [0x62,0x02,0xfd,0x21,0x93,0x6c,0x39,0x20]
vgatherqpd 256(%r9,%ymm31), %ymm29 {%k1}

// CHECK: vgatherqpd 1024(%rcx,%ymm31,4), %ymm29 {%k1}
// CHECK: encoding: [0x62,0x22,0xfd,0x21,0x93,0xac,0xb9,0x00,0x04,0x00,0x00]
vgatherqpd 1024(%rcx,%ymm31,4), %ymm29 {%k1}

// CHECK: vgatherqps 123(%r14,%xmm31,8), %xmm21 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x93,0xac,0xfe,0x7b,0x00,0x00,0x00]
vgatherqps 123(%r14,%xmm31,8), %xmm21 {%k1}

// CHECK: vgatherqps 256(%r9,%xmm31), %xmm21 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x01,0x93,0x6c,0x39,0x40]
vgatherqps 256(%r9,%xmm31), %xmm21 {%k1}

// CHECK: vgatherqps 1024(%rcx,%xmm31,4), %xmm21 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x01,0x93,0xac,0xb9,0x00,0x04,0x00,0x00]
vgatherqps 1024(%rcx,%xmm31,4), %xmm21 {%k1}

// CHECK: vgatherqps 123(%r14,%ymm31,8), %xmm19 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x21,0x93,0x9c,0xfe,0x7b,0x00,0x00,0x00]
vgatherqps 123(%r14,%ymm31,8), %xmm19 {%k1}

// CHECK: vgatherqps 256(%r9,%ymm31), %xmm19 {%k1}
// CHECK: encoding: [0x62,0x82,0x7d,0x21,0x93,0x5c,0x39,0x40]
vgatherqps 256(%r9,%ymm31), %xmm19 {%k1}

// CHECK: vgatherqps 1024(%rcx,%ymm31,4), %xmm19 {%k1}
// CHECK: encoding: [0x62,0xa2,0x7d,0x21,0x93,0x9c,0xb9,0x00,0x04,0x00,0x00]
vgatherqps 1024(%rcx,%ymm31,4), %xmm19 {%k1}

@ -1027,9 +1027,12 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("GR32_NOAX", TYPE_Rv)
TYPE("GR64_NOAX", TYPE_R64)
TYPE("vx32mem", TYPE_M32)
TYPE("vx32xmem", TYPE_M32)
TYPE("vy32mem", TYPE_M32)
TYPE("vy32xmem", TYPE_M32)
TYPE("vz32mem", TYPE_M32)
TYPE("vx64mem", TYPE_M64)
TYPE("vx64xmem", TYPE_M64)
TYPE("vy64mem", TYPE_M64)
TYPE("vy64xmem", TYPE_M64)
TYPE("vz64mem", TYPE_M64)
@ -1213,9 +1216,12 @@ RecognizableInstr::memoryEncodingFromString(const std::string &s,
ENCODING("opaque80mem", ENCODING_RM)
ENCODING("opaque512mem", ENCODING_RM)
ENCODING("vx32mem", ENCODING_RM)
ENCODING("vx32xmem", ENCODING_RM)
ENCODING("vy32mem", ENCODING_RM)
ENCODING("vy32xmem", ENCODING_RM)
ENCODING("vz32mem", ENCODING_RM)
ENCODING("vx64mem", ENCODING_RM)
ENCODING("vx64xmem", ENCODING_RM)
ENCODING("vy64mem", ENCODING_RM)
ENCODING("vy64xmem", ENCODING_RM)
ENCODING("vz64mem", ENCODING_RM)