[X86] Lower avx2/avx512f gather intrinsics to X86MaskedGatherSDNode instead of going directly to MachineSDNode.

This sends these intrinsics through isel in a much more normal way. This should allow addressing mode matching in isel to make better use of the displacement field.

Differential Revision: https://reviews.llvm.org/D56827

llvm-svn: 351570
parent 5f3b78720d
commit 5c2f5a0877
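The payoff is visible in the test updates at the bottom of this diff: a gather from a global no longer materializes the symbol into a register (movl $x, %eax), because isel can now fold it into the instruction's displacement field. Below is a minimal standalone sketch of why both forms address the same memory — plain C++ with a made-up address, not LLVM code.

#include <cstdint>
#include <cstdio>

// Minimal model of an x86 memory operand: Base + Index*Scale + Disp.
// All values here are hypothetical; this only illustrates why freeing up
// the Disp field removes the extra register move seen in the old codegen.
struct X86Addr {
  uint64_t Base;  // base register contents (0 if no base register)
  unsigned Scale; // 1, 2, 4, or 8
  int32_t Disp;   // constant displacement encoded in the instruction
};

// Effective address of one gather lane whose index element is LaneIdx.
uint64_t effectiveAddr(const X86Addr &A, uint64_t LaneIdx) {
  return A.Base + LaneIdx * A.Scale + (int64_t)A.Disp;
}

int main() {
  const int32_t AddrOfX = 0x601040; // pretend link-time address of global x
  // Old lowering: Disp was hard-coded to 0, so the address of x had to be
  // materialized into a base register first (movl $x, %eax).
  X86Addr Old{(uint64_t)(uint32_t)AddrOfX, 4, 0};
  // New lowering: isel folds x into Disp, i.e. "vgatherqps x(,%ymm0,4)".
  X86Addr New{0, 4, AddrOfX};
  std::printf("lane 3: old=%#llx new=%#llx\n",
              (unsigned long long)effectiveAddr(Old, 3),
              (unsigned long long)effectiveAddr(New, 3));
}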
@@ -4810,6 +4810,18 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.flags |= MachineMemOperand::MOStore;
     break;
   }
+  case GATHER:
+  case GATHER_AVX2: {
+    Info.ptrVal = nullptr;
+    MVT DataVT = MVT::getVT(I.getType());
+    MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
+    unsigned NumElts = std::min(DataVT.getVectorNumElements(),
+                                IndexVT.getVectorNumElements());
+    Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
+    Info.align = 1;
+    Info.flags |= MachineMemOperand::MOLoad;
+    break;
+  }
  default:
    return false;
  }
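The new case derives the memory VT from the narrower of the data and index vectors, since the gather accesses min(#data elements, #index elements) lanes of the data element type. A self-contained sketch of just that computation, using plain C++ stand-ins rather than the real MVT API:

#include <algorithm>
#include <cstdio>

// Hypothetical stand-in for an MVT vector type: element width in bits
// plus element count.
struct VecVT { unsigned EltBits; unsigned NumElts; };

// Mirrors the memVT logic added above: the gather touches
// min(#data elts, #index elts) elements of the data element type.
VecVT gatherMemVT(VecVT DataVT, VecVT IndexVT) {
  unsigned NumElts = std::min(DataVT.NumElts, IndexVT.NumElts);
  return {DataVT.EltBits, NumElts};
}

int main() {
  // e.g. a qps-style gather: <4 x float> data with <4 x i64> indices.
  VecVT MemVT = gatherMemVT({32, 4}, {64, 4});
  std::printf("memVT: %u elts x %u bits = %u bytes accessed\n",
              MemVT.NumElts, MemVT.EltBits, MemVT.NumElts * MemVT.EltBits / 8);
}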
@@ -22376,25 +22388,26 @@ static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
   if (!C)
     return SDValue();
   SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
-  EVT MaskVT = Mask.getValueType();
+  EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger();
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
-  SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
-  SDValue Segment = DAG.getRegister(0, MVT::i32);
   // If source is undef or we know it won't be used, use a zero vector
   // to break register dependency.
   // TODO: use undef instead and let BreakFalseDeps deal with it?
   if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
     Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
-  SDValue Ops[] = {Src, Base, Scale, Index, Disp, Segment, Mask, Chain};
-  SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
-  SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
-  return DAG.getMergeValues(RetOps, dl);
+
+  MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
+
+  SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
+  SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+      VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand());
+  return DAG.getMergeValues({ Res, Res.getValue(2) }, dl);
 }

-static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
-                             SDValue Src, SDValue Mask, SDValue Base,
-                             SDValue Index, SDValue ScaleOp, SDValue Chain,
-                             const X86Subtarget &Subtarget) {
+static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
+                             SDValue Src, SDValue Mask, SDValue Base,
+                             SDValue Index, SDValue ScaleOp, SDValue Chain,
+                             const X86Subtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
   auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
@@ -22412,17 +22425,18 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
   Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);

   SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
-  SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
-  SDValue Segment = DAG.getRegister(0, MVT::i32);
   // If source is undef or we know it won't be used, use a zero vector
   // to break register dependency.
   // TODO: use undef instead and let BreakFalseDeps deal with it?
   if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
     Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
-  SDValue Ops[] = {Src, Mask, Base, Scale, Index, Disp, Segment, Chain};
-  SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
-  SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
-  return DAG.getMergeValues(RetOps, dl);
+
+  MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
+
+  SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
+  SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+      VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand());
+  return DAG.getMergeValues({ Res, Res.getValue(2) }, dl);
 }

 static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
@@ -22787,7 +22801,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
     SDValue Index = Op.getOperand(4);
     SDValue Mask = Op.getOperand(5);
     SDValue Scale = Op.getOperand(6);
-    return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale,
+    return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale,
                          Chain, Subtarget);
   }
   case SCATTER: {
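The shape change is easiest to see in the operand lists: the old MachineSDNode form had to spell out the complete x86 memory reference, pinning Disp to 0 and Segment to no register at lowering time, while the new X86MaskedGatherSDNode form carries only the raw address components and leaves the displacement to isel's addressing-mode matcher. A trivial comparison sketch, illustrative only, mirroring the SDValue names from getGatherNode above:

#include <cstdio>

int main() {
  // Old: a MachineSDNode carrying the complete x86 memory reference, with
  // the displacement and segment fixed at lowering time.
  const char *OldOps[] = {"Src",   "Mask",   "Base",         "Scale",
                          "Index", "Disp=0", "Segment=none", "Chain"};
  // New: an X86MaskedGatherSDNode with only the raw address components;
  // isel's addressing-mode matcher picks Disp, and can fold a global symbol
  // into it (see the test updates below).
  const char *NewOps[] = {"Chain", "Src", "Mask", "Base", "Index", "Scale"};
  for (const char *Op : OldOps)
    std::printf("old operand: %s\n", Op);
  for (const char *Op : NewOps)
    std::printf("new operand: %s\n", Op);
  // Both nodes produce three results: 0 = gathered vector, 1 = mask
  // write-back, 2 = chain; the helpers merge and return results 0 and 2.
  return 0;
}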
@@ -8363,7 +8363,7 @@ multiclass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
                   VEX, VEX_L, Sched<[WriteLoad]>;
 }

-let Predicates = [UseAVX2] in {
+let Predicates = [HasAVX2] in {
   let mayLoad = 1, hasSideEffects = 0, Constraints
     = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
     in {
@@ -64,47 +64,47 @@ struct IntrinsicData {
  * the alphabetical order.
  */
 static const IntrinsicData IntrinsicsWithChain[] = {
-  X86_INTRINSIC_DATA(avx2_gather_d_d, GATHER_AVX2, X86::VPGATHERDDrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_d_d_256, GATHER_AVX2, X86::VPGATHERDDYrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_d_pd, GATHER_AVX2, X86::VGATHERDPDrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_d_pd_256, GATHER_AVX2, X86::VGATHERDPDYrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_d_ps, GATHER_AVX2, X86::VGATHERDPSrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_d_ps_256, GATHER_AVX2, X86::VGATHERDPSYrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_d_q, GATHER_AVX2, X86::VPGATHERDQrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_d_q_256, GATHER_AVX2, X86::VPGATHERDQYrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_d, GATHER_AVX2, X86::VPGATHERQDrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_d_256, GATHER_AVX2, X86::VPGATHERQDYrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_pd, GATHER_AVX2, X86::VGATHERQPDrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_pd_256, GATHER_AVX2, X86::VGATHERQPDYrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_ps, GATHER_AVX2, X86::VGATHERQPSrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_ps_256, GATHER_AVX2, X86::VGATHERQPSYrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_q, GATHER_AVX2, X86::VPGATHERQQrm, 0),
-  X86_INTRINSIC_DATA(avx2_gather_q_q_256, GATHER_AVX2, X86::VPGATHERQQYrm, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_d, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_d_256, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_pd, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_pd_256, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_ps, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_ps_256, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_q, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_d_q_256, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_d, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_d_256, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_pd, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_pd_256, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_ps, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_ps_256, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_q, GATHER_AVX2, 0, 0),
+  X86_INTRINSIC_DATA(avx2_gather_q_q_256, GATHER_AVX2, 0, 0),

-  X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
+  X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather_dps_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather_qpd_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather_qpi_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather_qpq_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather_qps_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div2_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div2_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div4_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div4_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div4_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div4_si, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div8_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3div8_si, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv2_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv2_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv4_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv4_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv4_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv4_si, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, 0, 0),

   X86_INTRINSIC_DATA(avx512_gatherpf_dpd_512, PREFETCH,
                      X86::VGATHERPF0DPDm, X86::VGATHERPF1DPDm),
@@ -115,30 +115,30 @@ static const IntrinsicData IntrinsicsWithChain[] = {
   X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
                      X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),

-  X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
-  X86_INTRINSIC_DATA(avx512_mask_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_dps_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_qpd_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_qpi_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_qpq_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather_qps_512, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div2_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div2_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div4_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div4_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div4_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div4_si, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div8_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3div8_si, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv2_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv2_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_df, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_di, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv4_si, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv8_sf, GATHER, 0, 0),
+  X86_INTRINSIC_DATA(avx512_mask_gather3siv8_si, GATHER, 0, 0),

   X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
                      X86ISD::VTRUNC, 0),
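For reference, each row above instantiates a table entry of roughly the following shape — a simplified sketch of the struct defined at the top of X86IntrinsicsInfo.h; the exact field widths here are an assumption. With gathers now lowered via X86MaskedGatherSDNode, the GATHER/GATHER_AVX2 rows no longer carry an instruction opcode, so both opcode slots become 0 and only the lowering kind is consulted:

#include <cstdint>

// Simplified sketch of the table row type; the in-tree struct also ties Id
// to the Intrinsic enum and defines ordering for binary search.
enum IntrinsicType : uint16_t { GATHER, GATHER_AVX2, PREFETCH /* ... */ };

struct IntrinsicData {
  uint16_t      Id;   // which llvm.x86.* intrinsic this row describes
  IntrinsicType Type; // how LowerINTRINSIC_W_CHAIN should lower it
  uint16_t      Opc0; // primary opcode, now 0 for all gather rows
  uint32_t      Opc1; // secondary opcode, e.g. the hint-1 prefetch form
};

// After this change a gather row carries no opcode at all:
//   { /*Id=*/..., GATHER_AVX2, /*Opc0=*/0, /*Opc1=*/0 }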
@@ -152,9 +152,8 @@ define <4 x float> @gather_global(<4 x i64>, i32* nocapture readnone) {
 ; X32-LABEL: gather_global:
 ; X32:       # %bb.0:
 ; X32-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; X32-NEXT:    movl $x, %eax
 ; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT:    vgatherqps %xmm2, (%eax,%ymm0,4), %xmm1
+; X32-NEXT:    vgatherqps %xmm2, x(,%ymm0,4), %xmm1
 ; X32-NEXT:    vmovaps %xmm1, %xmm0
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
@@ -162,9 +161,8 @@ define <4 x float> @gather_global(<4 x i64>, i32* nocapture readnone) {
 ; X64-LABEL: gather_global:
 ; X64:       # %bb.0:
 ; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; X64-NEXT:    movl $x, %eax
 ; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT:    vgatherqps %xmm2, (%rax,%ymm0,4), %xmm1
+; X64-NEXT:    vgatherqps %xmm2, x(,%ymm0,4), %xmm1
 ; X64-NEXT:    vmovaps %xmm1, %xmm0
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
@@ -881,9 +881,8 @@ define <8 x float> @gather_global(<8 x i64>, i32* nocapture readnone) {
 ; CHECK-LABEL: gather_global:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    kxnorw %k0, %k0, %k1
-; CHECK-NEXT:    movl $x, %eax
 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vgatherqps (%rax,%zmm0,4), %ymm1 {%k1}
+; CHECK-NEXT:    vgatherqps x(,%zmm0,4), %ymm1 {%k1}
 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
 ; CHECK-NEXT:    retq
   %3 = tail call <8 x float> @llvm.x86.avx512.mask.gather.qps.512(<8 x float> zeroinitializer, i8* bitcast ([1024 x float]* @x to i8*), <8 x i64> %0, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)