From 26bc09167461968f88179fe95d39d4c10e40ce5b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 24 Feb 2017 07:21:10 +0000 Subject: [PATCH] [AVX-512] Separate the fadd/fsub/fmul/fdiv/fmax/fmin with rounding mode ISD opcodes into separate packed and scalar opcodes. This is more consistent with the rest of the ISD opcodes. NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296094 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 6 ++++++ lib/Target/X86/X86ISelLowering.h | 12 ++++++------ lib/Target/X86/X86InstrAVX512.td | 12 ++++++------ lib/Target/X86/X86InstrFragmentsSIMD.td | 10 ++++++++-- lib/Target/X86/X86IntrinsicsInfo.h | 24 ++++++++++++------------ 5 files changed, 38 insertions(+), 26 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cd8ef13c1a2..720f93a4fbe 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -23936,9 +23936,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMAXS: return "X86ISD::FMAXS"; case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND"; + case X86ISD::FMAXS_RND: return "X86ISD::FMAX_RND"; case X86ISD::FMIN: return "X86ISD::FMIN"; case X86ISD::FMINS: return "X86ISD::FMINS"; case X86ISD::FMIN_RND: return "X86ISD::FMIN_RND"; + case X86ISD::FMINS_RND: return "X86ISD::FMINS_RND"; case X86ISD::FMAXC: return "X86ISD::FMAXC"; case X86ISD::FMINC: return "X86ISD::FMINC"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; @@ -24131,9 +24133,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::RSQRT28: return "X86ISD::RSQRT28"; case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S"; case X86ISD::FADD_RND: return "X86ISD::FADD_RND"; + case X86ISD::FADDS_RND: return "X86ISD::FADDS_RND"; case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND"; + case X86ISD::FSUBS_RND: return "X86ISD::FSUBS_RND"; case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND"; + case X86ISD::FMULS_RND: return "X86ISD::FMULS_RND"; case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND"; + case X86ISD::FDIVS_RND: return "X86ISD::FDIVS_RND"; case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; case X86ISD::FSQRTS_RND: return "X86ISD::FSQRTS_RND"; case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 27668b28e5a..14c5531a51b 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -204,12 +204,12 @@ namespace llvm { ADDSUB, // FP vector ops with rounding mode. - FADD_RND, - FSUB_RND, - FMUL_RND, - FDIV_RND, - FMAX_RND, - FMIN_RND, + FADD_RND, FADDS_RND, + FSUB_RND, FSUBS_RND, + FMUL_RND, FMULS_RND, + FDIV_RND, FDIVS_RND, + FMAX_RND, FMAXS_RND, + FMIN_RND, FMINS_RND, FSQRT_RND, FSQRTS_RND, // FP vector get exponent. diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 8494307f3c7..69eb1536b06 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -4288,13 +4288,13 @@ multiclass avx512_binop_s_sae opc, string OpcodeStr, SDNode OpNode, VecNode, SaeNode, itins.d, IsCommutable>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } -defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>; -defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_MUL_ITINS_S, 1>; -defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>; -defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_DIV_ITINS_S, 0>; -defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnd, +defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, SSE_ALU_ITINS_S, 1>; +defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, SSE_MUL_ITINS_S, 1>; +defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, SSE_ALU_ITINS_S, 0>; +defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, SSE_DIV_ITINS_S, 0>; +defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds, SSE_ALU_ITINS_S, 0>; -defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnd, +defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds, SSE_ALU_ITINS_S, 0>; // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 3a60f486f53..037f0282a91 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -464,13 +464,19 @@ def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>; def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>; +def X86faddRnds : SDNode<"X86ISD::FADDS_RND", SDTFPBinOpRound>; def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>; +def X86fsubRnds : SDNode<"X86ISD::FSUBS_RND", SDTFPBinOpRound>; def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>; +def X86fmulRnds : SDNode<"X86ISD::FMULS_RND", SDTFPBinOpRound>; def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; -def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>; +def X86fdivRnds : SDNode<"X86ISD::FDIVS_RND", SDTFPBinOpRound>; +def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>; +def X86fmaxRnds : SDNode<"X86ISD::FMAXS_RND", SDTFPBinOpRound>; +def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>; +def X86fminRnds : SDNode<"X86ISD::FMINS_RND", SDTFPBinOpRound>; def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>; def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOpRound>; -def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>; def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; def X86fsqrtRnds : SDNode<"X86ISD::FSQRTS_RND", SDTFPBinOpRound>; def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>; diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index a927b3b437c..c2e950fdbcf 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -464,9 +464,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD, X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FADD_RND, 0), + X86ISD::FADDS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FADD_RND, 0), + X86ISD::FADDS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_256, BRCST32x2_TO_VEC, X86ISD::VBROADCAST, 0), X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_512, BRCST32x2_TO_VEC, @@ -720,9 +720,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV, X86ISD::FDIV_RND), X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FDIV_RND, 0), + X86ISD::FDIVS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FDIV_RND, 0), + X86ISD::FDIVS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG, X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG, @@ -800,25 +800,25 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX, X86ISD::FMAX_RND), X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK, - X86ISD::FMAXS, X86ISD::FMAX_RND), + X86ISD::FMAXS, X86ISD::FMAXS_RND), X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK, - X86ISD::FMAXS, X86ISD::FMAX_RND), + X86ISD::FMAXS, X86ISD::FMAXS_RND), X86_INTRINSIC_DATA(avx512_mask_min_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN, X86ISD::FMIN_RND), X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN, X86ISD::FMIN_RND), X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK, - X86ISD::FMINS, X86ISD::FMIN_RND), + X86ISD::FMINS, X86ISD::FMINS_RND), X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK, - X86ISD::FMINS, X86ISD::FMIN_RND), + X86ISD::FMINS, X86ISD::FMINS_RND), X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL, X86ISD::FMUL_RND), X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL, X86ISD::FMUL_RND), X86_INTRINSIC_DATA(avx512_mask_mul_sd_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FMUL_RND, 0), + X86ISD::FMULS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FMUL_RND, 0), + X86ISD::FMULS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), @@ -1159,9 +1159,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK, ISD::FSUB, X86ISD::FSUB_RND), X86_INTRINSIC_DATA(avx512_mask_sub_sd_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FSUB_RND, 0), + X86ISD::FSUBS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FSUB_RND, 0), + X86ISD::FSUBS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_b_256, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_b_512, CMP_MASK_CC, X86ISD::CMPMU, 0),