diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7e283d1d01b..bd56edd5996 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -24646,7 +24646,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VPCOM: return "X86ISD::VPCOM"; case X86ISD::VPCOMU: return "X86ISD::VPCOMU"; case X86ISD::VPERMIL2: return "X86ISD::VPERMIL2"; - case X86ISD::FMADD: return "X86ISD::FMADD"; case X86ISD::FMSUB: return "X86ISD::FMSUB"; case X86ISD::FNMADD: return "X86ISD::FNMADD"; case X86ISD::FNMSUB: return "X86ISD::FNMSUB"; @@ -34011,10 +34010,10 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG, unsigned NewOpcode = 0; if (Arg.hasOneUse()) { switch (Arg.getOpcode()) { - case X86ISD::FMADD: NewOpcode = X86ISD::FNMSUB; break; + case ISD::FMA: NewOpcode = X86ISD::FNMSUB; break; case X86ISD::FMSUB: NewOpcode = X86ISD::FNMADD; break; case X86ISD::FNMADD: NewOpcode = X86ISD::FMSUB; break; - case X86ISD::FNMSUB: NewOpcode = X86ISD::FMADD; break; + case X86ISD::FNMSUB: NewOpcode = ISD::FMA; break; case X86ISD::FMADD_RND: NewOpcode = X86ISD::FNMSUB_RND; break; case X86ISD::FMSUB_RND: NewOpcode = X86ISD::FNMADD_RND; break; case X86ISD::FNMADD_RND: NewOpcode = X86ISD::FMSUB_RND; break; @@ -34608,14 +34607,14 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, unsigned NewOpcode; if (!NegMul) - NewOpcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB; + NewOpcode = (!NegC) ? ISD::FMA : X86ISD::FMSUB; else NewOpcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB; - // For FMA and FMADD, we risk reconstructing the node we started with. + // For FMA, we risk reconstructing the node we started with. // In order to avoid this, we check for negation or opcode change. If // one of the two happened, then it is a new node and we return it. - if (N->getOpcode() == X86ISD::FMADD || N->getOpcode() == ISD::FMA) { + if (N->getOpcode() == ISD::FMA) { if (HasNeg || NewOpcode != N->getOpcode()) return DAG.getNode(NewOpcode, dl, VT, A, B, C); return SDValue(); @@ -34623,21 +34622,21 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, if (N->getOpcode() == X86ISD::FMADD_RND) { switch (NewOpcode) { - case X86ISD::FMADD: NewOpcode = X86ISD::FMADD_RND; break; + case ISD::FMA: NewOpcode = X86ISD::FMADD_RND; break; case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUB_RND; break; case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADD_RND; break; case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUB_RND; break; } } else if (N->getOpcode() == X86ISD::FMADDS1_RND) { switch (NewOpcode) { - case X86ISD::FMADD: NewOpcode = X86ISD::FMADDS1_RND; break; + case ISD::FMA: NewOpcode = X86ISD::FMADDS1_RND; break; case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS1_RND; break; case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS1_RND; break; case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS1_RND; break; } } else if (N->getOpcode() == X86ISD::FMADDS3_RND) { switch (NewOpcode) { - case X86ISD::FMADD: NewOpcode = X86ISD::FMADDS3_RND; break; + case ISD::FMA: NewOpcode = X86ISD::FMADDS3_RND; break; case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS3_RND; break; case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS3_RND; break; case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS3_RND; break; @@ -35789,7 +35788,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::VPERM2X128: case X86ISD::VZEXT_MOVL: case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget); - case X86ISD::FMADD: case X86ISD::FMADD_RND: case X86ISD::FMADDS1_RND: case X86ISD::FMADDS3_RND: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 257a006897d..7307af100a6 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -474,7 +474,7 @@ namespace llvm { VPMADD52L, VPMADD52H, // FMA nodes. - FMADD, + // We use the target independent ISD::FMA for the non-inverted case. FNMADD, FMSUB, FNMSUB, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 8ec7a4e08df..8f925e4ca55 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -6261,7 +6261,7 @@ multiclass avx512_fma3p_213_f opc, string OpcodeStr, SDNode OpNode, avx512vl_f64_info, "PD">, VEX_W; } -defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>; +defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", fma, X86FmaddRnd>; defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>; defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>; defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>; @@ -6346,7 +6346,7 @@ multiclass avx512_fma3p_231_f opc, string OpcodeStr, SDNode OpNode, avx512vl_f64_info, "PD">, VEX_W; } -defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>; +defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", fma, X86FmaddRnd>; defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>; defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>; defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>; @@ -6420,7 +6420,7 @@ multiclass avx512_fma3p_132_f opc, string OpcodeStr, SDNode OpNode, avx512vl_f64_info, "PD">, VEX_W; } -defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>; +defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", fma, X86FmaddRnd>; defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>; defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>; defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>; @@ -6514,7 +6514,7 @@ multiclass avx512_fma3s opc213, bits<8> opc231, bits<8> opc132, } } -defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnds1, +defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", fma, X86FmaddRnds1, X86FmaddRnds3>; defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnds1, X86FmsubRnds3>; diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 3a3cdc9fa57..ee0cc54765a 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -89,7 +89,7 @@ multiclass fma3p_forms opc132, bits<8> opc213, bits<8> opc231, // Fused Multiply-Add let ExeDomain = SSEPackedSingle in { defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS", - loadv4f32, loadv8f32, X86Fmadd, v4f32, v8f32>; + loadv4f32, loadv8f32, fma, v4f32, v8f32>; defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS", loadv4f32, loadv8f32, X86Fmsub, v4f32, v8f32>; defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", "PS", @@ -102,7 +102,7 @@ let ExeDomain = SSEPackedSingle in { let ExeDomain = SSEPackedDouble in { defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD", - loadv2f64, loadv4f64, X86Fmadd, v2f64, + loadv2f64, loadv4f64, fma, v2f64, v4f64>, VEX_W; defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD", loadv2f64, loadv4f64, X86Fmsub, v2f64, @@ -254,7 +254,7 @@ multiclass fma3s opc132, bits<8> opc213, bits<8> opc231, } defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, - int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG; + int_x86_fma_vfmadd_sd, fma>, VEX_LIG; defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss, int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG; @@ -390,7 +390,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { let ExeDomain = SSEPackedSingle in { // Scalar Instructions - defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, + defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, fma, loadf32>, fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, int_x86_fma_vfmadd_ss>; defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, @@ -405,7 +405,7 @@ let ExeDomain = SSEPackedSingle in { fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, int_x86_fma_vfnmsub_ss>; // Packed Instructions - defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32, + defm VFMADDPS4 : fma4p<0x68, "vfmaddps", fma, v4f32, v8f32, loadv4f32, loadv8f32>; defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32, loadv4f32, loadv8f32>; @@ -421,7 +421,7 @@ let ExeDomain = SSEPackedSingle in { let ExeDomain = SSEPackedDouble in { // Scalar Instructions - defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, + defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, fma, loadf64>, fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, int_x86_fma_vfmadd_sd>; defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, @@ -436,7 +436,7 @@ let ExeDomain = SSEPackedDouble in { fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, int_x86_fma_vfnmsub_sd>; // Packed Instructions - defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64, + defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", fma, v2f64, v4f64, loadv2f64, loadv4f64>; defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64, loadv2f64, loadv4f64>; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index e49fb62311d..225aaada34a 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -481,7 +481,7 @@ def X86fsqrtRnds : SDNode<"X86ISD::FSQRTS_RND", SDTFPBinOpRound>; def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>; def X86fgetexpRnds : SDNode<"X86ISD::FGETEXPS_RND", SDTFPBinOpRound>; -def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFPTernaryOp>; +// No need for FMADD because we use ISD::FMA. def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp>; def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp>; def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp>; diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 6db06a38bfb..699b6effac3 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -1149,13 +1149,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::CVTPS2PH, 0), X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK, X86ISD::CVTPS2PH, 0), - X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD, + X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, ISD::FMA, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, ISD::FMA, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, ISD::FMA, X86ISD::FMADD_RND), - X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_128, FMA_OP_MASK, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_256, FMA_OP_MASK, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD, + X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_128, FMA_OP_MASK, ISD::FMA, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_256, FMA_OP_MASK, ISD::FMA, 0), + X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_512, FMA_OP_MASK, ISD::FMA, X86ISD::FMADD_RND), X86_INTRINSIC_DATA(avx512_mask_vfmadd_sd, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1_RND, 0), @@ -1271,13 +1271,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VPMADD52L, 0), X86_INTRINSIC_DATA(avx512_mask_vpmadd52l_uq_512 , FMA_OP_MASK, X86ISD::VPMADD52L, 0), - X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, X86ISD::FMADD, + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, ISD::FMA, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, ISD::FMA, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, ISD::FMA, X86ISD::FMADD_RND), - X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_128, FMA_OP_MASK3, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_256, FMA_OP_MASK3, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_512, FMA_OP_MASK3, X86ISD::FMADD, + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_128, FMA_OP_MASK3, ISD::FMA, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_256, FMA_OP_MASK3, ISD::FMA, 0), + X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_512, FMA_OP_MASK3, ISD::FMA, X86ISD::FMADD_RND), X86_INTRINSIC_DATA(avx512_mask3_vfmadd_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3_RND, 0), @@ -1349,13 +1349,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VPTERNLOG, 0), X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_512, TERLOG_OP_MASKZ, X86ISD::VPTERNLOG, 0), - X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, X86ISD::FMADD, + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, ISD::FMA, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, ISD::FMA, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, ISD::FMA, X86ISD::FMADD_RND), - X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_128, FMA_OP_MASKZ, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_256, FMA_OP_MASKZ, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_512, FMA_OP_MASKZ, X86ISD::FMADD, + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_128, FMA_OP_MASKZ, ISD::FMA, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_256, FMA_OP_MASKZ, ISD::FMA, 0), + X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_512, FMA_OP_MASKZ, ISD::FMA, X86ISD::FMADD_RND), X86_INTRINSIC_DATA(avx512_maskz_vfmadd_sd, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1_RND, 0), @@ -1524,10 +1524,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0), X86_INTRINSIC_DATA(avx512_vpermilvar_pd_512, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx512_vpermilvar_ps_512, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), - X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, X86ISD::FMADD, 0), - X86_INTRINSIC_DATA(fma_vfmadd_ps_256, INTR_TYPE_3OP, X86ISD::FMADD, 0), + X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, ISD::FMA, 0), + X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, ISD::FMA, 0), + X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, ISD::FMA, 0), + X86_INTRINSIC_DATA(fma_vfmadd_ps_256, INTR_TYPE_3OP, ISD::FMA, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),