Add 'llvm.experimental.constrained.fma' Intrinsic.

Differential Revision: http://reviews.llvm.org/D36335 llvm-svn: 311629
2025-04-02 15:51:54 +00:00 · 2017-08-24 04:18:24 +00:00 · 2017-08-24 04:18:24 +00:00 · cb5ec6af94
commit cb5ec6af94
parent 41739d608d
12 changed files with 163 additions and 35 deletions
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@ -13021,6 +13021,41 @@ The value produced is the floating point remainder from the division of the two
 value operands and has the same type as the operands.  The remainder has the
 same sign as the dividend. 

+'``llvm.experimental.constrained.fma``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.fma(<type> <op1>, <type> <op2>, <type> <op3>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.fma``' intrinsic returns the result of a
+fused-multiply-add operation on its operands.
+
+Arguments:
+""""""""""
+
+The first three arguments to the '``llvm.experimental.constrained.fma``'
+intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
+<t_vector>` of floating point values. All arguments must have identical types.
+
+The fourth and fifth arguments specify the rounding mode and exception behavior
+as described above.
+
+Semantics:
+""""""""""
+
+The result produced is the product of the first two operands added to the third
+operand computed with infinite precision, and then rounded to the target
+precision.

 Constrained libm-equivalent Intrinsics
 --------------------------------------
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@ -263,6 +263,7 @@ namespace ISD {
    /// They are used to limit optimizations while the DAG is being
    /// optimized.
    STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FREM,
+    STRICT_FMA,

    /// Constrained versions of libm-equivalent floating point intrinsics.
    /// These will be lowered to the equivalent non-constrained pseudo-op
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@ -623,13 +623,14 @@ public:
  /// Test if this node is a strict floating point pseudo-op.
  bool isStrictFPOpcode() {
    switch (NodeType) {
-      default: 
+      default:
        return false;
      case ISD::STRICT_FADD:
      case ISD::STRICT_FSUB:
      case ISD::STRICT_FMUL:
      case ISD::STRICT_FDIV:
      case ISD::STRICT_FREM:
+      case ISD::STRICT_FMA:
      case ISD::STRICT_FSQRT:
      case ISD::STRICT_FPOW:
      case ISD::STRICT_FPOWI:
--- a/include/llvm/IR/IntrinsicInst.h
+++ b/include/llvm/IR/IntrinsicInst.h
@ -167,6 +167,7 @@ namespace llvm {
    };

    bool isUnaryOp() const;
+    bool isTernaryOp() const;
    RoundingMode getRoundingMode() const;
    ExceptionBehavior getExceptionBehavior() const;

@ -178,6 +179,7 @@ namespace llvm {
      case Intrinsic::experimental_constrained_fmul:
      case Intrinsic::experimental_constrained_fdiv:
      case Intrinsic::experimental_constrained_frem:
+      case Intrinsic::experimental_constrained_fma:
      case Intrinsic::experimental_constrained_sqrt:
      case Intrinsic::experimental_constrained_pow:
      case Intrinsic::experimental_constrained_powi:
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@ -490,6 +490,13 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
                                                      llvm_metadata_ty,
                                                      llvm_metadata_ty ]>;

+  def int_experimental_constrained_fma : Intrinsic<[ llvm_anyfloat_ty ],
+                                                    [ LLVMMatchType<0>,
+                                                      LLVMMatchType<0>,
+                                                      LLVMMatchType<0>,
+                                                      llvm_metadata_ty,
+                                                      llvm_metadata_ty ]>;
+
  // These intrinsics are sensitive to the rounding mode so we need constrained
  // versions of each of them.  When strict rounding and exception control are
  // not required the non-constrained versions of these intrinsics should be
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@ -907,6 +907,7 @@ getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
    case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
    case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
    case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
+    case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
    case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
    case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
    case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
@ -1072,6 +1073,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
    }
    break;
  case ISD::STRICT_FSQRT:
+  case ISD::STRICT_FMA:
  case ISD::STRICT_FPOW:
  case ISD::STRICT_FPOWI:
  case ISD::STRICT_FSIN:
@ -1240,7 +1242,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
      // If the index is dependent on the store we will introduce a cycle when
      // creating the load (the load uses the index, and by replacing the chain
      // we will make the index dependent on the load). Also, the store might be
-      // dependent on the extractelement and introduce a cycle when creating 
+      // dependent on the extractelement and introduce a cycle when creating
      // the load.
      if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) ||
          ST->hasPredecessor(Op.getNode()))
@ -4065,6 +4067,12 @ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
    Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
                                      RTLIB::FMA_F80, RTLIB::FMA_F128,
                                      RTLIB::FMA_PPCF128));
+    break;
+  case ISD::STRICT_FMA:
+    // The constrained form is expanded to the same fma libcalls as ISD::FMA.
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+                                      RTLIB::FMA_F80, RTLIB::FMA_F128,
+                                      RTLIB::FMA_PPCF128));
    break;
  case ISD::FADD:
    Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -6695,6 +6695,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
  unsigned OrigOpc = Node->getOpcode();
  unsigned NewOpc;
  bool IsUnary = false;
+  bool IsTernary = false;
  switch (OrigOpc) {
  default:
    llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
@ -6703,6 +6704,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
  case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
  case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
  case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
+  case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break;
  case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
  case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
  case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
@ -6729,6 +6731,10 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
  SDNode *Res = nullptr;
  if (IsUnary)
    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
+  else if (IsTernary)
+    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
+                                           Node->getOperand(2),
+                                           Node->getOperand(3)});
  else
    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
                                           Node->getOperand(2) });
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@ -5432,6 +5432,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
  case Intrinsic::experimental_constrained_fmul:
  case Intrinsic::experimental_constrained_fdiv:
  case Intrinsic::experimental_constrained_frem:
+  case Intrinsic::experimental_constrained_fma:
  case Intrinsic::experimental_constrained_sqrt:
  case Intrinsic::experimental_constrained_pow:
  case Intrinsic::experimental_constrained_powi:
@ -5963,6 +5964,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
  case Intrinsic::experimental_constrained_frem:
    Opcode = ISD::STRICT_FREM;
    break;
+  case Intrinsic::experimental_constrained_fma:
+    Opcode = ISD::STRICT_FMA;
+    break;
  case Intrinsic::experimental_constrained_sqrt:
    Opcode = ISD::STRICT_FSQRT;
    break;
@ -6009,10 +6013,15 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
  SDVTList VTs = DAG.getVTList(ValueVTs);
  SDValue Result;
  if (FPI.isUnaryOp())
-    Result = DAG.getNode(Opcode, sdl, VTs, 
+    Result = DAG.getNode(Opcode, sdl, VTs,
                         { Chain, getValue(FPI.getArgOperand(0)) });
+  else if (FPI.isTernaryOp())
+    Result = DAG.getNode(Opcode, sdl, VTs,
+                         { Chain, getValue(FPI.getArgOperand(0)),
+                                  getValue(FPI.getArgOperand(1)),
+                                  getValue(FPI.getArgOperand(2)) });
  else
-    Result = DAG.getNode(Opcode, sdl, VTs, 
+    Result = DAG.getNode(Opcode, sdl, VTs,
                         { Chain, getValue(FPI.getArgOperand(0)),
                           getValue(FPI.getArgOperand(1))  });

--- a/lib/IR/IntrinsicInst.cpp
+++ b/lib/IR/IntrinsicInst.cpp
@ -14,10 +14,10 @@
 // are all subclasses of the CallInst class.  Note that none of these classes
 // has state or virtual methods, which is an important part of this gross/neat
 // hack working.
-// 
+//
 // In some cases, arguments to intrinsics need to be generic and are defined as
 // type pointer to empty struct { }*.  To access the real item of interest the
-// cast instruction needs to be stripped away. 
+// cast instruction needs to be stripped away.
 //
 //===----------------------------------------------------------------------===//

@ -98,7 +98,7 @@ Value *InstrProfIncrementInst::getStep() const {
 ConstrainedFPIntrinsic::RoundingMode
 ConstrainedFPIntrinsic::getRoundingMode() const {
  unsigned NumOperands = getNumArgOperands();
-  Metadata *MD = 
+  Metadata *MD =
      dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 2))->getMetadata();
  if (!MD || !isa<MDString>(MD))
    return rmInvalid;
@ -118,7 +118,7 @@ ConstrainedFPIntrinsic::getRoundingMode() const {
 ConstrainedFPIntrinsic::ExceptionBehavior
 ConstrainedFPIntrinsic::getExceptionBehavior() const {
  unsigned NumOperands = getNumArgOperands();
-  Metadata *MD = 
+  Metadata *MD =
      dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 1))->getMetadata();
  if (!MD || !isa<MDString>(MD))
    return ebInvalid;
@ -132,7 +132,7 @@ ConstrainedFPIntrinsic::getExceptionBehavior() const {

 bool ConstrainedFPIntrinsic::isUnaryOp() const {
  switch (getIntrinsicID()) {
-    default: 
+    default:
      return false;
    case Intrinsic::experimental_constrained_sqrt:
    case Intrinsic::experimental_constrained_sin:
@ -147,3 +147,13 @@ bool ConstrainedFPIntrinsic::isUnaryOp() const {
      return true;
  }
 }
+
+/// Returns true when this constrained intrinsic is one of the forms that is
+/// treated as taking three value operands (currently only the fma intrinsic).
+bool ConstrainedFPIntrinsic::isTernaryOp() const {
+  if (getIntrinsicID() == Intrinsic::experimental_constrained_fma)
+    return true;
+  // No other constrained intrinsic is treated as ternary.
+  return false;
+}
+
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@ -3973,6 +3973,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
  case Intrinsic::experimental_constrained_fmul:
  case Intrinsic::experimental_constrained_fdiv:
  case Intrinsic::experimental_constrained_frem:
+  case Intrinsic::experimental_constrained_fma:
  case Intrinsic::experimental_constrained_sqrt:
  case Intrinsic::experimental_constrained_pow:
  case Intrinsic::experimental_constrained_powi:
@ -4433,8 +4434,9 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) {

 void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
  unsigned NumOperands = FPI.getNumArgOperands();
-  Assert(((NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
-         "invalid arguments for constrained FP intrinsic", &FPI);
+  Assert(((NumOperands == 5 && FPI.isTernaryOp()) ||
+          (NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
+           "invalid arguments for constrained FP intrinsic", &FPI);
  Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-1)),
         "invalid exception behavior argument", &FPI);
  Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-2)),
--- a/test/CodeGen/X86/fp-intrinsics.ll
+++ b/test/CodeGen/X86/fp-intrinsics.ll
@ -1,4 +1,5 @@
-; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s
+; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck --check-prefix=COMMON --check-prefix=NO-FMA --check-prefix=FMACALL64 --check-prefix=FMACALL32 %s
+; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck -check-prefix=COMMON --check-prefix=HAS-FMA --check-prefix=FMA64 --check-prefix=FMA32 %s

 ; Verify that constants aren't folded to inexact results when the rounding mode
 ; is unknown.
@ -9,7 +10,7 @@
 ; }
 ;
 ; CHECK-LABEL: f1
-; CHECK: divsd
+; COMMON: divsd
 define double @f1() {
 entry:
  %div = call double @llvm.experimental.constrained.fdiv.f64(
@ -29,7 +30,7 @@ entry:
 ; }
 ;
 ; CHECK-LABEL: f2
-; CHECK:  subsd
+; COMMON:  subsd
 define double @f2(double %a) {
 entry:
  %div = call double @llvm.experimental.constrained.fsub.f64(
@ -50,9 +51,9 @@ entry:
 ; }
 ;
 ; CHECK-LABEL: f3:
-; CHECK:  subsd
-; CHECK:  mulsd
-; CHECK:  subsd
+; COMMON:  subsd
+; COMMON:  mulsd
+; COMMON:  subsd
 define double @f3(double %a, double %b) {
 entry:
  %sub = call double @llvm.experimental.constrained.fsub.f64(
@ -81,11 +82,11 @@ entry:
 ;   return a;
 ; }
 ;
-; 
+;
 ; CHECK-LABEL: f4:
-; CHECK: testl
-; CHECK: jle
-; CHECK: addsd
+; COMMON: testl
+; COMMON: jle
+; COMMON: addsd
 define double @f4(i32 %n, double %a) {
 entry:
  %cmp = icmp sgt i32 %n, 0
@ -105,7 +106,7 @@ if.end:

 ; Verify that sqrt(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f5
-; CHECK:  sqrtsd
+; COMMON:  sqrtsd
 define double @f5() {
 entry:
  %result = call double @llvm.experimental.constrained.sqrt.f64(double 42.0,
@ -116,7 +117,7 @@ entry:

 ; Verify that pow(42.1, 3.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f6
-; CHECK:  pow
+; COMMON:  pow
 define double @f6() {
 entry:
  %result = call double @llvm.experimental.constrained.pow.f64(double 42.1,
@ -128,7 +129,7 @@ entry:

 ; Verify that powi(42.1, 3) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f7
-; CHECK:  powi
+; COMMON:  powi
 define double @f7() {
 entry:
  %result = call double @llvm.experimental.constrained.powi.f64(double 42.1,
@ -140,7 +141,7 @@ entry:

 ; Verify that sin(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f8
-; CHECK:  sin
+; COMMON:  sin
 define double @f8() {
 entry:
  %result = call double @llvm.experimental.constrained.sin.f64(double 42.0,
@ -151,7 +152,7 @@ entry:

 ; Verify that cos(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f9
-; CHECK:  cos
+; COMMON:  cos
 define double @f9() {
 entry:
  %result = call double @llvm.experimental.constrained.cos.f64(double 42.0,
@ -162,7 +163,7 @@ entry:

 ; Verify that exp(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f10
-; CHECK:  exp
+; COMMON:  exp
 define double @f10() {
 entry:
  %result = call double @llvm.experimental.constrained.exp.f64(double 42.0,
@ -173,7 +174,7 @@ entry:

 ; Verify that exp2(42.1) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f11
-; CHECK:  exp2
+; COMMON:  exp2
 define double @f11() {
 entry:
  %result = call double @llvm.experimental.constrained.exp2.f64(double 42.1,
@ -184,7 +185,7 @@ entry:

 ; Verify that log(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f12
-; CHECK:  log
+; COMMON:  log
 define double @f12() {
 entry:
  %result = call double @llvm.experimental.constrained.log.f64(double 42.0,
@ -195,7 +196,7 @@ entry:

 ; Verify that log10(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f13
-; CHECK:  log10
+; COMMON:  log10
 define double @f13() {
 entry:
  %result = call double @llvm.experimental.constrained.log10.f64(double 42.0,
@ -206,7 +207,7 @@ entry:

 ; Verify that log2(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f14
-; CHECK:  log2
+; COMMON:  log2
 define double @f14() {
 entry:
  %result = call double @llvm.experimental.constrained.log2.f64(double 42.0,
@ -217,7 +218,8 @@ entry:

 ; Verify that rint(42.1) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f15
-; CHECK:  rint
+; NO-FMA:  rint
+; HAS-FMA: vroundsd
 define double @f15() {
 entry:
  %result = call double @llvm.experimental.constrained.rint.f64(double 42.1,
@ -229,7 +231,8 @@ entry:
 ; Verify that nearbyint(42.1) isn't simplified when the rounding mode is
 ; unknown.
 ; CHECK-LABEL: f16
-; CHECK:  nearbyint
+; NO-FMA:  nearbyint
+; HAS-FMA: vroundsd
 define double @f16() {
 entry:
  %result = call double @llvm.experimental.constrained.nearbyint.f64(
@ -239,6 +242,38 @@ entry:
  ret double %result
 }

+; Verify that fma(3.5, 3.5, 3.5) isn't simplified when the rounding mode is
+; unknown.
+; CHECK-LABEL: f17
+; FMACALL32: jmp fmaf  # TAILCALL
+; FMA32: vfmadd213ss
+define float @f17() {
+entry:
+  %result = call float @llvm.experimental.constrained.fma.f32(
+                                               float 3.5,
+                                               float 3.5,
+                                               float 3.5,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict")
+  ret float %result
+}
+
+; Verify that fma(42.1, 42.1, 42.1) isn't simplified when the rounding mode is
+; unknown.
+; CHECK-LABEL: f18
+; FMACALL64: jmp fma  # TAILCALL
+; FMA64: vfmadd213sd
+define double @f18() {
+entry:
+  %result = call double @llvm.experimental.constrained.fma.f64(
+                                               double 42.1,
+                                               double 42.1,
+                                               double 42.1,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict")
+  ret double %result
+}
+
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
 declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
 declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
@ -256,3 +291,5 @@ declare double @llvm.experimental.constrained.log10.f64(double, metadata, metada
 declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
--- a/test/Feature/fp-intrinsics.ll
+++ b/test/Feature/fp-intrinsics.ll
@ -73,7 +73,7 @@ entry:
 ;   return a;
 ; }
 ;
-; 
+;
 ; CHECK-LABEL: @f4
 ; CHECK-NOT: select
 ; CHECK: br i1 %cmp
@ -94,7 +94,6 @@ if.end:
  ret double %a.0
 }

-
 ; Verify that sqrt(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f5
 ; CHECK: call double @llvm.experimental.constrained.sqrt
@ -231,6 +230,18 @@ entry:
  ret double %result
 }

+; Verify that fma(42.1, 42.1, 42.1) isn't simplified when the rounding mode is
+; unknown.
+; CHECK-LABEL: f17
+; CHECK: call double @llvm.experimental.constrained.fma
+define double @f17() {
+entry:
+  %result = call double @llvm.experimental.constrained.fma.f64(double 42.1, double 42.1, double 42.1,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict")
+  ret double %result
+}
+
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
 declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
 declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
@ -248,3 +259,4 @@ declare double @llvm.experimental.constrained.log10.f64(double, metadata, metada
 declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)