propagate fast-math-flags on DAG nodes

After D10403, we had fast-math-flags (FMF) in the DAG, but they were disabled by default. Nick reported no crashing errors after some stress testing, so I enabled them at r243687. However, Escha soon notified us of a bug not covered by any in-tree regression tests: if we don't propagate the flags, we may fail to CSE DAG nodes because differing FMF prevents them from matching. There is one test case in this patch to prove that point. This patch attempts to fix or leave a 'TODO' for all of the in-tree places where we create nodes that are FMF-capable. I did this by putting an assert in SelectionDAG.getNode() to find any FMF-capable node that was being created without FMF (D11807). I then ran all regression tests and the test-suite and confirmed that everything passes. This patch exposes remaining work to get DAG FMF to be fully functional: (1) add the flags to non-binary nodes such as FCMP, FMA and FNEG; (2) add the flags to intrinsics; (3) use the flags as conditions for transforms rather than the current global settings. Differential Revision: http://reviews.llvm.org/D12095 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247815 91177308-0d34-0410-b5e6-96231b3b80d8
2025-02-25 13:20:57 +00:00 · 2015-09-16 16:31:21 +00:00 · 2015-09-16 16:31:21 +00:00 · 39490133e4
commit 39490133e4
parent cba458eecc
17 changed files with 344 additions and 205 deletions
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@ -670,7 +670,7 @@ public:
  SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT,
                  ArrayRef<SDUse> Ops);
  SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT,
-                  ArrayRef<SDValue> Ops);
+                  ArrayRef<SDValue> Ops, const SDNodeFlags *Flags = nullptr);
  SDValue getNode(unsigned Opcode, SDLoc DL, ArrayRef<EVT> ResultTys,
                  ArrayRef<SDValue> Ops);
  SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@ -44,6 +44,7 @@ class GlobalValue;
 class MachineBasicBlock;
 class MachineConstantPoolValue;
 class SDNode;
+class BinaryWithFlagsSDNode;
 class Value;
 class MCSymbol;
 template <typename T> struct DenseMapInfo;
@ -319,6 +320,61 @@ template<> struct simplify_type<SDUse> {
  }
 };

+/// These are IR-level optimization flags that may be propagated to SDNodes.
+/// TODO: This data structure should be shared by the IR optimizer and the
+/// backend.
+struct SDNodeFlags {
+private:
+  bool NoUnsignedWrap : 1;
+  bool NoSignedWrap : 1;
+  bool Exact : 1;
+  bool UnsafeAlgebra : 1;
+  bool NoNaNs : 1;
+  bool NoInfs : 1;
+  bool NoSignedZeros : 1;
+  bool AllowReciprocal : 1;
+  
+public:
+  /// Default constructor turns off all optimization flags.
+  SDNodeFlags() {
+    NoUnsignedWrap = false;
+    NoSignedWrap = false;
+    Exact = false;
+    UnsafeAlgebra = false;
+    NoNaNs = false;
+    NoInfs = false;
+    NoSignedZeros = false;
+    AllowReciprocal = false;
+  }
+  
+  // These are mutators for each flag.
+  void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
+  void setNoSignedWrap(bool b) { NoSignedWrap = b; }
+  void setExact(bool b) { Exact = b; }
+  void setUnsafeAlgebra(bool b) { UnsafeAlgebra = b; }
+  void setNoNaNs(bool b) { NoNaNs = b; }
+  void setNoInfs(bool b) { NoInfs = b; }
+  void setNoSignedZeros(bool b) { NoSignedZeros = b; }
+  void setAllowReciprocal(bool b) { AllowReciprocal = b; }
+  
+  // These are accessors for each flag.
+  bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
+  bool hasNoSignedWrap() const { return NoSignedWrap; }
+  bool hasExact() const { return Exact; }
+  bool hasUnsafeAlgebra() const { return UnsafeAlgebra; }
+  bool hasNoNaNs() const { return NoNaNs; }
+  bool hasNoInfs() const { return NoInfs; }
+  bool hasNoSignedZeros() const { return NoSignedZeros; }
+  bool hasAllowReciprocal() const { return AllowReciprocal; }
+  
+  /// Return a raw encoding of the flags.
+  /// This function should only be used to add data to the NodeID value.
+  unsigned getRawFlags() const {
+    return (NoUnsignedWrap << 0) | (NoSignedWrap << 1) | (Exact << 2) |
+    (UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) |
+    (NoSignedZeros << 6) | (AllowReciprocal << 7);
+  }
+};

 /// Represents one node in the SelectionDAG.
 ///
@ -639,6 +695,10 @@ public:
    return nullptr;
  }

+  /// This could be defined as a virtual function and implemented more simply
+  /// and directly, but it is not virtual, to avoid a vtable for this class.
+  const SDNodeFlags *getFlags() const;
+  
  /// Return the number of values defined/returned by this operator.
  unsigned getNumValues() const { return NumValues; }

@ -950,62 +1010,6 @@ inline void SDUse::setNode(SDNode *N) {
  if (N) N->addUse(*this);
 }

-/// These are IR-level optimization flags that may be propagated to SDNodes.
-/// TODO: This data structure should be shared by the IR optimizer and the
-/// the backend.
-struct SDNodeFlags {
-private:
-  bool NoUnsignedWrap : 1;
-  bool NoSignedWrap : 1;
-  bool Exact : 1;
-  bool UnsafeAlgebra : 1;
-  bool NoNaNs : 1;
-  bool NoInfs : 1;
-  bool NoSignedZeros : 1;
-  bool AllowReciprocal : 1;
-
-public:
-  /// Default constructor turns off all optimization flags.
-  SDNodeFlags() {
-    NoUnsignedWrap = false;
-    NoSignedWrap = false;
-    Exact = false;
-    UnsafeAlgebra = false;
-    NoNaNs = false;
-    NoInfs = false;
-    NoSignedZeros = false;
-    AllowReciprocal = false;
-  }
-
-  // These are mutators for each flag.
-  void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
-  void setNoSignedWrap(bool b) { NoSignedWrap = b; }
-  void setExact(bool b) { Exact = b; }
-  void setUnsafeAlgebra(bool b) { UnsafeAlgebra = b; }
-  void setNoNaNs(bool b) { NoNaNs = b; }
-  void setNoInfs(bool b) { NoInfs = b; }
-  void setNoSignedZeros(bool b) { NoSignedZeros = b; }
-  void setAllowReciprocal(bool b) { AllowReciprocal = b; }
-
-  // These are accessors for each flag.
-  bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
-  bool hasNoSignedWrap() const { return NoSignedWrap; }
-  bool hasExact() const { return Exact; }
-  bool hasUnsafeAlgebra() const { return UnsafeAlgebra; }
-  bool hasNoNaNs() const { return NoNaNs; }
-  bool hasNoInfs() const { return NoInfs; }
-  bool hasNoSignedZeros() const { return NoSignedZeros; }
-  bool hasAllowReciprocal() const { return AllowReciprocal; }
-
-  /// Return a raw encoding of the flags.
-  /// This function should only be used to add data to the NodeID value.
-  unsigned getRawFlags() const {
-    return (NoUnsignedWrap << 0) | (NoSignedWrap << 1) | (Exact << 2) |
-           (UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) |
-           (NoSignedZeros << 6) | (AllowReciprocal << 7);
-  }
-};
-
 /// This class is used for single-operand SDNodes.  This is solely
 /// to allow co-allocation of node operands with the node itself.
 class UnarySDNode : public SDNode {
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -348,10 +348,12 @@ namespace {
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
-    SDValue BuildReciprocalEstimate(SDValue Op);
-    SDValue BuildRsqrtEstimate(SDValue Op);
-    SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
-    SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
+    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
+    SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
+    SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
+                                 SDNodeFlags *Flags);
+    SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
+                                 SDNodeFlags *Flags);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@ -614,6 +616,9 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
  assert(Op.hasOneUse() && "Unknown reuse!");

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+
+  const SDNodeFlags *Flags = Op.getNode()->getFlags();
+  
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
@ -631,12 +636,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
-                         Op.getOperand(1));
+                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
-                       Op.getOperand(0));
+                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    assert(Options.UnsafeFPMath);
@ -648,7 +653,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
-                       Op.getOperand(1), Op.getOperand(0));
+                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
@ -660,13 +665,13 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
-                         Op.getOperand(1));
+                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
-                                            LegalOperations, Depth+1));
+                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
@ -1482,13 +1487,8 @@ SDValue DAGCombiner::combine(SDNode *N) {
    // Constant operands are canonicalized to RHS.
    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
      SDValue Ops[] = {N1, N0};
-      SDNode *CSENode;
-      if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) {
-        CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
-                                      &BinNode->Flags);
-      } else {
-        CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
-      }
+      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
+                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
@ -7931,6 +7931,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
+  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;

  // fold vector ops
  if (VT.isVector())
@ -7939,23 +7940,23 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
-    return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
+    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
-    return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
+    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N0,
-                       GetNegatedExpression(N1, DAG, LegalOperations));
+                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N1,
-                       GetNegatedExpression(N0, DAG, LegalOperations));
+                       GetNegatedExpression(N0, DAG, LegalOperations), Flags);

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
@ -7971,7 +7972,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
    if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
        isa<ConstantFPSDNode>(N0.getOperand(1)))
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
-                         DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1));
+                         DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
+                                     Flags),
+                         Flags);

    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
@ -7992,8 +7995,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
-                                       DAG.getConstantFP(1.0, DL, VT));
-          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
+                                       DAG.getConstantFP(1.0, DL, VT), Flags);
+          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
@ -8001,8 +8004,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
-                                       DAG.getConstantFP(2.0, DL, VT));
-          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
+                                       DAG.getConstantFP(2.0, DL, VT), Flags);
+          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
        }
      }

@ -8013,8 +8016,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
-                                       DAG.getConstantFP(1.0, DL, VT));
-          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
+                                       DAG.getConstantFP(1.0, DL, VT), Flags);
+          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
@ -8022,8 +8025,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
-                                       DAG.getConstantFP(2.0, DL, VT));
-          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
+                                       DAG.getConstantFP(2.0, DL, VT), Flags);
+          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
        }
      }

@ -8033,7 +8036,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
        if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT,
-                             N1, DAG.getConstantFP(3.0, DL, VT));
+                             N1, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

@ -8043,7 +8046,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT,
-                             N0, DAG.getConstantFP(3.0, DL, VT));
+                             N0, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

@ -8053,8 +8056,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
-        return DAG.getNode(ISD::FMUL, DL, VT,
-                           N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT));
+        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
+                           DAG.getConstantFP(4.0, DL, VT), Flags);
      }
    }
  } // enable-unsafe-fp-math
@ -8076,6 +8079,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  const TargetOptions &Options = DAG.getTarget().Options;
+  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;

  // fold vector ops
  if (VT.isVector())
@ -8084,12 +8088,12 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP)
-    return DAG.getNode(ISD::FSUB, dl, VT, N0, N1);
+    return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags);

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
    return DAG.getNode(ISD::FADD, dl, VT, N0,
-                       GetNegatedExpression(N1, DAG, LegalOperations));
+                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // If 'unsafe math' is enabled, fold lots of things.
  if (Options.UnsafeFPMath) {
@ -8140,6 +8144,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
+  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;

  // fold vector ops
  if (VT.isVector()) {
@ -8150,12 +8155,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
-    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
+    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
-    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
+    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
@ -8184,8 +8189,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
        // the second operand of the outer multiply are constants.
        if ((N1CFP && isConstOrConstSplatFP(N01)) ||
            (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
-          SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
-          return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
+          SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
+          return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
        }
      }
    }
@ -8198,14 +8203,14 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
        (N0.getOperand(0) == N0.getOperand(1)) &&
        N0.hasOneUse()) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
-      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
-      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
+      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
+      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
-    return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
+    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
@ -8220,7 +8225,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
-                           GetNegatedExpression(N1, DAG, LegalOperations));
+                           GetNegatedExpression(N1, DAG, LegalOperations),
+                           Flags);
    }
  }

@ -8250,6 +8256,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
+  // TODO: The FMA node should have flags that propagate to these nodes.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
@ -8259,13 +8266,19 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

+  // TODO: FMA nodes should have flags that propagate to the created nodes.
+  // For now, create a Flags object for use with all unsafe math transforms.
+  SDNodeFlags Flags;
+  Flags.setUnsafeAlgebra(true);
+
  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
  if (Options.UnsafeFPMath && N1CFP &&
      N2.getOpcode() == ISD::FMUL &&
      N0 == N2.getOperand(0) &&
      N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
-                       DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
+                       DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
+                                   &Flags), &Flags);
  }


@ -8275,7 +8288,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
      N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
    return DAG.getNode(ISD::FMA, dl, VT,
                       N0.getOperand(0),
-                       DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
+                       DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
+                                   &Flags),
                       N2);
  }

@ -8283,29 +8297,33 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
+      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, dl, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
      AddToWorklist(RHSNeg.getNode());
+      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
    }
  }

  // (fma x, c, x) -> (fmul x, (c+1))
-  if (Options.UnsafeFPMath && N1CFP && N0 == N2)
+  if (Options.UnsafeFPMath && N1CFP && N0 == N2) {
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT,
-                                   N1, DAG.getConstantFP(1.0, dl, VT)));
-
+                                   N1, DAG.getConstantFP(1.0, dl, VT),
+                                   &Flags), &Flags);
+  }
  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
  if (Options.UnsafeFPMath && N1CFP &&
-      N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
+      N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT,
-                                   N1, DAG.getConstantFP(-1.0, dl, VT)));
-
+                                   N1, DAG.getConstantFP(-1.0, dl, VT),
+                                   &Flags), &Flags);
+  }

  return SDValue();
 }
@ -8349,17 +8367,15 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
-  // FIXME: This optimization requires some level of fast-math, so the
-  // created reciprocal node should at least have the 'allowReciprocal'
-  // fast-math-flag set.
-  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
+  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
+  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
-                                    Reciprocal);
+                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
@ -8378,6 +8394,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
+  SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;

  // fold vector ops
  if (VT.isVector())
@ -8386,7 +8403,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
-    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
+    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (Options.UnsafeFPMath) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
@ -8405,28 +8422,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
-                           DAG.getConstantFP(Recip, DL, VT));
+                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
-      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
-        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) {
+        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
-      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
-        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
-      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
-        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
@ -8443,18 +8462,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
-        if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
-          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
+        if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
+          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
-          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
-    if (SDValue RV = BuildReciprocalEstimate(N1)) {
+    if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(RV.getNode());
-      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    }
  }

@ -8466,7 +8485,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
-                           GetNegatedExpression(N1, DAG, LegalOperations));
+                           GetNegatedExpression(N1, DAG, LegalOperations),
+                           Flags);
    }
  }

@ -8485,7 +8505,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {

  // fold (frem c1, c2) -> fmod(c1,c2)
  if (N0CFP && N1CFP)
-    return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
+    return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
+                       &cast<BinaryWithFlagsSDNode>(N)->Flags);

  return SDValue();
 }
@ -8494,14 +8515,19 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
  if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
    return SDValue();

+  // TODO: FSQRT nodes should have flags that propagate to the created nodes.
+  // For now, create a Flags object for use with all unsafe math transforms.
+  SDNodeFlags Flags;
+  Flags.setUnsafeAlgebra(true);
+
  // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
-  SDValue RV = BuildRsqrtEstimate(N->getOperand(0));
+  SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags);
  if (!RV)
    return SDValue();

  EVT VT = RV.getValueType();
  SDLoc DL(N);
-  RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
+  RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags);
  AddToWorklist(RV.getNode());

  // Unfortunately, RV is now NaN if the input was exactly 0.
@ -8916,9 +8942,10 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
           TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
-        return DAG.getNode(
-            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
-            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
+        return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+                           DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+                                       N0.getOperand(1)),
+                           &cast<BinaryWithFlagsSDNode>(N0)->Flags);
    }
  }

@ -13346,7 +13373,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
      }

      SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT,
-                                   LHSOp, RHSOp);
+                                   LHSOp, RHSOp, N->getFlags());

      // We need the resulting constant to be legal if we are in a phase after
      // legalization, so zero extend to the smallest operand type if required.
@ -13383,7 +13410,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
      EVT VT = N->getValueType(0);
      SDValue UndefVector = LHS.getOperand(1);
      SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
-                                     LHS.getOperand(0), RHS.getOperand(0));
+                                     LHS.getOperand(0), RHS.getOperand(0),
+                                     N->getFlags());
      AddUsersToWorklist(N);
      return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
                                  &SVN0->getMask()[0]);
@ -13895,7 +13923,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  return S;
 }

-SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
+SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

@ -13919,16 +13947,16 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {

      // Newton iterations: Est = Est + Est (1 - Arg * Est)
      for (unsigned i = 0; i < Iterations; ++i) {
-        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est);
+        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
        AddToWorklist(NewEst.getNode());

-        NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst);
+        NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

-        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

-        Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst);
+        Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
        AddToWorklist(Est.getNode());
      }
    }
@ -13945,31 +13973,32 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
 /// As a result, we precompute A/2 prior to the iteration loop.
 SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
-                                          unsigned Iterations) {
+                                          unsigned Iterations,
+                                          SDNodeFlags *Flags) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);

  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
  // this entire sequence requires only one FP constant.
-  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

-  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
+  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
  for (unsigned i = 0; i < Iterations; ++i) {
-    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
    AddToWorklist(NewEst.getNode());

-    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

-    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

-    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
    AddToWorklist(Est.getNode());
  }
  return Est;
@ -13981,7 +14010,8 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
 ///     =>
 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
 SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
-                                          unsigned Iterations) {
+                                          unsigned Iterations,
+                                          SDNodeFlags *Flags) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
@ -13989,25 +14019,25 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,

  // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
  for (unsigned i = 0; i < Iterations; ++i) {
-    SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
+    SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
    AddToWorklist(HalfEst.getNode());

-    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
    AddToWorklist(Est.getNode());

-    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
    AddToWorklist(Est.getNode());

-    Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree);
+    Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags);
    AddToWorklist(Est.getNode());

-    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst);
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags);
    AddToWorklist(Est.getNode());
  }
  return Est;
 }

-SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
+SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

@ -14019,8 +14049,8 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
    AddToWorklist(Est.getNode());
    if (Iterations) {
      Est = UseOneConstNR ?
-        BuildRsqrtNROneConst(Op, Est, Iterations) :
-        BuildRsqrtNRTwoConst(Op, Est, Iterations);
+        BuildRsqrtNROneConst(Op, Est, Iterations, Flags) :
+        BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags);
    }
    return Est;
  }
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@ -2443,6 +2443,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
                                                   SDValue Op0,
                                                   EVT DestVT,
                                                   SDLoc dl) {
+  // TODO: Should any fast-math-flags be set for the created nodes?
+
  if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
    // simple 32-bit [signed|unsigned] integer to float/double expansion

@ -3120,6 +3122,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                        Node->getOperand(0),
                        Tmp1, ISD::SETLT);
    True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+    // TODO: Should any fast-math-flags be set for the FSUB?
    False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
                        DAG.getNode(ISD::FSUB, dl, VT,
                                    Node->getOperand(0), Tmp1));
@ -3287,6 +3290,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
  case ISD::FNEG:
    // Expand Y = FNEG(X) ->  Y = SUB -0.0, X
    Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0));
+    // TODO: If FNEG has fast-math-flags, propagate them to the FSUB.
    Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
                       Node->getOperand(0));
    Results.push_back(Tmp1);
@ -3513,8 +3517,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
    EVT VT = Node->getValueType(0);
    if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
        TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
+      const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(Node)->Flags;
      Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
-      Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+      Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags);
      Results.push_back(Tmp1);
    } else {
      Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
@ -4267,7 +4272,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
  case ISD::FPOW: {
    Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
    Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
-    Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+    Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
+                       Node->getFlags());
    Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
                                  Tmp3, DAG.getIntPtrConstant(0, dl)));
    break;
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@ -1341,6 +1341,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
    break;
  }

+  // TODO: Are there fast-math-flags to propagate to this FADD?
  Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
                   DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble,
                                             APInt(128, Parts)),
@ -1511,6 +1512,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
    SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
    //  X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
    // FIXME: generated code sucks.
+    // TODO: Are there fast-math-flags to propagate to this FSUB?
    return DAG.getSelectCC(dl, N->getOperand(0), Tmp,
                           DAG.getNode(ISD::ADD, dl, MVT::i32,
                                       DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
@ -1912,8 +1914,7 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) {
  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  SDValue Op0 = GetPromotedFloat(N->getOperand(0));
  SDValue Op1 = GetPromotedFloat(N->getOperand(1));
-
-  return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+  return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags());
 }

 SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) {
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@ -415,8 +415,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
    else
      Operands[j] = Op.getOperand(j);
  }
-
-  Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands);
+
+  Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
  if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
      (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
       NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
@ -1001,6 +1001,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {

  // Convert hi and lo to floats
  // Convert the hi part back to the upper values
+  // TODO: Can any fast-math-flags be set on these nodes?
  SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
          fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
  SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
@ -1014,6 +1015,7 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
  if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
    SDLoc DL(Op);
    SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
+    // TODO: If FNEG has fast-math-flags, propagate them to this FSUB.
    return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
                       Zero, Op.getOperand(0));
  }
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@ -141,7 +141,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
  SDValue LHS = GetScalarizedVector(N->getOperand(0));
  SDValue RHS = GetScalarizedVector(N->getOperand(1));
  return DAG.getNode(N->getOpcode(), SDLoc(N),
-                     LHS.getValueType(), LHS, RHS);
+                     LHS.getValueType(), LHS, RHS, N->getFlags());
 }

 SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
@ -704,8 +704,10 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
  GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
  SDLoc dl(N);

-  Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
-  Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+  const SDNodeFlags *Flags = N->getFlags();
+  unsigned Opcode = N->getOpcode();
+  Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
+  Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
 }

 void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
@ -2073,7 +2075,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
-  return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+  return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
 }

 SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
@ -2084,6 +2086,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
  EVT WidenEltVT = WidenVT.getVectorElementType();
  EVT VT = WidenVT;
  unsigned NumElts =  VT.getVectorNumElements();
+  const SDNodeFlags *Flags = N->getFlags();
  while (!TLI.isTypeLegal(VT) && NumElts != 1) {
    NumElts = NumElts / 2;
    VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
@ -2093,7 +2096,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
    // Operation doesn't trap so just widen as normal.
    SDValue InOp1 = GetWidenedVector(N->getOperand(0));
    SDValue InOp2 = GetWidenedVector(N->getOperand(1));
-    return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+    return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
  }

  // No legal vector version so unroll the vector operation and then widen.
@ -2123,7 +2126,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
      SDValue EOp2 = DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
          DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
-      ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+      ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags);
      Idx += NumElts;
      CurNumElts -= NumElts;
    }
@ -2141,7 +2144,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
            ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2,
            DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
        ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
-                                             EOp1, EOp2);
+                                             EOp1, EOp2, Flags);
      }
      CurNumElts = 0;
    }
@ -2231,7 +2234,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {

  unsigned Opcode = N->getOpcode();
  unsigned InVTNumElts = InVT.getVectorNumElements();
-
+  const SDNodeFlags *Flags = N->getFlags();
  if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
    InOp = GetWidenedVector(N->getOperand(0));
    InVT = InOp.getValueType();
@ -2239,7 +2242,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
    if (InVTNumElts == WidenNumElts) {
      if (N->getNumOperands() == 1)
        return DAG.getNode(Opcode, DL, WidenVT, InOp);
-      return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+      return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
    }
  }

@ -2260,7 +2263,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
      SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
      if (N->getNumOperands() == 1)
        return DAG.getNode(Opcode, DL, WidenVT, InVec);
-      return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
+      return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
    }

    if (InVTNumElts % WidenNumElts == 0) {
@ -2270,7 +2273,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
      // Extract the input and convert the shorten input vector.
      if (N->getNumOperands() == 1)
        return DAG.getNode(Opcode, DL, WidenVT, InVal);
-      return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
+      return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
    }
  }

@ -2286,7 +2289,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
    if (N->getNumOperands() == 1)
      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
    else
-      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
  }

  SDValue UndefVal = DAG.getUNDEF(EltVT);
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -397,24 +397,21 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
    ID.AddInteger(Op.getResNo());
  }
 }
+
 /// Add logical or fast math flag values to FoldingSetNodeID value.
 static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode,
                           const SDNodeFlags *Flags) {
-  if (!Flags || !isBinOpWithFlags(Opcode))
+  if (!isBinOpWithFlags(Opcode))
    return;

-  unsigned RawFlags = Flags->getRawFlags();
-  // If no flags are set, do not alter the ID. We must match the ID of nodes
-  // that were created without explicitly specifying flags. This also saves time
-  // and allows a gradual increase in API usage of the optional optimization
-  // flags.
-  if (RawFlags != 0)
-    ID.AddInteger(RawFlags);
+  unsigned RawFlags = 0;
+  if (Flags)
+    RawFlags = Flags->getRawFlags();
+  ID.AddInteger(RawFlags);
 }

 static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) {
-  if (auto *Node = dyn_cast<BinaryWithFlagsSDNode>(N))
-    AddNodeIDFlags(ID, Node->getOpcode(), &Node->Flags);
+  AddNodeIDFlags(ID, N->getOpcode(), N->getFlags());
 }

 static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
@ -3191,8 +3188,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
  case ISD::FNEG:
    // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
    if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
+      // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
      return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
-                     Operand.getNode()->getOperand(0));
+                       Operand.getNode()->getOperand(0),
+                       &cast<BinaryWithFlagsSDNode>(Operand.getNode())->Flags);
    if (OpOpcode == ISD::FNEG)  // --X -> X
      return Operand.getNode()->getOperand(0);
    break;
@ -5394,12 +5393,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
 }

 SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
-                              ArrayRef<SDValue> Ops) {
+                              ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) {
  unsigned NumOps = Ops.size();
  switch (NumOps) {
  case 0: return getNode(Opcode, DL, VT);
  case 1: return getNode(Opcode, DL, VT, Ops[0]);
-  case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+  case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
  case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
  default: break;
  }
@ -6850,6 +6849,12 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
  return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
 }

+const SDNodeFlags *SDNode::getFlags() const {
+  if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
+    return &FlagsNode->Flags;
+  return nullptr;
+}
+
 SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
  assert(N->getNumValues() == 1 &&
         "Can't unroll a vector with multiple results!");
@ -6886,9 +6891,11 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
    }

    switch (N->getOpcode()) {
-    default:
-      Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands));
+    default: {
+      Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands,
+                                N->getFlags()));
      break;
+    }
    case ISD::VSELECT:
      Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands));
      break;
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@ -80,7 +80,7 @@ LimitFPPrecision("limit-float-precision",
                 cl::init(0));

 static cl::opt<bool>
-EnableFMFInDAG("enable-fmf-dag", cl::init(false), cl::Hidden,
+EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
                cl::desc("Enable fast-math-flags for DAG nodes"));

 // Limit the width of DAG chains. This is important in general to prevent
@ -2347,6 +2347,10 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));
  ISD::CondCode Condition = getFCmpCondCode(predicate);
+
+  // FIXME: FCmp instructions have fast-math-flags in IR, so we should use them.
+  // FIXME: We should propagate the fast-math-flags to the DAG node itself for
+  // further optimization, but currently FMF is only applicable to binary nodes.
  if (TM.Options.NoNaNsFPMath)
    Condition = getFCmpCodeWithoutNaN(Condition);
  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
@ -3629,6 +3633,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) {

 static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
                                       SelectionDAG &DAG) {
+  // TODO: What fast-math-flags should be set on the floating-point nodes?
+
  //   IntegerPartOfX = ((int32_t)(t0);
  SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);

@ -3727,6 +3733,8 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
    //
    //   #define LOG2OFe 1.4426950f
    //   t0 = Op * LOG2OFe
+
+    // TODO: What fast-math-flags should be set here?
    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
                             getF32Constant(DAG, 0x3fb8aa3b, dl));
    return getLimitedPrecisionExp2(t0, dl, DAG);
@ -3740,6 +3748,9 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
 /// limited-precision mode.
 static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                         const TargetLowering &TLI) {
+
+  // TODO: What fast-math-flags should be set on the floating-point nodes?
+
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@ -3836,6 +3847,9 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
 /// limited-precision mode.
 static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
+
+  // TODO: What fast-math-flags should be set on the floating-point nodes?
+
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@ -3931,6 +3945,9 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
 /// limited-precision mode.
 static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
                           const TargetLowering &TLI) {
+
+  // TODO: What fast-math-flags should be set on the floating-point nodes?
+
  if (Op.getValueType() == MVT::f32 &&
      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
@ -4040,6 +4057,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
    }
  }

+  // TODO: What fast-math-flags should be set on the FMUL node?
  if (IsExp10) {
    // Put the exponent in the right bit position for later addition to the
    // final result:
@ -4083,6 +4101,8 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
      // the benefit of being both really simple and much better than a libcall.
      SDValue Res;  // Logically starts equal to 1.0
      SDValue CurSquare = LHS;
+      // TODO: Intrinsics should have fast-math-flags that propagate to these
+      // nodes.
      while (Val) {
        if (Val & 1) {
          if (Res.getNode())
@ -4736,6 +4756,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                               getValue(I.getArgOperand(1)),
                               getValue(I.getArgOperand(2))));
    } else {
+      // TODO: Intrinsic calls should have fast-math-flags.
      SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
                                getValue(I.getArgOperand(0)).getValueType(),
                                getValue(I.getArgOperand(0)),
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@ -1088,6 +1088,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
+  // TODO: Should this propagate fast-math-flags?
  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
                                DAG.getConstantFP(1.0f, DL, MVT::f32),
                                Op.getOperand(1));
@ -1602,6 +1603,7 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool
  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);

+  // TODO: Should this propagate fast-math-flags?
  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
                           fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
@ -1912,6 +1914,8 @@ SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
  SDValue X = Op.getOperand(0);
  SDValue Y = Op.getOperand(1);

+  // TODO: Should this propagate fast-math-flags?
+
  SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
  SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
  SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y);
@ -1940,6 +1944,7 @@ SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
  SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);

  SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
+  // TODO: Should this propagate fast-math-flags?
  return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
 }

@ -2017,6 +2022,8 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
  SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
  SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);

+  // TODO: Should this propagate fast-math-flags?
+
  SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
  SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);

@ -2046,6 +2053,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const

  SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X);

+  // TODO: Should this propagate fast-math-flags?
+
  SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T);

  SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff);
@ -2156,6 +2165,7 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
  SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);

  SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
+  // TODO: Should this propagate fast-math-flags?
  return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
 }

@ -2178,7 +2188,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,

  SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
                              DAG.getConstant(32, SL, MVT::i32));
-
+  // TODO: Should this propagate fast-math-flags?
  return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
 }

@ -2203,6 +2213,7 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
                           DAG.getConstant(1, DL, MVT::i32));
  SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
+  // TODO: Should this propagate fast-math-flags?
  FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
                        DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32
  return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
@ -2229,7 +2240,7 @@ SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
                                 MVT::f64);
  SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL,
                                 MVT::f64);
-
+  // TODO: Should this propagate fast-math-flags?
  SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);

  SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@ -946,6 +946,8 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);
+
+  // TODO: Should this propagate fast-math-flags?
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@ -1000,6 +1000,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  SDLoc DL(Op);
  unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

+  // TODO: Should this propagate fast-math-flags?
+
  switch (IntrinsicID) {
  case Intrinsic::r600_read_ngroups_x:
    return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
@ -1248,8 +1250,10 @@ SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const {
  if (Unsafe) {
    // Turn into multiply by the reciprocal.
    // x / y -> x * (1.0 / y)
+    SDNodeFlags Flags;
+    Flags.setUnsafeAlgebra(true);
    SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
-    return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip);
+    return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, &Flags);
  }

  return SDValue();
@ -1286,6 +1290,8 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {

  SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);

+  // TODO: Should this propagate fast-math-flags?
+
  r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);

  SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
@ -1405,6 +1411,7 @@ SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
+  // TODO: Should this propagate fast-math-flags?
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
                                  DAG.getNode(ISD::FMUL, DL, VT, Arg,
                                              DAG.getConstantFP(0.5/M_PI, DL,
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@ -6385,6 +6385,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {

 static SDValue
 LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
+  // TODO: Should this propagate fast-math-flags?
+
  // Convert to float
  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
@ -6415,6 +6417,8 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {

 static SDValue
 LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
+  // TODO: Should this propagate fast-math-flags?
+
  SDValue N2;
  // Convert to float.
  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
@ -6487,6 +6491,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
 }

 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+  // TODO: Should this propagate fast-math-flags?
  EVT VT = Op.getValueType();
  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
         "unexpected type for custom-lowering ISD::UDIV");
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@ -1786,9 +1786,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
    return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_fadd_w:
-  case Intrinsic::mips_fadd_d:
+  case Intrinsic::mips_fadd_d: {
+    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
+  }
  // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
  case Intrinsic::mips_fceq_w:
  case Intrinsic::mips_fceq_d:
@ -1831,9 +1833,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETUNE);
  case Intrinsic::mips_fdiv_w:
-  case Intrinsic::mips_fdiv_d:
+  case Intrinsic::mips_fdiv_d: {
+    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
+  }
  case Intrinsic::mips_ffint_u_w:
  case Intrinsic::mips_ffint_u_d:
    return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
@ -1856,6 +1860,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
  }
  case Intrinsic::mips_fexp2_w:
  case Intrinsic::mips_fexp2_d: {
+    // TODO: If intrinsics have fast-math-flags, propagate them.
    EVT ResTy = Op->getValueType(0);
    return DAG.getNode(
        ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
@ -1869,11 +1874,14 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
    return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
  case Intrinsic::mips_fmul_w:
-  case Intrinsic::mips_fmul_d:
+  case Intrinsic::mips_fmul_d: {
+    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
+  }
  case Intrinsic::mips_fmsub_w:
  case Intrinsic::mips_fmsub_d: {
+    // TODO: If intrinsics have fast-math-flags, propagate them.
    EVT ResTy = Op->getValueType(0);
    return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1),
                       DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy,
@ -1886,9 +1894,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
  case Intrinsic::mips_fsqrt_d:
    return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
  case Intrinsic::mips_fsub_w:
-  case Intrinsic::mips_fsub_d:
+  case Intrinsic::mips_fsub_d: {
+    // TODO: If intrinsics have fast-math-flags, propagate them.
    return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
+  }
  case Intrinsic::mips_ftrunc_u_w:
  case Intrinsic::mips_ftrunc_u_d:
    return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@ -5999,7 +5999,11 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  if (!DAG.getTarget().Options.NoInfsFPMath ||
      !DAG.getTarget().Options.NoNaNsFPMath)
    return Op;
-
+  // TODO: Propagate flags from the select rather than global settings.
+  SDNodeFlags Flags;
+  Flags.setNoInfs(true);
+  Flags.setNoNaNs(true);
+  
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  EVT ResVT = Op.getValueType();
@ -6049,7 +6053,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  case ISD::SETNE:
    std::swap(TV, FV);
  case ISD::SETEQ:
-    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
@ -6059,25 +6063,25 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
-    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
-    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
-    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
-    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -12229,6 +12229,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
                  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
                  false, false, false, 16);
  SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
+  // TODO: Are there any fast-math-flags to propagate here?
  SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
  SDValue Result;

@ -12278,6 +12279,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
                  DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));

  // Subtract the bias.
+  // TODO: Are there any fast-math-flags to propagate here?
  SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);

  // Handle final rounding.
@ -12390,6 +12392,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,

  //     float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
  SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High);
+  // TODO: Are there any fast-math-flags to propagate here?
  SDValue FHigh =
      DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd);
  //     return (float4) lo + fhi;
@ -12509,6 +12512,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
      false, false, false, 4);
  // Extend everything to 80 bits to force it to be done on x87.
+  // TODO: Are there any fast-math-flags to propagate here?
  SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
  return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
                     DAG.getIntPtrConstant(0, dl));
@ -15847,8 +15851,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
                                      Mask, PassThru, Subtarget, DAG);
        }
      }
-      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
-                                              Src1,Src2),
+      // TODO: Intrinsics should have fast-math-flags to propagate.
+      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,Src1,Src2),
                                  Mask, PassThru, Subtarget, DAG);
    }
    case INTR_TYPE_2OP_MASK_RM: {
@ -19266,6 +19270,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
    SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
                             DAG.getBitcast(MVT::v2i64, VBias));
    Or = DAG.getBitcast(MVT::v2f64, Or);
+    // TODO: Are there any fast-math-flags to propagate here?
    SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
    Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
    return;
--- a/test/CodeGen/X86/dag-fmf-cse.ll
+++ b/test/CodeGen/X86/dag-fmf-cse.ll
@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=fma -enable-unsafe-fp-math -enable-fmf-dag=1 | FileCheck %s
+
+; Regression test for CSE of DAG nodes that carry fast-math-flags.
+; If fast-math-flags are propagated correctly, the mul1 expression
+; should be recognized as a factor in the last fsub, so we should
+; see a single mul followed by an add, not a mul and an fma:
+; a * b - (-a * b) ---> (a * b) + (a * b)
+
+define float @fmf_should_not_break_cse(float %a, float %b) {
+; CHECK-LABEL: fmf_should_not_break_cse:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmulss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+
+  %mul1 = fmul fast float %a, %b         ; a * b
+  %nega = fsub fast float 0.0, %a        ; -a, written as 0.0 - a
+  %mul2 = fmul fast float %nega, %b      ; (-a) * b
+  %abx2 = fsub fast float %mul1, %mul2   ; (a * b) - ((-a) * b)
+  ret float %abx2
+}
+