This adds constrained intrinsics for the signed and unsigned conversions
of integers to floating point.

This includes some of Craig Topper's changes for promotion support from
D71130.

Differential Revision: https://reviews.llvm.org/D69275
Kevin P. Neal 2019-10-16 15:24:47 -04:00
parent 1ed832e424
commit b1d8576b0a
17 changed files with 2833 additions and 91 deletions


@ -15667,6 +15667,78 @@ Semantics:
The result produced is a signed integer converted from the floating
point operand. The value is truncated, so it is rounded towards zero.
'``llvm.experimental.constrained.uitofp``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <ty2>
@llvm.experimental.constrained.uitofp(<type> <value>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.uitofp``' intrinsic converts an
unsigned integer ``value`` to a floating-point value of type ``ty2``.
Arguments:
""""""""""
The first argument to the '``llvm.experimental.constrained.uitofp``'
intrinsic must be an :ref:`integer <t_integer>` or :ref:`vector
<t_vector>` of integer values.
The second and third arguments specify the rounding mode and exception
behavior as described above.
Semantics:
""""""""""
An inexact floating-point exception will be raised if rounding is required.
Any result produced is a floating point value converted from the input
integer operand.
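For example, converting an unsigned 32-bit integer to ``double`` under
dynamic rounding with strict exception semantics (mirroring the tests added
in this commit) looks like:

::

      %res = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict")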
'``llvm.experimental.constrained.sitofp``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <ty2>
@llvm.experimental.constrained.sitofp(<type> <value>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.sitofp``' intrinsic converts a
signed integer ``value`` to a floating-point value of type ``ty2``.
Arguments:
""""""""""
The first argument to the '``llvm.experimental.constrained.sitofp``'
intrinsic must be an :ref:`integer <t_integer>` or :ref:`vector
<t_vector>` of integer values.
The second and third arguments specify the rounding mode and exception
behavior as described above.
Semantics:
""""""""""
An inexact floating-point exception will be raised if rounding is required.
Any result produced is a floating point value converted from the input
integer operand.
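For example:

::

      %res = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x,
                            metadata !"round.dynamic",
                            metadata !"fpexcept.strict")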
'``llvm.experimental.constrained.fptrunc``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


@ -312,6 +312,13 @@ namespace ISD {
STRICT_FP_TO_SINT,
STRICT_FP_TO_UINT,
/// STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to
/// a floating point value. These have the same semantics as sitofp and
/// uitofp in IR.
/// They are used to limit optimizations while the DAG is being optimized.
STRICT_SINT_TO_FP,
STRICT_UINT_TO_FP,
/// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating
/// point type down to the precision of the destination VT. TRUNC is a
/// flag, which is always an integer that is zero or one. If TRUNC is 0,


@ -811,6 +811,11 @@ public:
/// float type VT, by either extending or rounding (by truncation).
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT);
/// Convert Op, which must be a STRICT operation of float type, to the
/// float type VT, by either extending or rounding (by truncation).
std::pair<SDValue, SDValue>
getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT);
/// Convert Op, which must be of integer type, to the
/// integer type VT, by either any-extending or truncating it.
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);


@ -4123,14 +4123,18 @@ public:
/// Expand float to UINT conversion
/// \param N Node to expand
/// \param Result output after conversion
/// \param Chain output chain after conversion
/// \returns True, if the expansion was successful, false otherwise
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
SelectionDAG &DAG) const;
/// Expand UINT(i64) to double(f64) conversion
/// \param N Node to expand
/// \param Result output after conversion
/// \param Chain output chain after conversion
/// \returns True, if the expansion was successful, false otherwise
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
SelectionDAG &DAG) const;
/// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;


@ -41,6 +41,8 @@ INSTRUCTION(FMul, 2, 1, experimental_constrained_fmul, FMUL)
INSTRUCTION(FDiv, 2, 1, experimental_constrained_fdiv, FDIV)
INSTRUCTION(FRem, 2, 1, experimental_constrained_frem, FREM)
INSTRUCTION(FPExt, 1, 0, experimental_constrained_fpext, FP_EXTEND)
INSTRUCTION(SIToFP, 1, 1, experimental_constrained_sitofp, SINT_TO_FP)
INSTRUCTION(UIToFP, 1, 1, experimental_constrained_uitofp, UINT_TO_FP)
INSTRUCTION(FPToSI, 1, 0, experimental_constrained_fptosi, FP_TO_SINT)
INSTRUCTION(FPToUI, 1, 0, experimental_constrained_fptoui, FP_TO_UINT)
INSTRUCTION(FPTrunc, 1, 1, experimental_constrained_fptrunc, FP_ROUND)


@ -640,6 +640,16 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
[ llvm_anyfloat_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_sitofp : Intrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyint_ty,
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_uitofp : Intrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyint_ty,
llvm_metadata_ty,
llvm_metadata_ty ]>;
def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ],
[ llvm_anyfloat_ty,
llvm_metadata_ty,


@ -173,10 +173,9 @@ private:
SDValue NewIntValue) const;
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
SDValue ExpandFABS(SDNode *Node) const;
SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
SmallVectorImpl<SDValue> &Results);
void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
SmallVectorImpl<SDValue> &Results);
@ -1010,6 +1009,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_LRINT:
case ISD::STRICT_LLRINT:
case ISD::STRICT_LROUND:
@ -2338,9 +2339,14 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
/// legal for the target.
SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
SDValue &Chain) {
bool isSigned = (Node->getOpcode() == ISD::STRICT_SINT_TO_FP ||
Node->getOpcode() == ISD::SINT_TO_FP);
EVT DestVT = Node->getValueType(0);
SDLoc dl(Node);
unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
SDValue Op0 = Node->getOperand(OpNo);
EVT SrcVT = Op0.getValueType();
// TODO: Should any fast-math-flags be set for the created nodes?
@ -2387,16 +2393,38 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
BitsToDouble(0x4330000080000000ULL) :
BitsToDouble(0x4330000000000000ULL),
dl, MVT::f64);
// Subtract the bias and get the final result.
SDValue Sub;
SDValue Result;
if (Node->isStrictFPOpcode()) {
Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
{Node->getOperand(0), Load, Bias});
if (DestVT != Sub.getValueType()) {
std::pair<SDValue, SDValue> ResultPair;
ResultPair =
DAG.getStrictFPExtendOrRound(Sub, SDValue(Node, 1), dl, DestVT);
Result = ResultPair.first;
Chain = ResultPair.second;
}
else
Result = Sub;
} else {
Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
}
return Result;
}
assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
// Code below here assumes !isSigned without checking again.
// FIXME: This can produce slightly incorrect results. See details in
// FIXME: https://reviews.llvm.org/D69275
SDValue Tmp1;
if (Node->isStrictFPOpcode()) {
Tmp1 = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other },
{ Node->getOperand(0), Op0 });
} else
Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0,
DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
@ -2442,6 +2470,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
FudgeInReg = Handle.getValue();
}
if (Node->isStrictFPOpcode()) {
SDValue Result = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other },
{ Tmp1.getValue(1), Tmp1, FudgeInReg });
Chain = Result.getValue(1);
return Result;
}
return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
}
@ -2450,9 +2485,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
/// we promote it. At this point, we know that the result and operand types are
/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
/// operation that takes a larger input.
void SelectionDAGLegalize::PromoteLegalINT_TO_FP(
SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results) {
bool IsStrict = N->isStrictFPOpcode();
bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
N->getOpcode() == ISD::STRICT_SINT_TO_FP;
EVT DestVT = N->getValueType(0);
SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0);
unsigned UIntOp = IsStrict ? ISD::STRICT_UINT_TO_FP : ISD::UINT_TO_FP;
unsigned SIntOp = IsStrict ? ISD::STRICT_SINT_TO_FP : ISD::SINT_TO_FP;
// First step, figure out the appropriate *INT_TO_FP operation to use.
EVT NewInTy = LegalOp.getValueType();
@ -2464,15 +2506,16 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
assert(NewInTy.isInteger() && "Ran out of possibilities!");
// If the target supports SINT_TO_FP of this type, use it.
if (TLI.isOperationLegalOrCustom(SIntOp, NewInTy)) {
OpToUse = SIntOp;
break;
}
if (IsSigned)
continue;
// If the target supports UINT_TO_FP of this type, use it.
if (TLI.isOperationLegalOrCustom(UIntOp, NewInTy)) {
OpToUse = UIntOp;
break;
}
@ -2481,9 +2524,20 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
// Okay, we found the operation and type to use. Zero extend our input to the
// desired type then run the operation on it.
if (IsStrict) {
SDValue Res =
DAG.getNode(OpToUse, dl, {DestVT, MVT::Other},
{N->getOperand(0),
DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
dl, NewInTy, LegalOp)});
Results.push_back(Res);
Results.push_back(Res.getValue(1));
return;
}
Results.push_back(
DAG.getNode(OpToUse, dl, DestVT,
DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
dl, NewInTy, LegalOp)));
}
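In IR terms, the promotion performed here widens the integer operand before
converting (sign-extend for signed inputs, zero-extend for unsigned, after
which a signed convert is safe). A minimal sketch of the unsigned case in
plain IR (function name hypothetical):

define float @promote_u16(i16 %x) {
  ; Zero-extend to a type the target can convert.
  %ext = zext i16 %x to i32
  ; The zero-extended value is non-negative, so a signed convert of the
  ; wider type computes the unsigned conversion of the original value.
  %res = sitofp i32 %ext to float
  ret float %res
}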
/// This function is responsible for legalizing a
@ -2899,15 +2953,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::UINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
if (TLI.expandUINT_TO_FP(Node, Tmp1, Tmp2, DAG)) {
Results.push_back(Tmp1);
if (Node->isStrictFPOpcode())
Results.push_back(Tmp2);
break;
}
LLVM_FALLTHROUGH;
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2);
Results.push_back(Tmp1);
if (Node->isStrictFPOpcode())
Results.push_back(Tmp2);
break;
case ISD::FP_TO_SINT:
if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
@ -4194,6 +4253,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
OVT = Node->getOperand(0).getSimpleValueType();
}
if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
Node->getOpcode() == ISD::STRICT_SINT_TO_FP)
OVT = Node->getOperand(1).getSimpleValueType();
if (Node->getOpcode() == ISD::BR_CC)
OVT = Node->getOperand(2).getSimpleValueType();
MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
@ -4248,10 +4310,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
PromoteLegalFP_TO_INT(Node, dl, Results);
break;
case ISD::UINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
PromoteLegalINT_TO_FP(Node, dl, Results);
break;
case ISD::VAARG: {
SDValue Chain = Node->getOperand(0); // Get the chain.


@ -307,23 +307,27 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return TranslateLegalizeResults(Op, Result);
TargetLowering::LegalizeAction Action = TargetLowering::Legal;
EVT ValVT;
switch (Op.getOpcode()) {
default:
return TranslateLegalizeResults(Op, Result);
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
ValVT = Node->getValueType(0);
if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
ValVT = Node->getOperand(1).getValueType();
Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
// If we're asked to expand a strict vector floating-point operation,
// by default we're going to simply unroll it. That is usually the
// best approach, except in the case where the resulting strict (scalar)
// operations would themselves use the fallback mutation to non-strict.
// In that specific case, just do the fallback on the vector op.
if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
TargetLowering::Legal) {
EVT EltVT = ValVT.getVectorElementType();
if (TLI.getOperationAction(Node->getOpcode(), EltVT)
== TargetLowering::Expand &&
TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
@ -1153,18 +1157,29 @@ SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
}
SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
bool IsStrict = Op.getNode()->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Src = Op.getOperand(OpNo);
EVT VT = Src.getValueType();
SDLoc DL(Op);
// Attempt to expand using TargetLowering.
SDValue Result;
SDValue Chain;
if (TLI.expandUINT_TO_FP(Op.getNode(), Result, Chain, DAG)) {
if (IsStrict)
// Relink the chain
DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain);
return Result;
}
// Make sure that the SINT_TO_FP and SRL instructions are available.
if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
TargetLowering::Expand) ||
(IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
TargetLowering::Expand)) ||
TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
return IsStrict ? SDValue() : DAG.UnrollVectorOp(Op.getNode());
unsigned BW = VT.getScalarSizeInBits();
assert((BW == 64 || BW == 32) &&
@ -1182,8 +1197,31 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
// Clear upper part of LO, lower HI
SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);
if (IsStrict) {
// Convert hi and lo to floats
// Convert the hi part back to the upper values
// TODO: Can any fast-math-flags be set on these nodes?
SDValue fHI =
DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {Op.getValueType(), MVT::Other},
{Op.getOperand(0), HI});
fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Op.getValueType(), MVT::Other},
{SDValue(fHI.getNode(), 1), fHI, TWOHW});
SDValue fLO =
DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {Op.getValueType(), MVT::Other},
{SDValue(fHI.getNode(), 1), LO});
// Add the two halves
SDValue Result =
DAG.getNode(ISD::STRICT_FADD, DL, {Op.getValueType(), MVT::Other},
{SDValue(fLO.getNode(), 1), fHI, fLO});
// Relink the chain
DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), SDValue(Result.getNode(), 1));
return Result;
}
// Convert hi and lo to floats
// Convert the hi part back to the upper values
@ -1318,7 +1356,12 @@ SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) {
}
SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
if (Op.getOpcode() == ISD::STRICT_UINT_TO_FP) {
if (SDValue Res = ExpandUINT_TO_FLOAT(Op))
return Res;
}
EVT VT = Op.getValue(0).getValueType();
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
unsigned NumOpers = Op.getNumOperands();
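For reference, the hi/lo decomposition that ExpandUINT_TO_FLOAT performs
above can be written as plain (non-strict) IR. A scalar sketch, assuming a
32-bit input split into 16-bit halves (function name hypothetical):

define float @u32_to_f32_split(i32 %x) {
  %hi = lshr i32 %x, 16              ; upper half
  %lo = and i32 %x, 65535            ; lower half
  ; Both halves are small enough to convert exactly as signed values.
  %hif = sitofp i32 %hi to float
  %lof = sitofp i32 %lo to float
  %scaled = fmul float %hif, 65536.0 ; shift the upper half back up (exact)
  %res = fadd float %scaled, %lof    ; single rounding happens here
  ret float %res
}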


@ -572,6 +572,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::UINT_TO_FP:
Res = ScalarizeVecOp_UnaryOp(N);
break;
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
@ -1931,9 +1933,12 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
if (N->getValueType(0).bitsLT(
N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
Res = SplitVecOp_TruncateHelper(N);
else
Res = SplitVecOp_UnaryOp(N);
@ -2494,7 +2499,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
//
// Without this transform, the original truncate would end up being
// scalarized, which is pretty much always a last resort.
unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
SDValue InVec = N->getOperand(OpNo);
EVT InVT = InVec->getValueType(0);
EVT OutVT = N->getValueType(0);
unsigned NumElements = OutVT.getVectorNumElements();
@ -2538,8 +2544,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
NumElements/2);
SDValue HalfLo;
SDValue HalfHi;
SDValue Chain;
if (N->isStrictFPOpcode()) {
HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
{N->getOperand(0), InLoVec});
HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
{N->getOperand(0), InHiVec});
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1),
HalfHi.getValue(1));
} else {
HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
}
// Concatenate them to get the full intermediate truncation result.
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
@ -2548,6 +2569,17 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
// type. This should normally be something that ends up being legal directly,
// but in theory if a target has very wide vectors and an annoyingly
// restricted set of legal types, this split can chain to build things up.
if (N->isStrictFPOpcode()) {
SDValue Res = DAG.getNode(
ISD::STRICT_FP_ROUND, DL, {OutVT, MVT::Other},
{Chain, InterVec,
DAG.getTargetConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()))});
// Relink the chain
ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1));
return Res;
}
return IsFloat
? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
DAG.getTargetConstant(
@ -3000,6 +3032,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
return WidenVecRes_Convert_StrictFP(N);
default:
break;
@ -4120,7 +4154,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_TO_UINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::TRUNCATE:
Res = WidenVecOp_Convert(N);
break;


@ -1117,6 +1117,20 @@ SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
: getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
}
std::pair<SDValue, SDValue>
SelectionDAG::getStrictFPExtendOrRound(SDValue Op, SDValue Chain,
const SDLoc &DL, EVT VT) {
assert(!VT.bitsEq(Op.getValueType()) &&
"Strict no-op FP extend/round not allowed.");
SDValue Res =
VT.bitsGT(Op.getValueType())
? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op})
: getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other},
{Chain, Op, getIntPtrConstant(0, DL)});
return std::pair<SDValue, SDValue>(Res, SDValue(Res.getNode(), 1));
}
SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::ANY_EXTEND, DL, VT, Op) :


@ -326,7 +326,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
case ISD::SINT_TO_FP: return "sint_to_fp";
case ISD::STRICT_SINT_TO_FP: return "strict_sint_to_fp";
case ISD::UINT_TO_FP: return "uint_to_fp";
case ISD::STRICT_UINT_TO_FP: return "strict_uint_to_fp";
case ISD::FP_TO_SINT: return "fp_to_sint";
case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";


@ -6116,8 +6116,10 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
}
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue &Chain,
SelectionDAG &DAG) const {
SDValue Src = Node->getOperand(0);
unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
@ -6140,7 +6142,13 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
// For unsigned conversions, convert them to signed conversions using the
// algorithm from the x86_64 __floatundidf in compiler_rt.
SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
SDValue Fast;
if (Node->isStrictFPOpcode()) {
Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
{Node->getOperand(0), Src});
Chain = SDValue(Fast.getNode(), 1);
} else
Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
@ -6148,8 +6156,17 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
SDValue Slow;
if (Node->isStrictFPOpcode()) {
SDValue SignCvt = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl,
{DstVT, MVT::Other}, {Chain, Or});
Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other },
{ SignCvt.getValue(1), SignCvt, SignCvt });
Chain = Slow.getValue(1);
} else {
SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
}
// TODO: This really should be implemented using a branch rather than a
// select. We happen to get lucky and machinesink does the right
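The fast/slow sequence built above mirrors compiler-rt's __floatundidf: the
direct signed convert is correct when the sign bit is clear; otherwise the
input is halved with its low bit folded in (round-to-odd), converted, and
doubled. A plain-IR sketch of the same logic, assuming an i64 source and f64
destination (function name hypothetical):

define double @u64_to_f64_select(i64 %x) {
  %fast = sitofp i64 %x to double   ; valid only when the sign bit is clear
  %shr = lshr i64 %x, 1
  %lowbit = and i64 %x, 1
  %halved = or i64 %shr, %lowbit    ; halve, keeping the low bit for rounding
  %cvt = sitofp i64 %halved to double
  %slow = fadd double %cvt, %cvt    ; double it back up
  %isneg = icmp slt i64 %x, 0
  %res = select i1 %isneg, double %slow, double %fast
  ret double %res
}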
@ -6192,8 +6209,18 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
if (Node->isStrictFPOpcode()) {
SDValue HiSub =
DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
{Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
{HiSub.getValue(1), LoFlt, HiSub});
Chain = Result.getValue(1);
} else {
SDValue HiSub =
DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
}
return true;
}
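The sequence above ORs the low and high 32-bit halves of the input into the
mantissas of 2^52 and 2^84 and then subtracts the combined bias away. A
plain-IR sketch of the same trick (constants checked against the IEEE-754
bit patterns; function name hypothetical):

define double @u64_to_f64_bits(i64 %x) {
  %lo = and i64 %x, 4294967295                 ; low 32 bits
  %hi = lshr i64 %x, 32                        ; high 32 bits
  %lo.or = or i64 %lo, 4841369599423283200     ; bit pattern of 2^52 (0x4330...)
  %hi.or = or i64 %hi, 4985484787499139072     ; bit pattern of 2^84 (0x4530...)
  %lo.f = bitcast i64 %lo.or to double         ; == 2^52 + lo
  %hi.f = bitcast i64 %hi.or to double         ; == 2^84 + hi * 2^32
  ; Subtract 2^84 + 2^52 so the biases cancel when the halves are added.
  %hi.sub = fsub double %hi.f, 0x4530000000100000
  %res = fadd double %lo.f, %hi.sub            ; == hi * 2^32 + lo
  ret double %res
}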


@ -4804,6 +4804,28 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
}
break;
case Intrinsic::experimental_constrained_sitofp:
case Intrinsic::experimental_constrained_uitofp: {
Value *Operand = FPI.getArgOperand(0);
uint64_t NumSrcElem = 0;
Assert(Operand->getType()->isIntOrIntVectorTy(),
"Intrinsic first argument must be integer", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
NumSrcElem = OperandT->getNumElements();
}
Operand = &FPI;
Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(),
"Intrinsic first argument and result disagree on vector use", &FPI);
Assert(Operand->getType()->isFPOrFPVectorTy(),
"Intrinsic result must be a floating point", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
Assert(NumSrcElem == OperandT->getNumElements(),
"Intrinsic first argument and result vector lengths must be equal",
&FPI);
}
} break;
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext: {
Value *Operand = FPI.getArgOperand(0);


@ -228,26 +228,34 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat()) {
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD or VCVTUSI2SS/SD for other targets.
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
// Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
// SSE has no i16 to fp conversion, only i32. We promote in the handler
// to allow f80 to use i16 and f64 to use i16 with sse1 only
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
// f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
// Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
// this operation.
@ -985,9 +993,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
// Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
@ -18421,8 +18432,13 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert((Op.getOpcode() == ISD::SINT_TO_FP ||
Op.getOpcode() == ISD::UINT_TO_FP) && "Unexpected opcode!");
SDValue Src = Op.getOperand(0);
Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
Op.getOpcode() == ISD::UINT_TO_FP) &&
"Unexpected opcode!");
bool IsStrict = Op->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Src = Op.getOperand(OpNo);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
@ -18439,7 +18455,17 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src);
if (IsStrict) {
SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {VecVT, MVT::Other},
{Op.getOperand(0), InVec});
SDValue Chain = CvtVec.getValue(1);
SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
DAG.getIntPtrConstant(0, dl));
return DAG.getMergeValues({Value, Chain}, dl);
}
SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, VecVT, InVec);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
DAG.getIntPtrConstant(0, dl));
}
@ -18510,7 +18536,9 @@ static SDValue vectorizeExtractedCast(SDValue Cast, SelectionDAG &DAG,
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Src = Op.getOperand(OpNo);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
@ -18519,7 +18547,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
return Extract;
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64 && !IsStrict) {
// FIXME: A strict version of CVTSI2P is needed.
return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
DAG.getUNDEF(SrcVT)));
@ -18545,13 +18574,17 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
// SSE doesn't have an i16 conversion so we need to promote.
if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Src);
if (IsStrict)
return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
{Op.getOperand(0), Ext});
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext);
}
if (VT == MVT::f128)
return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
SDValue ValueToStore = Src;
if (SrcVT == MVT::i64 && UseSSEReg && !Subtarget.is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
@ -18563,10 +18596,16 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
Chain = DAG.getStore(
Chain, dl, ValueToStore, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
std::pair<SDValue, SDValue> Tmp = BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
if (IsStrict)
return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
return Tmp.first;
}
std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
@ -18654,6 +18693,8 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
#endif
*/
bool IsStrict = Op->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDLoc dl(Op);
LLVMContext *Context = DAG.getContext();
@ -18674,8 +18715,8 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16);
// Load the 64-bit value into an XMM register.
SDValue XR1 =
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(OpNo));
SDValue CLod0 =
DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
@ -18688,32 +18729,50 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
/* Alignment = */ 16);
SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
SDValue Sub;
SDValue Chain;
// TODO: Are there any fast-math-flags to propagate here?
if (IsStrict) {
Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
{Op.getOperand(0), XR2F, CLod1});
Chain = Sub.getValue(1);
} else
Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
if (!IsStrict && Subtarget.hasSSE3() &&
shouldUseHorizontalOp(true, DAG, Subtarget)) {
// FIXME: Do we need a STRICT version of FHADD?
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
if (IsStrict) {
Result = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v2f64, MVT::Other},
{Chain, Shuffle, Sub});
Chain = Result.getValue(1);
} else
Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
}
Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
DAG.getIntPtrConstant(0, dl));
if (IsStrict)
return DAG.getMergeValues({Result, Chain}, dl);
return Result;
}
/// 32-bit unsigned integer to float expansion.
static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
SDLoc dl(Op);
// FP constant to bias correct the final result.
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
MVT::f64);
// Load the 32-bit value into an XMM register.
SDValue Load =
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Op.getOperand(OpNo));
// Zero out the upper parts of the register.
Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
@ -18733,6 +18792,23 @@ static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));
if (Op.getNode()->isStrictFPOpcode()) {
// Subtract the bias.
// TODO: Are there any fast-math-flags to propagate here?
SDValue Chain = Op.getOperand(0);
SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
{Chain, Or, Bias});
if (Op.getValueType() == Sub.getValueType())
return Sub;
// Handle final rounding.
std::pair<SDValue, SDValue> ResultPair = DAG.getStrictFPExtendOrRound(
Sub, Sub.getValue(1), dl, Op.getSimpleValueType());
return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl);
}
// Subtract the bias.
// TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
@ -18747,6 +18823,10 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
if (Op.getSimpleValueType() != MVT::v2f64)
return SDValue();
// FIXME: Need to fix the lack of StrictFP support here.
if (Op.getNode()->isStrictFPOpcode())
return SDValue();
SDValue N0 = Op.getOperand(0);
assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type");
@ -18873,7 +18953,8 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
SDValue N0 = Op.getOperand(OpNo);
MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);
@ -18891,11 +18972,14 @@ static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Src = Op.getOperand(OpNo);
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
MVT SrcVT = Src.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
if (DstVT == MVT::f128)
return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT));
@ -18915,8 +18999,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Promote i32 to i64 and use a signed conversion on 64-bit targets.
if (SrcVT == MVT::i32 && Subtarget.is64Bit()) {
Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Src);
if (IsStrict)
return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
{Chain, Src});
return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
}
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
@ -18933,22 +19020,28 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
if (SrcVT == MVT::i32) {
SDValue OffsetSlot = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
SDValue Store1 =
DAG.getStore(Chain, dl, Src, StackSlot, MachinePointerInfo());
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
OffsetSlot, MachinePointerInfo());
return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG).first;
std::pair<SDValue, SDValue> Tmp =
BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
if (IsStrict)
return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
return Tmp.first;
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue ValueToStore = Src;
if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) {
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
}
SDValue Store =
DAG.getStore(Chain, dl, ValueToStore, StackSlot, MachinePointerInfo());
// For i64 source, we need to add the appropriate power of 2 if the input
// was negative. This is the same as the optimization in
// DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here,
@ -18963,13 +19056,14 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue Ops[] = { Store, StackSlot };
SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
MVT::i64, MMO);
Chain = Fild.getValue(1);
APInt FF(32, 0x5F800000ULL);
// Check whether the sign bit is set.
SDValue SignSet = DAG.getSetCC(
dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
Op.getOperand(OpNo), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
// Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
SDValue FudgePtr = DAG.getConstantPool(
@ -18984,11 +19078,18 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
SDValue Fudge = DAG.getExtLoad(
ISD::EXTLOAD, dl, MVT::f80, Chain, FudgePtr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
/* Alignment = */ 4);
Chain = Fudge.getValue(1);
// Extend everything to 80 bits to force it to be done on x87.
// TODO: Are there any fast-math-flags to propagate here?
if (IsStrict) {
SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other},
{Chain, Fild, Fudge});
return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
{Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)});
}
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
DAG.getIntPtrConstant(0, dl));
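The FILD-plus-fudge sequence above converts the i64 as signed and then
compensates: when the input's sign bit was set, the signed interpretation is
2^64 too small, so 2^64 (the 0x5F800000 float constant) is selected from the
constant pool and added back. The same idea in plain IR (function name
hypothetical):

define double @u64_via_signed(i64 %x) {
  ; Off by 2^64 whenever %x has its sign bit set.
  %s = sitofp i64 %x to double
  %isneg = icmp slt i64 %x, 0
  ; 0x43F0000000000000 is the IEEE-754 bit pattern of 2^64 as a double.
  %fudge = select i1 %isneg, double 0x43F0000000000000, double 0.0
  %res = fadd double %s, %fudge
  ret double %res
}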
@ -19042,10 +19143,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
@ -28013,7 +28111,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::FSHL:
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::STRICT_UINT_TO_FP:
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);


@ -1941,6 +1941,762 @@ entry:
ret i64 %result
}
; Verify that sitofp(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
define double @sifdb(i8 %x) #0 {
; X87-LABEL: sifdb:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: sifdb:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: sifdb:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsbl %dil, %eax
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sifdb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movsbl %dil, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
define double @sifdw(i16 %x) #0 {
; X87-LABEL: sifdw:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: sifdw:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: sifdw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movswl %di, %eax
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sifdw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movswl %di, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
define double @sifdi(i32 %x) #0 {
; X87-LABEL: sifdi:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: sifdi:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: sifdi:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtsi2sd %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sifdi:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
define float @siffb(i8 %x) #0 {
; X87-LABEL: siffb:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: siffb:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: siffb:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movsbl %dil, %eax
; SSE-NEXT: cvtsi2ss %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: siffb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movsbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
define float @siffw(i16 %x) #0 {
; X87-LABEL: siffw:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: siffw:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: siffw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movswl %di, %eax
; SSE-NEXT: cvtsi2ss %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: siffw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movswl %di, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
define float @siffi(i32 %x) #0 {
; X87-LABEL: siffi:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: siffi:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: siffi:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtsi2ss %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: siffi:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
define double @sifdl(i64 %x) #0 {
; X87-LABEL: sifdl:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildll (%esp)
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: sifdl:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 24
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fstpl (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: sifdl:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtsi2sd %rdi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sifdl:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
define float @siffl(i64 %x) #0 {
; X87-LABEL: siffl:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildll (%esp)
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: siffl:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 24
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: flds {{[0-9]+}}(%esp)
; X86-SSE-NEXT: addl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: siffl:
; SSE: # %bb.0: # %entry
; SSE-NEXT: cvtsi2ss %rdi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: siffl:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
; Verify that uitofp(%x) isn't simplified when the rounding mode is
; unknown.
; Verify that no gross errors happen.
define double @uifdb(i8 %x) #0 {
; X87-LABEL: uifdb:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uifdb:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uifdb:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movzbl %dil, %eax
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: uifdb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
define double @uifdw(i16 %x) #0 {
; X87-LABEL: uifdw:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uifdw:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
; X86-SSE-NEXT: movsd %xmm0, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uifdw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: uifdw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
define double @uifdi(i32 %x) #0 {
; X87-LABEL: uifdi:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
; X87-NEXT: fildll (%esp)
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uifdi:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: orpd %xmm0, %xmm1
; X86-SSE-NEXT: subsd %xmm0, %xmm1
; X86-SSE-NEXT: movsd %xmm1, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uifdi:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movl %edi, %eax
; SSE-NEXT: cvtsi2sd %rax, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uifdi:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: uifdi:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
define double @uifdl(i64 %x) #0 {
; X87-LABEL: uifdl:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 24
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: xorl %eax, %eax
; X87-NEXT: testl %ecx, %ecx
; X87-NEXT: setns %al
; X87-NEXT: fildll (%esp)
; X87-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: addl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uifdl:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 16
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; X86-SSE-NEXT: subpd {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: movapd %xmm0, %xmm1
; X86-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; X86-SSE-NEXT: addpd %xmm0, %xmm1
; X86-SSE-NEXT: movlpd %xmm1, (%esp)
; X86-SSE-NEXT: fldl (%esp)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uifdl:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movq %rdi, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; SSE-NEXT: subpd {{.*}}(%rip), %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uifdl:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: uifdl:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
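; @uifdl shows the standard two-constant expansion of u64 -> f64 when no
; vcvtusi2sd is available: punpckldq splices the low and high 32-bit halves
; into the mantissas of 2^52 and 2^84, subpd removes both biases exactly,
; and the final addpd computes hi*2^32 + lo -- the one step that may round,
; matching uitofp's single rounding. The X87 path instead loads the bits as
; a signed i64 (fildll) and, when the sign bit was set, adds 2^64 from a
; two-entry constant table indexed via setns to undo the signed
; reinterpretation.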
define float @uiffb(i8 %x) #0 {
; X87-LABEL: uiffb:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X87-NEXT: filds {{[0-9]+}}(%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uiffb:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uiffb:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movzbl %dil, %eax
; SSE-NEXT: cvtsi2ss %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: uiffb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i8(i8 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
define float @uiffw(i16 %x) #0 {
; X87-LABEL: uiffw:
; X87: # %bb.0: # %entry
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: fildl (%esp)
; X87-NEXT: popl %eax
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uiffw:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uiffw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: cvtsi2ss %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: uiffw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
define float @uiffi(i32 %x) #0 {
; X87-LABEL: uiffi:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 16
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl %eax, (%esp)
; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
; X87-NEXT: fildll (%esp)
; X87-NEXT: addl $12, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uiffi:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 8
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: orpd %xmm0, %xmm1
; X86-SSE-NEXT: subsd %xmm0, %xmm1
; X86-SSE-NEXT: xorps %xmm0, %xmm0
; X86-SSE-NEXT: cvtsd2ss %xmm1, %xmm0
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uiffi:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movl %edi, %eax
; SSE-NEXT: cvtsi2ss %rax, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uiffi:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: uiffi:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
define float @uiffl(i64 %x) #0 {
; X87-LABEL: uiffl:
; X87: # %bb.0: # %entry
; X87-NEXT: subl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 24
; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X87-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X87-NEXT: xorl %eax, %eax
; X87-NEXT: testl %ecx, %ecx
; X87-NEXT: setns %al
; X87-NEXT: fildll {{[0-9]+}}(%esp)
; X87-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; X87-NEXT: fstps {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: addl $20, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-SSE-LABEL: uiffl:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: subl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 24
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: setns %al
; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: addl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 4
; X86-SSE-NEXT: retl
;
; SSE-LABEL: uiffl:
; SSE: # %bb.0: # %entry
; SSE-NEXT: testq %rdi, %rdi
; SSE-NEXT: js .LBB52_1
; SSE-NEXT: # %bb.2: # %entry
; SSE-NEXT: cvtsi2ss %rdi, %xmm0
; SSE-NEXT: retq
; SSE-NEXT: .LBB52_1:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: shrq %rax
; SSE-NEXT: andl $1, %edi
; SSE-NEXT: orq %rax, %rdi
; SSE-NEXT: cvtsi2ss %rdi, %xmm0
; SSE-NEXT: addss %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uiffl:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: testq %rdi, %rdi
; AVX1-NEXT: js .LBB52_1
; AVX1-NEXT: # %bb.2: # %entry
; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX1-NEXT: retq
; AVX1-NEXT: .LBB52_1:
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: shrq %rax
; AVX1-NEXT: andl $1, %edi
; AVX1-NEXT: orq %rax, %rdi
; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: uiffl:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %result
}
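; For u64 -> f32 (@uiffl) the SSE/AVX1 lowering branches on the sign bit.
; Non-negative inputs go straight through the signed cvtsi2ss. Otherwise the
; value is halved (shrq) while OR-ing the shifted-out bit back into bit 0 --
; a round-to-odd step that preserves the sticky information -- then
; converted and doubled with addss. Halving to odd avoids double rounding,
; and the final doubling only bumps the exponent, so it is exact.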
attributes #0 = { strictfp }
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
@ -1981,3 +2737,19 @@ declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata)
declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
declare double @llvm.experimental.constrained.sitofp.f64.i8(i8, metadata, metadata)
declare double @llvm.experimental.constrained.sitofp.f64.i16(i16, metadata, metadata)
declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
declare float @llvm.experimental.constrained.sitofp.f32.i8(i8, metadata, metadata)
declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata)
declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata)
declare double @llvm.experimental.constrained.uitofp.f64.i8(i8, metadata, metadata)
declare double @llvm.experimental.constrained.uitofp.f64.i16(i16, metadata, metadata)
declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
declare float @llvm.experimental.constrained.uitofp.f32.i8(i8, metadata, metadata)
declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata)
declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata)

File diff suppressed because it is too large

@ -373,6 +373,28 @@ entry:
ret i64 %result
}
; Verify that sitofp(42) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: @f30
; CHECK: call double @llvm.experimental.constrained.sitofp
define double @f30() #0 {
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 42,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
; Verify that uitofp(42) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: @f31
; CHECK: call double @llvm.experimental.constrained.uitofp
define double @f31() #0 {
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 42,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %result
}
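; Note that 42 converts to double exactly, so no rounding mode could change
; the numeric result; these two tests check that the folder is conservative
; and leaves any constrained conversion with a dynamic rounding mode and
; strict exception semantics for the backend to lower.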
attributes #0 = { strictfp }
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
@ -405,3 +427,5 @@ declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata)
declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)