[CodeGen] Add lround/llround builtins

This patch adds the ISD::LROUND and ISD::LLROUND opcodes along with new intrinsics. The changes are straightforward, as for the other floating-point rounding functions, with just some adjustments required to handle the return value being an integer. The idea is to optimize lround/llround generation for AArch64 in a subsequent patch. The current semantics simply route the call to the corresponding libm symbol. llvm-svn: 360889
2025-02-18 19:28:16 +00:00 · 2019-05-16 13:15:27 +00:00 · 2019-05-16 13:15:27 +00:00 · c00c3084e9
commit c00c3084e9
parent 082b98432d
24 changed files with 883 additions and 0 deletions
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@ -12344,6 +12344,81 @@ Semantics:
 This function returns the same values as the libm ``round``
 functions would, and handles error conditions in the same way.

+'``llvm.lround.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.lround`` on any
+floating-point type. Not all targets support all types however.
+
+::
+
+      declare i32 @llvm.lround.i32.f32(float %Val)
+      declare i32 @llvm.lround.i32.f64(double %Val)
+      declare i32 @llvm.lround.i32.f80(x86_fp80 %Val)
+      declare i32 @llvm.lround.i32.f128(fp128 %Val)
+      declare i32 @llvm.lround.i32.ppcf128(ppc_fp128 %Val)
+
+      declare i64 @llvm.lround.i64.f32(float %Val)
+      declare i64 @llvm.lround.i64.f64(double %Val)
+      declare i64 @llvm.lround.i64.f80(x86_fp80 %Val)
+      declare i64 @llvm.lround.i64.f128(fp128 %Val)
+      declare i64 @llvm.lround.i64.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.lround.*``' intrinsics return the operand rounded to the
+nearest integer, with halfway cases rounded away from zero.
+
+Arguments:
+""""""""""
+
+The argument is a floating-point number and the return value is i32 for
+``llvm.lround.i32`` and i64 for ``llvm.lround.i64``.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``lround``
+functions would, but without setting errno.
+
+'``llvm.llround.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.llround`` on any
+floating-point type. Not all targets support all types however.
+
+::
+
+      declare i64 @llvm.llround.f32(float %Val)
+      declare i64 @llvm.llround.f64(double %Val)
+      declare i64 @llvm.llround.f80(x86_fp80 %Val)
+      declare i64 @llvm.llround.f128(fp128 %Val)
+      declare i64 @llvm.llround.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.llround.*``' intrinsics return the operand rounded to the
+nearest integer, with halfway cases rounded away from zero.
+
+Arguments:
+""""""""""
+
+The argument is a floating-point number and the return value is i64.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``llround``
+functions would, but without setting errno.
+
 Bit Manipulation Intrinsics
 ---------------------------

--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@ -605,6 +605,8 @@ namespace ISD {
    FNEG, FABS, FSQRT, FCBRT, FSIN, FCOS, FPOWI, FPOW,
    FLOG, FLOG2, FLOG10, FEXP, FEXP2,
    FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR,
+    LROUND, LLROUND,
+
    /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
    /// values.
    //
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@ -538,6 +538,10 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
  def int_round : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
  def int_canonicalize : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>],
                                   [IntrNoMem]>;
+
+  def int_lround_i32 : Intrinsic<[llvm_i32_ty], [llvm_anyfloat_ty]>; // lround: i32 result, overloaded FP operand
+  def int_lround_i64 : Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty]>; // lround: i64 result, overloaded FP operand
+  def int_llround    : Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty]>; // llround: i64 result, overloaded FP operand
 }

 def int_minnum : Intrinsic<[llvm_anyfloat_ty],
--- a/include/llvm/IR/RuntimeLibcalls.def
+++ b/include/llvm/IR/RuntimeLibcalls.def
@ -254,6 +254,16 @@ HANDLE_LIBCALL(FMAX_F64, "fmax")
 HANDLE_LIBCALL(FMAX_F80, "fmaxl")
 HANDLE_LIBCALL(FMAX_F128, "fmaxl")
 HANDLE_LIBCALL(FMAX_PPCF128, "fmaxl")
+HANDLE_LIBCALL(LROUND_F32, "lroundf") // lround: one entry per FP operand type
+HANDLE_LIBCALL(LROUND_F64, "lround")
+HANDLE_LIBCALL(LROUND_F80, "lroundl") // f80, f128 and ppcf128 all map to the "l" symbol
+HANDLE_LIBCALL(LROUND_F128, "lroundl")
+HANDLE_LIBCALL(LROUND_PPCF128, "lroundl")
+HANDLE_LIBCALL(LLROUND_F32, "llroundf") // llround: one entry per FP operand type
+HANDLE_LIBCALL(LLROUND_F64, "llround")
+HANDLE_LIBCALL(LLROUND_F80, "llroundl") // f80, f128 and ppcf128 all map to the "l" symbol
+HANDLE_LIBCALL(LLROUND_F128, "llroundl")
+HANDLE_LIBCALL(LLROUND_PPCF128, "llroundl")

 // Conversion
 HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq")
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@ -149,6 +149,10 @@ private:
                           RTLIB::Libcall Call_I32,
                           RTLIB::Libcall Call_I64,
                           RTLIB::Libcall Call_I128);
+  SDValue ExpandArgFPLibCall(SDNode *Node,
+                             RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
+                             RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
+                             RTLIB::Libcall Call_PPCF128);
  void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);

@ -997,6 +1001,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::EXTRACT_VECTOR_ELT:
+  case ISD::LROUND:
+  case ISD::LLROUND:
    Action = TLI.getOperationAction(Node->getOpcode(),
                                    Node->getOperand(0).getValueType());
    break;
@ -2153,6 +2159,27 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
  return ExpandLibCall(LC, Node, isSigned);
 }

+/// Expand \p Node into a runtime library call, picking the libcall from the
+/// value type of the node's first operand (used for lround and llround, whose
+/// libcall variants are distinguished by their floating-point argument type).
+/// Each Call_* argument names the libcall to use for that operand type.
+SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
+                                                 RTLIB::Libcall Call_F32,
+                                                 RTLIB::Libcall Call_F64,
+                                                 RTLIB::Libcall Call_F80,
+                                                 RTLIB::Libcall Call_F128,
+                                                 RTLIB::Libcall Call_PPCF128) {
+  const MVT ArgVT = Node->getOperand(0).getValueType().getSimpleVT();
+
+  RTLIB::Libcall LC;
+  switch (ArgVT.SimpleTy) {
+  case MVT::f32:
+    LC = Call_F32;
+    break;
+  case MVT::f64:
+    LC = Call_F64;
+    break;
+  case MVT::f80:
+    LC = Call_F80;
+    break;
+  case MVT::f128:
+    LC = Call_F128;
+    break;
+  case MVT::ppcf128:
+    LC = Call_PPCF128;
+    break;
+  default:
+    // Only the five floating-point types above have a libcall variant.
+    llvm_unreachable("Unexpected request for libcall!");
+  }
+
+  return ExpandLibCall(LC, Node, /*isSigned=*/false);
+}
+
 /// Issue libcalls to __{u}divmod to compute div / rem pairs.
 void
 SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
@ -2878,6 +2905,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
    if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG))
      Results.push_back(Tmp1);
    break;
+  case ISD::LROUND:
+    Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
+                                         RTLIB::LROUND_F64, RTLIB::LROUND_F80,
+                                         RTLIB::LROUND_F128,
+                                         RTLIB::LROUND_PPCF128));
+    break;
+  case ISD::LLROUND:
+    Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
+                                         RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
+                                         RTLIB::LLROUND_F128,
+                                         RTLIB::LLROUND_PPCF128));
+    break;
  case ISD::VAARG:
    Results.push_back(DAG.expandVAArg(Node));
    Results.push_back(Results[0].getValue(1));
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@ -772,6 +772,8 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
  case ISD::FP_ROUND:    Res = SoftenFloatOp_FP_ROUND(N); break;
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:  Res = SoftenFloatOp_FP_TO_XINT(N); break;
+  case ISD::LROUND:      Res = SoftenFloatOp_LROUND(N); break;
+  case ISD::LLROUND:     Res = SoftenFloatOp_LLROUND(N); break;
  case ISD::SELECT:      Res = SoftenFloatOp_SELECT(N); break;
  case ISD::SELECT_CC:   Res = SoftenFloatOp_SELECT_CC(N); break;
  case ISD::SETCC:       Res = SoftenFloatOp_SETCC(N); break;
@ -1038,6 +1040,33 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
                      ST->getMemOperand());
 }

+/// Handle an ISD::LROUND node whose floating-point operand is being softened.
+/// Returns the value of a call to the lround libcall that matches the
+/// operand's floating-point type, made with the softened operand.
+SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
+  // The call returns the type the node's result type is transformed to.
+  EVT CallVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue SoftenedArg = GetSoftenedFloat(N->getOperand(0));
+
+  // The libcall variant is keyed on the operand's type as seen on the node,
+  // not on the type of the softened value.
+  EVT ArgVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  RTLIB::Libcall LC = GetFPLibCall(ArgVT,
+                                   RTLIB::LROUND_F32,
+                                   RTLIB::LROUND_F64,
+                                   RTLIB::LROUND_F80,
+                                   RTLIB::LROUND_F128,
+                                   RTLIB::LROUND_PPCF128);
+
+  return TLI.makeLibCall(DAG, LC, CallVT, SoftenedArg, false, SDLoc(N)).first;
+}
+
+/// Handle an ISD::LLROUND node whose floating-point operand is being softened.
+/// Returns the value of a call to the llround libcall that matches the
+/// operand's floating-point type, made with the softened operand.
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
+  // The call returns the type the node's result type is transformed to.
+  EVT CallVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue SoftenedArg = GetSoftenedFloat(N->getOperand(0));
+
+  // The libcall variant is keyed on the operand's type as seen on the node,
+  // not on the type of the softened value.
+  EVT ArgVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  RTLIB::Libcall LC = GetFPLibCall(ArgVT,
+                                   RTLIB::LLROUND_F32,
+                                   RTLIB::LLROUND_F64,
+                                   RTLIB::LLROUND_F80,
+                                   RTLIB::LLROUND_F128,
+                                   RTLIB::LLROUND_PPCF128);
+
+  return TLI.makeLibCall(DAG, LC, CallVT, SoftenedArg, false, SDLoc(N)).first;
+}

 //===----------------------------------------------------------------------===//
 //  Float Result Expansion
@ -1571,6 +1600,8 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
  case ISD::FP_ROUND:   Res = ExpandFloatOp_FP_ROUND(N); break;
  case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
  case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+  case ISD::LROUND:     Res = ExpandFloatOp_LROUND(N); break;
+  case ISD::LLROUND:    Res = ExpandFloatOp_LLROUND(N); break;
  case ISD::SELECT_CC:  Res = ExpandFloatOp_SELECT_CC(N); break;
  case ISD::SETCC:      Res = ExpandFloatOp_SETCC(N); break;
  case ISD::STORE:      Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
@ -1741,6 +1772,30 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
                           ST->getMemoryVT(), ST->getMemOperand());
 }

+/// Handle an ISD::LROUND node whose floating-point operand is being expanded.
+/// Returns the value of a call to the lround libcall that matches the
+/// operand's floating-point type; the operand is passed to the call as-is.
+SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) {
+  // The libcall variant is keyed on the operand's floating-point type.
+  EVT ArgVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  RTLIB::Libcall LC = GetFPLibCall(ArgVT,
+                                   RTLIB::LROUND_F32,
+                                   RTLIB::LROUND_F64,
+                                   RTLIB::LROUND_F80,
+                                   RTLIB::LROUND_F128,
+                                   RTLIB::LROUND_PPCF128);
+
+  // The call's return type is the node's own result type.
+  EVT ResultVT = N->getValueType(0);
+  return TLI.makeLibCall(DAG, LC, ResultVT, N->getOperand(0), false,
+                         SDLoc(N)).first;
+}
+
+/// Handle an ISD::LLROUND node whose floating-point operand is being expanded.
+/// Returns the value of a call to the llround libcall that matches the
+/// operand's floating-point type; the operand is passed to the call as-is.
+SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) {
+  // The libcall variant is keyed on the operand's floating-point type.
+  EVT ArgVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  RTLIB::Libcall LC = GetFPLibCall(ArgVT,
+                                   RTLIB::LLROUND_F32,
+                                   RTLIB::LLROUND_F64,
+                                   RTLIB::LLROUND_F80,
+                                   RTLIB::LLROUND_F128,
+                                   RTLIB::LLROUND_PPCF128);
+
+  // The call's return type is the node's own result type.
+  EVT ResultVT = N->getValueType(0);
+  return TLI.makeLibCall(DAG, LC, ResultVT, N->getOperand(0), false,
+                         SDLoc(N)).first;
+}
+
 //===----------------------------------------------------------------------===//
 //  Float Operand Promotion
 //===----------------------------------------------------------------------===//
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@ -1600,6 +1600,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
  case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break;
  case ISD::FP_TO_SINT:  ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
  case ISD::FP_TO_UINT:  ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+  case ISD::LLROUND:     ExpandIntRes_LLROUND(N, Lo, Hi); break;
  case ISD::LOAD:        ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
  case ISD::MUL:         ExpandIntRes_MUL(N, Lo, Hi); break;
  case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
@ -2465,6 +2466,32 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
               Lo, Hi);
 }

+/// Expand the integer result of an ISD::LLROUND node: call the llround libcall
+/// matching the operand's floating-point type and split the returned value
+/// into \p Lo and \p Hi.
+void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo,
+                                            SDValue &Hi) {
+  SDValue Arg = N->getOperand(0);
+
+  // Select the libcall from the operand's type as it appears on the node.
+  RTLIB::Libcall LC;
+  switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+  case MVT::f32:     LC = RTLIB::LLROUND_F32; break;
+  case MVT::f64:     LC = RTLIB::LLROUND_F64; break;
+  case MVT::f80:     LC = RTLIB::LLROUND_F80; break;
+  case MVT::f128:    LC = RTLIB::LLROUND_F128; break;
+  case MVT::ppcf128: LC = RTLIB::LLROUND_PPCF128; break;
+  default:           LC = RTLIB::UNKNOWN_LIBCALL; break;
+  }
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!");
+
+  // An operand whose type is being float-promoted is passed in promoted form.
+  if (getTypeAction(Arg.getValueType()) == TargetLowering::TypePromoteFloat)
+    Arg = GetPromotedFloat(Arg);
+
+  SDValue Call = TLI.makeLibCall(DAG, LC, N->getValueType(0), Arg,
+                                 true/*irrelevant*/, SDLoc(N)).first;
+  SplitInteger(Call, Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
                                         SDValue &Lo, SDValue &Hi) {
  if (ISD::isNormalLoad(N)) {
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@ -418,6 +418,7 @@ private:
  void ExpandIntRes_FLT_ROUNDS        (SDNode *N, SDValue &Lo, SDValue &Hi);
  void ExpandIntRes_FP_TO_SINT        (SDNode *N, SDValue &Lo, SDValue &Hi);
  void ExpandIntRes_FP_TO_UINT        (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_LLROUND           (SDNode *N, SDValue &Lo, SDValue &Hi);

  void ExpandIntRes_Logical           (SDNode *N, SDValue &Lo, SDValue &Hi);
  void ExpandIntRes_ADDSUB            (SDNode *N, SDValue &Lo, SDValue &Hi);
@ -553,6 +554,8 @@ private:
  SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
  SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
  SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
+  SDValue SoftenFloatOp_LROUND(SDNode *N);
+  SDValue SoftenFloatOp_LLROUND(SDNode *N);
  SDValue SoftenFloatOp_SELECT(SDNode *N);
  SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
  SDValue SoftenFloatOp_SETCC(SDNode *N);
@ -612,6 +615,8 @@ private:
  SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
  SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
  SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+  SDValue ExpandFloatOp_LROUND(SDNode *N);
+  SDValue ExpandFloatOp_LLROUND(SDNode *N);
  SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
  SDValue ExpandFloatOp_SETCC(SDNode *N);
  SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@ -6025,6 +6025,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                             getValue(I.getArgOperand(0))));
    return nullptr;
  }
+  case Intrinsic::lround_i32:
+  case Intrinsic::lround_i64:
+  case Intrinsic::llround: {
+    unsigned Opcode;
+    MVT RetVT;
+    switch (Intrinsic) {
+    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
+    case Intrinsic::lround_i32: Opcode = ISD::LROUND;  RetVT = MVT::i32; break;
+    case Intrinsic::lround_i64: Opcode = ISD::LROUND;  RetVT = MVT::i64; break;
+    case Intrinsic::llround:    Opcode = ISD::LLROUND; RetVT = MVT::i64; break;
+    }
+
+    setValue(&I, DAG.getNode(Opcode, sdl, RetVT,
+                             getValue(I.getArgOperand(0))));
+    return nullptr;
+  }
  case Intrinsic::minnum: {
    auto VT = getValue(I.getArgOperand(0)).getValueType();
    unsigned Opc =
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@ -328,6 +328,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
  case ISD::ADDRSPACECAST:              return "addrspacecast";
  case ISD::FP16_TO_FP:                 return "fp16_to_fp";
  case ISD::FP_TO_FP16:                 return "fp_to_fp16";
+  case ISD::LROUND:                     return "lround";
+  case ISD::LLROUND:                    return "llround";

    // Control flow instructions
  case ISD::BR:                         return "br";
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@ -710,6 +710,8 @@ void TargetLoweringBase::initActions() {
    setOperationAction(ISD::FRINT,      VT, Expand);
    setOperationAction(ISD::FTRUNC,     VT, Expand);
    setOperationAction(ISD::FROUND,     VT, Expand);
+    setOperationAction(ISD::LROUND,     VT, Expand);
+    setOperationAction(ISD::LLROUND,    VT, Expand);
  }

  // Default ISD::TRAP to expand (which turns it into abort).
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -672,6 +672,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
    setOperationAction(ISD::FRINT,  MVT::f80, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
    setOperationAction(ISD::FMA, MVT::f80, Expand);
+    setOperationAction(ISD::LROUND, MVT::f80, Expand);
+    setOperationAction(ISD::LLROUND, MVT::f80, Expand);
  }

  // Always use a library call for pow.
--- a/test/CodeGen/AArch64/llround-conv.ll
+++ b/test/CodeGen/AArch64/llround-conv.ll
@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
+
+; CHECK-LABEL: testmsws:
+; CHECK:       bl      llroundf
+define i32 @testmsws(float %x) {
+entry:
+  %0 = tail call i64 @llvm.llround.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxs:
+; CHECK:       b       llroundf
+define i64 @testmsxs(float %x) {
+entry:
+  %0 = tail call i64 @llvm.llround.f32(float %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswd:
+; CHECK:       bl      llround
+define i32 @testmswd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.llround.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxd:
+; CHECK:       b       llround
+define i64 @testmsxd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.llround.f64(double %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswl:
+; CHECK:       bl      llroundl
+define i32 @testmswl(fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.llround.f128(fp128 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsll:
+; CHECK:       b       llroundl
+define i64 @testmsll(fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.llround.f128(fp128 %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.llround.f32(float) nounwind readnone
+declare i64 @llvm.llround.f64(double) nounwind readnone
+declare i64 @llvm.llround.f128(fp128) nounwind readnone
--- a/test/CodeGen/AArch64/lround-conv.ll
+++ b/test/CodeGen/AArch64/lround-conv.ll
@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
+
+; CHECK-LABEL: testmsws:
+; CHECK:       bl      lroundf
+define i32 @testmsws(float %x) {
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxs:
+; CHECK:       b       lroundf
+define i64 @testmsxs(float %x) {
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f32(float %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswd:
+; CHECK:       bl      lround
+define i32 @testmswd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxd:
+; CHECK:       b       lround
+define i64 @testmsxd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f64(double %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswl:
+; CHECK:       bl      lroundl
+define i32 @testmswl(fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f128(fp128 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsll:
+; CHECK:       b       lroundl
+define i64 @testmsll(fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f128(fp128 %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.lround.i64.f32(float) nounwind readnone
+declare i64 @llvm.lround.i64.f64(double) nounwind readnone
+declare i64 @llvm.lround.i64.f128(fp128) nounwind readnone
--- a/test/CodeGen/ARM/llround-conv.ll
+++ b/test/CodeGen/ARM/llround-conv.ll
@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
+; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
+
+; SOFTFP-LABEL: testmsxs_builtin:
+; SOFTFP:       bl      llroundf
+; HARDFP-LABEL: testmsxs_builtin:
+; HARDFP:       bl      llroundf
+define i64 @testmsxs_builtin(float %x) {
+entry:
+  %0 = tail call i64 @llvm.llround.f32(float %x)
+  ret i64 %0
+}
+
+; SOFTFP-LABEL: testmsxd_builtin:
+; SOFTFP:       bl      llround
+; HARDFP-LABEL: testmsxd_builtin:
+; HARDFP:       bl      llround
+define i64 @testmsxd_builtin(double %x) {
+entry:
+  %0 = tail call i64 @llvm.llround.f64(double %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.llround.f32(float) nounwind readnone
+declare i64 @llvm.llround.f64(double) nounwind readnone
--- a/test/CodeGen/ARM/lround-conv.ll
+++ b/test/CodeGen/ARM/lround-conv.ll
@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
+; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
+
+; SOFTFP-LABEL: testmsws_builtin:
+; SOFTFP:       bl      lroundf
+; HARDFP-LABEL: testmsws_builtin:
+; HARDFP:       bl      lroundf
+define i32 @testmsws_builtin(float %x) {
+entry:
+  %0 = tail call i32 @llvm.lround.i32.f32(float %x)
+  ret i32 %0
+}
+
+; SOFTFP-LABEL: testmswd_builtin:
+; SOFTFP:       bl      lround
+; HARDFP-LABEL: testmswd_builtin:
+; HARDFP:       bl      lround
+define i32 @testmswd_builtin(double %x) {
+entry:
+  %0 = tail call i32 @llvm.lround.i32.f64(double %x)
+  ret i32 %0
+}
+
+declare i32 @llvm.lround.i32.f32(float) nounwind readnone
+declare i32 @llvm.lround.i32.f64(double) nounwind readnone
--- a/test/CodeGen/Mips/llround-conv.ll
+++ b/test/CodeGen/Mips/llround-conv.ll
@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s
+
+define signext i32 @testmsws(float %x) {
+; CHECK-LABEL: testmsws:
+; CHECK:       jal     llroundf
+entry:
+  %0 = tail call i64 @llvm.llround.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @testmsxs(float %x) {
+; CHECK-LABEL: testmsxs:
+; CHECK:       jal     llroundf
+entry:
+  %0 = tail call i64 @llvm.llround.f32(float %x)
+  ret i64 %0
+}
+
+define signext i32 @testmswd(double %x) {
+; CHECK-LABEL: testmswd:
+; CHECK:       jal     llround
+entry:
+  %0 = tail call i64 @llvm.llround.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @testmsxd(double %x) {
+; CHECK-LABEL: testmsxd:
+; CHECK:       jal     llround
+entry:
+  %0 = tail call i64 @llvm.llround.f64(double %x)
+  ret i64 %0
+}
+
+define signext i32 @testmswl(fp128 %x) {
+; CHECK-LABEL: testmswl:
+; CHECK:       jal     llroundl
+entry:
+  %0 = tail call i64 @llvm.llround.f128(fp128 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @testmsll(fp128 %x) {
+; CHECK-LABEL: testmsll:
+; CHECK:       jal     llroundl
+entry:
+  %0 = tail call i64 @llvm.llround.f128(fp128 %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.llround.f32(float) nounwind readnone
+declare i64 @llvm.llround.f64(double) nounwind readnone
+declare i64 @llvm.llround.f128(fp128) nounwind readnone
--- a/test/CodeGen/Mips/lround-conv.ll
+++ b/test/CodeGen/Mips/lround-conv.ll
@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s
+
+define signext i32 @testmsws(float %x) {
+; CHECK-LABEL: testmsws:
+; CHECK:       jal     lroundf
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @testmsxs(float %x) {
+; CHECK-LABEL: testmsxs:
+; CHECK:       jal     lroundf
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f32(float %x)
+  ret i64 %0
+}
+
+define signext i32 @testmswd(double %x) {
+; CHECK-LABEL: testmswd:
+; CHECK:       jal     lround
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @testmsxd(double %x) {
+; CHECK-LABEL: testmsxd:
+; CHECK:       jal     lround
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f64(double %x)
+  ret i64 %0
+}
+
+define signext i32 @testmswl(fp128 %x) {
+; CHECK-LABEL: testmswl:
+; CHECK:       jal     lroundl
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f128(fp128 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; fp128 operand, full i64 result returned unchanged (no extension attribute is
+; needed on an i64 return, matching the other i64-returning tests).
+define i64 @testmsll(fp128 %x) {
+; CHECK-LABEL: testmsll:
+; CHECK:       jal     lroundl
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f128(fp128 %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.lround.i64.f32(float) nounwind readnone
+declare i64 @llvm.lround.i64.f64(double) nounwind readnone
+declare i64 @llvm.lround.i64.f128(fp128) nounwind readnone
--- a/test/CodeGen/PowerPC/llround-conv.ll
+++ b/test/CodeGen/PowerPC/llround-conv.ll
@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s
+
+; CHECK-LABEL: testmsws:
+; CHECK:       bl      llroundf
+define signext i32 @testmsws(float %x) {
+entry:
+  %0 = tail call i64 @llvm.llround.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxs:
+; CHECK:       bl      llroundf
+define i64 @testmsxs(float %x) {
+entry:
+  %0 = tail call i64 @llvm.llround.f32(float %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswd:
+; CHECK:       bl      llround
+define signext i32 @testmswd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.llround.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxd:
+; CHECK:       bl      llround
+define i64 @testmsxd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.llround.f64(double %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswl:
+; CHECK:       bl      llroundl
+define signext i32 @testmswl(ppc_fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.llround.ppcf128(ppc_fp128 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsll:
+; CHECK:       bl      llroundl
+define i64 @testmsll(ppc_fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.llround.ppcf128(ppc_fp128 %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.llround.f32(float) nounwind readnone
+declare i64 @llvm.llround.f64(double) nounwind readnone
+declare i64 @llvm.llround.ppcf128(ppc_fp128) nounwind readnone
--- a/test/CodeGen/PowerPC/lround-conv.ll
+++ b/test/CodeGen/PowerPC/lround-conv.ll
@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s
+
+; CHECK-LABEL: testmsws:
+; CHECK:       bl      lroundf
+define signext i32 @testmsws(float %x) {
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxs:
+; CHECK:       bl      lroundf
+define i64 @testmsxs(float %x) {
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f32(float %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswd:
+; CHECK:       bl      lround
+define signext i32 @testmswd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxd:
+; CHECK:       bl      lround
+define i64 @testmsxd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f64(double %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswl:
+; CHECK:       bl      lroundl
+define signext i32 @testmswl(ppc_fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.lround.i64.ppcf128(ppc_fp128 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsll:
+; CHECK:       bl      lroundl
+define i64 @testmsll(ppc_fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.lround.i64.ppcf128(ppc_fp128 %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.lround.i64.f32(float) nounwind readnone
+declare i64 @llvm.lround.i64.f64(double) nounwind readnone
+declare i64 @llvm.lround.i64.ppcf128(ppc_fp128) nounwind readnone
--- a/test/CodeGen/X86/llround-conv-i32.ll
+++ b/test/CodeGen/X86/llround-conv-i32.ll
@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown             | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE2
+
+define i64 @testmsxs_builtin(float %x) {
+; CHECK-LABEL: testmsxs_builtin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    fstps (%esp)
+; CHECK-NEXT:    calll llroundf
+; CHECK-NEXT:    popl %ecx
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-NEXT:    retl
+;
+; SSE2-LABEL: testmsxs_builtin:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    pushl %eax
+; SSE2-NEXT:    .cfi_def_cfa_offset 8
+; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    movss %xmm0, (%esp)
+; SSE2-NEXT:    calll llroundf
+; SSE2-NEXT:    popl %ecx
+; SSE2-NEXT:    .cfi_def_cfa_offset 4
+; SSE2-NEXT:    retl
+entry:
+  %0 = tail call i64 @llvm.llround.f32(float %x)
+  ret i64 %0
+}
+
+define i64 @testmsxd_builtin(double %x) {
+; CHECK-LABEL: testmsxd_builtin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subl $8, %esp
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    fldl {{[0-9]+}}(%esp)
+; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    calll llround
+; CHECK-NEXT:    addl $8, %esp
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-NEXT:    retl
+;
+; SSE2-LABEL: testmsxd_builtin:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    subl $8, %esp
+; SSE2-NEXT:    .cfi_def_cfa_offset 12
+; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT:    movsd %xmm0, (%esp)
+; SSE2-NEXT:    calll llround
+; SSE2-NEXT:    addl $8, %esp
+; SSE2-NEXT:    .cfi_def_cfa_offset 4
+; SSE2-NEXT:    retl
+entry:
+  %0 = tail call i64 @llvm.llround.f64(double %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.llround.f32(float) nounwind readnone
+declare i64 @llvm.llround.f64(double) nounwind readnone
--- a/test/CodeGen/X86/llround-conv.ll
+++ b/test/CodeGen/X86/llround-conv.ll
@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
+
+; llvm.llround.f32 whose i64 result is truncated to i32 on x86_64. The
+; assertions expect a regular callq to llroundf (not a tail jump), with the
+; returned value taken from the low half of rax (eax).
+define i32 @testmsws(float %x) {
+; CHECK-LABEL: testmsws:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    callq llroundf
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.llround.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; llvm.llround.f32 returned directly as i64 on x86_64. The assertions expect
+; the whole function to become a tail jump to llroundf.
+define i64 @testmsxs(float %x) {
+; CHECK-LABEL: testmsxs:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp llroundf # TAILCALL
+entry:
+  %0 = tail call i64 @llvm.llround.f32(float %x)
+  ret i64 %0
+}
+
+; llvm.llround.f64 whose i64 result is truncated to i32 on x86_64. The
+; assertions expect a regular callq to llround (not a tail jump), with the
+; returned value taken from the low half of rax (eax).
+define i32 @testmswd(double %x) {
+; CHECK-LABEL: testmswd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    callq llround
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.llround.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; llvm.llround.f64 returned directly as i64 on x86_64. The assertions expect
+; the whole function to become a tail jump to llround.
+define i64 @testmsxd(double %x) {
+; CHECK-LABEL: testmsxd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp llround # TAILCALL
+entry:
+  %0 = tail call i64 @llvm.llround.f64(double %x)
+  ret i64 %0
+}
+
+; llvm.llround.f80 whose i64 result is truncated to i32 on x86_64. The
+; assertions expect 24 bytes of stack to be reserved, the x86_fp80 argument to
+; be reloaded and stored to (%rsp) with fldt/fstpt, and a regular callq to
+; llroundl; the returned value is taken from eax.
+define i32 @testmswl(x86_fp80 %x) {
+; CHECK-LABEL: testmswl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fstpt (%rsp)
+; CHECK-NEXT:    callq llroundl
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.llround.f80(x86_fp80 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; llvm.llround.f80 returned directly as i64 on x86_64. The assertions expect
+; the whole function to become a tail jump to llroundl.
+define i64 @testmsll(x86_fp80 %x) {
+; CHECK-LABEL: testmsll:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp llroundl # TAILCALL
+entry:
+  %0 = tail call i64 @llvm.llround.f80(x86_fp80 %x)
+  ret i64 %0
+}
+
+; Declarations of the llround intrinsics called by the functions in this file
+; (float, double and x86_fp80 operands, i64 result).
+declare i64 @llvm.llround.f32(float) nounwind readnone
+declare i64 @llvm.llround.f64(double) nounwind readnone
+declare i64 @llvm.llround.f80(x86_fp80) nounwind readnone
--- a/test/CodeGen/X86/lround-conv-i32.ll
+++ b/test/CodeGen/X86/lround-conv-i32.ll
@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown             | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE2
+
+; llvm.lround.i32.f32 returned directly as i32 on i686. Both the default run
+; and the sse2 run expect the whole function to become a tail jump to lroundf.
+define i32 @testmsws_builtin(float %x) {
+; CHECK-LABEL: testmsws_builtin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp lroundf # TAILCALL
+;
+; SSE2-LABEL: testmsws_builtin:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    jmp lroundf # TAILCALL
+entry:
+  %0 = tail call i32 @llvm.lround.i32.f32(float %x)
+  ret i32 %0
+}
+
+; llvm.lround.i32.f64 returned directly as i32 on i686. Both the default run
+; and the sse2 run expect the whole function to become a tail jump to lround.
+define i32 @testmswd_builtin(double %x) {
+; CHECK-LABEL: testmswd_builtin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp lround # TAILCALL
+;
+; SSE2-LABEL: testmswd_builtin:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    jmp lround # TAILCALL
+entry:
+  %0 = tail call i32 @llvm.lround.i32.f64(double %x)
+  ret i32 %0
+}
+
+; Declarations of the i32-returning lround intrinsics called by the functions
+; in this file (float and double operands).
+declare i32 @llvm.lround.i32.f32(float) nounwind readnone
+declare i32 @llvm.lround.i32.f64(double) nounwind readnone
--- a/test/CodeGen/X86/lround-conv.ll
+++ b/test/CodeGen/X86/lround-conv.ll
@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
+
+; llvm.lround.i64.f32 whose i64 result is truncated to i32 on x86_64. The
+; assertions expect a regular callq to lroundf (not a tail jump), with the
+; returned value taken from the low half of rax (eax).
+define i32 @testmsws(float %x) {
+; CHECK-LABEL: testmsws:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    callq lroundf
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; llvm.lround.i64.f32 returned directly as i64 on x86_64. The assertions
+; expect the whole function to become a tail jump to lroundf.
+define i64 @testmsxs(float %x) {
+; CHECK-LABEL: testmsxs:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp lroundf # TAILCALL
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f32(float %x)
+  ret i64 %0
+}
+
+; llvm.lround.i64.f64 whose i64 result is truncated to i32 on x86_64. The
+; assertions expect a regular callq to lround (not a tail jump), with the
+; returned value taken from the low half of rax (eax).
+define i32 @testmswd(double %x) {
+; CHECK-LABEL: testmswd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    callq lround
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; llvm.lround.i64.f64 returned directly as i64 on x86_64. The assertions
+; expect the whole function to become a tail jump to lround.
+define i64 @testmsxd(double %x) {
+; CHECK-LABEL: testmsxd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp lround # TAILCALL
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f64(double %x)
+  ret i64 %0
+}
+
+; llvm.lround.i64.f80 whose i64 result is truncated to i32 on x86_64. The
+; assertions expect 24 bytes of stack to be reserved, the x86_fp80 argument to
+; be reloaded and stored to (%rsp) with fldt/fstpt, and a regular callq to
+; lroundl; the returned value is taken from eax.
+define i32 @testmswl(x86_fp80 %x) {
+; CHECK-LABEL: testmswl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fstpt (%rsp)
+; CHECK-NEXT:    callq lroundl
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f80(x86_fp80 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; llvm.lround.i64.f80 returned directly as i64 on x86_64. The assertions
+; expect the whole function to become a tail jump to lroundl.
+define i64 @testmsll(x86_fp80 %x) {
+; CHECK-LABEL: testmsll:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp lroundl # TAILCALL
+entry:
+  %0 = tail call i64 @llvm.lround.i64.f80(x86_fp80 %x)
+  ret i64 %0
+}
+
+; Declarations of the i64-returning lround intrinsics called by the functions
+; in this file (float, double and x86_fp80 operands).
+declare i64 @llvm.lround.i64.f32(float) nounwind readnone
+declare i64 @llvm.lround.i64.f64(double) nounwind readnone
+declare i64 @llvm.lround.i64.f80(x86_fp80) nounwind readnone