Allow target to handle STRICT floating-point nodes

The ISD::STRICT_ nodes used to implement the constrained floating-point
intrinsics are currently never passed to the target back-end, which makes
it impossible to handle them correctly (e.g. mark instructions as depending
on the floating-point status and control register, or mark instructions as
possibly trapping).

This patch allows the target to use setOperationAction to switch the action
on ISD::STRICT_ nodes to Legal. If this is done, the SelectionDAG common code
will stop converting the STRICT nodes to regular floating-point nodes and
will instead pass the STRICT nodes to the target using the normal
SelectionDAG matching rules.
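
For illustration, a minimal sketch of how a target opts in from its
TargetLowering constructor ("MyTargetLowering" is a hypothetical target;
the SystemZ hunks below are the real instance in this patch):

  // Hypothetical target: claim the STRICT_ nodes as Legal so that
  // common code stops mutating them back to the non-strict opcodes.
  MyTargetLowering::MyTargetLowering(const TargetMachine &TM)
      : TargetLowering(TM) {
    for (MVT VT : {MVT::f32, MVT::f64}) {
      setOperationAction(ISD::STRICT_FADD, VT, Legal);
      setOperationAction(ISD::STRICT_FSUB, VT, Legal);
      setOperationAction(ISD::STRICT_FMUL, VT, Legal);
      setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
    }
  }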

To avoid having the back-end duplicate all the floating-point instruction
patterns to handle both strict and non-strict variants, we make the MI
codegen explicitly aware of floating-point exceptions by introducing
two new concepts:

- A new MCID flag "mayRaiseFPException" that the target should set on any
  instruction that can possibly raise an FP exception according to the
  architecture definition.
- A new MI flag "FPExcept" that CodeGen/SelectionDAG will set on any MI
  instruction resulting from the expansion of a constrained FP intrinsic.

Any MI instruction that is *both* marked mayRaiseFPException *and* carries
the FPExcept flag then needs to be treated as raising exceptions by MI-level
codegen (e.g. scheduling).
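
Concretely, MI-level passes query the combined predicate through the new
MachineInstr::mayRaiseFPException() helper (added below) and treat it as an
extra code-motion barrier; a sketch of the consumer side, as in the
MachineCSE and isSafeToMove hunks of this patch:

  // An instruction is an FP-exception hazard only if it is both
  // architecturally able to raise one (MCID flag) and used in a
  // context where exceptions matter (MI flag); such an instruction
  // must not be speculated, moved, CSE'd, or rematerialized.
  if (MI.mayRaiseFPException() || MI.hasUnmodeledSideEffects())
    return false;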

Setting those two new flags is straightforward. The mayRaiseFPException
flag is simply set via TableGen by marking all relevant instruction
patterns in the .td files (e.g. "let Uses = [FPC], mayRaiseFPException = 1"
in the SystemZInstrFP.td hunks below).

The FPExcept flag is set in SDNodeFlags when creating the STRICT_ nodes
in the SelectionDAG, and gets inherited by the MachineSDNode nodes created
from it during instruction selection. The flag is then transferred to an
MIFlag when creating the MI from the MachineSDNode. This is handled just
like fast-math flags such as no-nans are handled today.
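
On the SelectionDAGBuilder side this amounts to a few lines when visiting
a constrained FP intrinsic; this sketch mirrors the
visitConstrainedFPIntrinsic hunk below:

  // Tag the STRICT_ node unless the intrinsic ignores exceptions.
  if (FPI.getExceptionBehavior() !=
      ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) {
    SDNodeFlags Flags;
    Flags.setFPExcept(true);
    Result->setFlags(Flags);
  }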

This patch includes both common code changes required to implement the
new features, and the SystemZ implementation.

Reviewed By: andrew.w.kaylor

Differential Revision: https://reviews.llvm.org/D55506

llvm-svn: 362663
Ulrich Weigand 2019-06-05 22:33:10 +00:00
parent 76f0779d4e
commit fba10ebb96
82 changed files with 5793 additions and 377 deletions


@ -102,8 +102,10 @@ public:
// no unsigned wrap.
NoSWrap = 1 << 12, // Instruction supports binary operator
// no signed wrap.
IsExact = 1 << 13 // Instruction supports division is
IsExact = 1 << 13, // Instruction supports division is
// known to be exact.
FPExcept = 1 << 14, // Instruction may raise floating-point
// exceptions.
};
private:
@ -830,6 +832,17 @@ public:
return mayLoad(Type) || mayStore(Type);
}
/// Return true if this instruction could possibly raise a floating-point
/// exception. This is the case if the instruction is a floating-point
/// instruction that can in principle raise an exception, as indicated
/// by the MCID::MayRaiseFPException property, *and* at the same time,
/// the instruction is used in a context where we expect floating-point
/// exceptions might be enabled, as indicated by the FPExcept MI flag.
bool mayRaiseFPException() const {
return hasProperty(MCID::MayRaiseFPException) &&
getFlag(MachineInstr::MIFlag::FPExcept);
}
//===--------------------------------------------------------------------===//
// Flags that indicate whether an instruction can be modified by a method.
//===--------------------------------------------------------------------===//


@ -368,6 +368,13 @@ private:
bool ApproximateFuncs : 1;
bool AllowReassociation : 1;
// We assume instructions do not raise floating-point exceptions by default,
// and only those marked explicitly may do so. We could choose to represent
// this via a positive "FPExcept" flag like on the MI level, but having a
// negative "NoFPExcept" flag here (that defaults to true) makes the flag
// intersection logic more straightforward.
bool NoFPExcept : 1;
public:
/// Default constructor turns off all optimization flags.
SDNodeFlags()
@ -375,7 +382,7 @@ public:
Exact(false), NoNaNs(false), NoInfs(false),
NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
AllowContract(false), ApproximateFuncs(false),
AllowReassociation(false) {}
AllowReassociation(false), NoFPExcept(true) {}
/// Propagate the fast-math-flags from an IR FPMathOperator.
void copyFMF(const FPMathOperator &FPMO) {
@ -438,6 +445,10 @@ public:
setDefined();
AllowReassociation = b;
}
void setFPExcept(bool b) {
setDefined();
NoFPExcept = !b;
}
// These are accessors for each flag.
bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
@ -451,9 +462,10 @@ public:
bool hasAllowContract() const { return AllowContract; }
bool hasApproximateFuncs() const { return ApproximateFuncs; }
bool hasAllowReassociation() const { return AllowReassociation; }
bool hasFPExcept() const { return !NoFPExcept; }
bool isFast() const {
return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs &&
return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && NoFPExcept &&
AllowContract && ApproximateFuncs && AllowReassociation;
}
@ -473,6 +485,7 @@ public:
AllowContract &= Flags.AllowContract;
ApproximateFuncs &= Flags.ApproximateFuncs;
AllowReassociation &= Flags.AllowReassociation;
NoFPExcept &= Flags.NoFPExcept;
}
};


@ -134,6 +134,7 @@ enum Flag {
FoldableAsLoad,
MayLoad,
MayStore,
MayRaiseFPException,
Predicable,
NotDuplicable,
UnmodeledSideEffects,
@ -403,6 +404,11 @@ public:
/// may not actually modify anything, for example.
bool mayStore() const { return Flags & (1ULL << MCID::MayStore); }
/// Return true if this instruction may raise a floating-point exception.
bool mayRaiseFPException() const {
return Flags & (1ULL << MCID::MayRaiseFPException);
}
/// Return true if this instruction has side
/// effects that are not modeled by other flags. This does not return true
/// for instructions whose effects are captured by:


@ -456,6 +456,7 @@ class Instruction {
bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand?
bit mayLoad = ?; // Is it possible for this inst to read memory?
bit mayStore = ?; // Is it possible for this inst to write memory?
bit mayRaiseFPException = 0; // Can this raise a floating-point exception?
bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote?
bit isCommutable = 0; // Is this 3 operand instruction commutable?
bit isTerminator = 0; // Is this part of the terminator for a basic block?


@ -467,6 +467,53 @@ def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>;
def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
def strict_fadd : SDNode<"ISD::STRICT_FADD",
SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
def strict_fsub : SDNode<"ISD::STRICT_FSUB",
SDTFPBinOp, [SDNPHasChain]>;
def strict_fmul : SDNode<"ISD::STRICT_FMUL",
SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
def strict_fdiv : SDNode<"ISD::STRICT_FDIV",
SDTFPBinOp, [SDNPHasChain]>;
def strict_frem : SDNode<"ISD::STRICT_FREM",
SDTFPBinOp, [SDNPHasChain]>;
def strict_fma : SDNode<"ISD::STRICT_FMA",
SDTFPTernaryOp, [SDNPHasChain]>;
def strict_fsqrt : SDNode<"ISD::STRICT_FSQRT",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fsin : SDNode<"ISD::STRICT_FSIN",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fcos : SDNode<"ISD::STRICT_FCOS",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fpow : SDNode<"ISD::STRICT_FPOW",
SDTFPBinOp, [SDNPHasChain]>;
def strict_flog2 : SDNode<"ISD::STRICT_FLOG2",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_frint : SDNode<"ISD::STRICT_FRINT",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fceil : SDNode<"ISD::STRICT_FCEIL",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_ffloor : SDNode<"ISD::STRICT_FFLOOR",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fround : SDNode<"ISD::STRICT_FROUND",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fminnum : SDNode<"ISD::STRICT_FMINNUM",
SDTFPBinOp, [SDNPHasChain,
SDNPCommutative, SDNPAssociative]>;
def strict_fmaxnum : SDNode<"ISD::STRICT_FMAXNUM",
SDTFPBinOp, [SDNPHasChain,
SDNPCommutative, SDNPAssociative]>;
def strict_fpround : SDNode<"ISD::STRICT_FP_ROUND",
SDTFPRoundOp, [SDNPHasChain]>;
def strict_fpextend : SDNode<"ISD::STRICT_FP_EXTEND",
SDTFPExtendOp, [SDNPHasChain]>;
def setcc : SDNode<"ISD::SETCC" , SDTSetCC>;
def select : SDNode<"ISD::SELECT" , SDTSelect>;
def vselect : SDNode<"ISD::VSELECT" , SDTVSelect>;
@ -1177,6 +1224,74 @@ def setle : PatFrag<(ops node:$lhs, node:$rhs),
def setne : PatFrag<(ops node:$lhs, node:$rhs),
(setcc node:$lhs, node:$rhs, SETNE)>;
// Convenience fragments to match both strict and non-strict fp operations
def any_fadd : PatFrags<(ops node:$lhs, node:$rhs),
[(strict_fadd node:$lhs, node:$rhs),
(fadd node:$lhs, node:$rhs)]>;
def any_fsub : PatFrags<(ops node:$lhs, node:$rhs),
[(strict_fsub node:$lhs, node:$rhs),
(fsub node:$lhs, node:$rhs)]>;
def any_fmul : PatFrags<(ops node:$lhs, node:$rhs),
[(strict_fmul node:$lhs, node:$rhs),
(fmul node:$lhs, node:$rhs)]>;
def any_fdiv : PatFrags<(ops node:$lhs, node:$rhs),
[(strict_fdiv node:$lhs, node:$rhs),
(fdiv node:$lhs, node:$rhs)]>;
def any_frem : PatFrags<(ops node:$lhs, node:$rhs),
[(strict_frem node:$lhs, node:$rhs),
(frem node:$lhs, node:$rhs)]>;
def any_fma : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(strict_fma node:$src1, node:$src2, node:$src3),
(fma node:$src1, node:$src2, node:$src3)]>;
def any_fsqrt : PatFrags<(ops node:$src),
[(strict_fsqrt node:$src),
(fsqrt node:$src)]>;
def any_fsin : PatFrags<(ops node:$src),
[(strict_fsin node:$src),
(fsin node:$src)]>;
def any_fcos : PatFrags<(ops node:$src),
[(strict_fcos node:$src),
(fcos node:$src)]>;
def any_fexp2 : PatFrags<(ops node:$src),
[(strict_fexp2 node:$src),
(fexp2 node:$src)]>;
def any_fpow : PatFrags<(ops node:$lhs, node:$rhs),
[(strict_fpow node:$lhs, node:$rhs),
(fpow node:$lhs, node:$rhs)]>;
def any_flog2 : PatFrags<(ops node:$src),
[(strict_flog2 node:$src),
(flog2 node:$src)]>;
def any_frint : PatFrags<(ops node:$src),
[(strict_frint node:$src),
(frint node:$src)]>;
def any_fnearbyint : PatFrags<(ops node:$src),
[(strict_fnearbyint node:$src),
(fnearbyint node:$src)]>;
def any_fceil : PatFrags<(ops node:$src),
[(strict_fceil node:$src),
(fceil node:$src)]>;
def any_ffloor : PatFrags<(ops node:$src),
[(strict_ffloor node:$src),
(ffloor node:$src)]>;
def any_fround : PatFrags<(ops node:$src),
[(strict_fround node:$src),
(fround node:$src)]>;
def any_ftrunc : PatFrags<(ops node:$src),
[(strict_ftrunc node:$src),
(ftrunc node:$src)]>;
def any_fmaxnum : PatFrags<(ops node:$lhs, node:$rhs),
[(strict_fmaxnum node:$lhs, node:$rhs),
(fmaxnum node:$lhs, node:$rhs)]>;
def any_fminnum : PatFrags<(ops node:$lhs, node:$rhs),
[(strict_fminnum node:$lhs, node:$rhs),
(fminnum node:$lhs, node:$rhs)]>;
def any_fpround : PatFrags<(ops node:$src),
[(strict_fpround node:$src),
(fpround node:$src)]>;
def any_fpextend : PatFrags<(ops node:$src),
[(strict_fpextend node:$src),
(fpextend node:$src)]>;
multiclass binary_atomic_op_ord<SDNode atomic_op> {
def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
(!cast<SDPatternOperator>(#NAME) node:$ptr, node:$val)> {


@ -78,6 +78,6 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
std::next(MI.getIterator()) == IntoMI.getIterator())
return true;
return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
empty(MI.implicit_operands());
return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
!MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands());
}


@ -229,7 +229,8 @@ public:
} // end anonymous namespace
bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
if (MI->isCall() || MI->hasUnmodeledSideEffects())
if (MI->isCall() || MI->mayRaiseFPException() ||
MI->hasUnmodeledSideEffects())
return false;
auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
(void)IsRegMask;


@ -204,6 +204,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("nuw" , MIToken::kw_nuw)
.Case("nsw" , MIToken::kw_nsw)
.Case("exact" , MIToken::kw_exact)
.Case("fpexcept", MIToken::kw_fpexcept)
.Case("debug-location", MIToken::kw_debug_location)
.Case("same_value", MIToken::kw_cfi_same_value)
.Case("offset", MIToken::kw_cfi_offset)


@ -73,6 +73,7 @@ struct MIToken {
kw_nuw,
kw_nsw,
kw_exact,
kw_fpexcept,
kw_debug_location,
kw_cfi_same_value,
kw_cfi_offset,


@ -1136,7 +1136,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_reassoc) ||
Token.is(MIToken::kw_nuw) ||
Token.is(MIToken::kw_nsw) ||
Token.is(MIToken::kw_exact)) {
Token.is(MIToken::kw_exact) ||
Token.is(MIToken::kw_fpexcept)) {
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
Flags |= MachineInstr::FrameSetup;
@ -1162,6 +1163,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::NoSWrap;
if (Token.is(MIToken::kw_exact))
Flags |= MachineInstr::IsExact;
if (Token.is(MIToken::kw_fpexcept))
Flags |= MachineInstr::FPExcept;
lex();
}


@ -713,6 +713,8 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "nsw ";
if (MI.getFlag(MachineInstr::IsExact))
OS << "exact ";
if (MI.getFlag(MachineInstr::FPExcept))
OS << "fpexcept ";
OS << TII->getName(MI.getOpcode());
if (I < E)


@ -382,7 +382,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
// Ignore stuff that we obviously can't move.
if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
MI->hasUnmodeledSideEffects())
MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
return false;
if (MI->mayLoad()) {


@ -1178,7 +1178,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
}
if (isPosition() || isDebugInstr() || isTerminator() ||
hasUnmodeledSideEffects())
mayRaiseFPException() || hasUnmodeledSideEffects())
return false;
// See if this instruction does a load. If so, we have to guarantee that the
@ -1544,6 +1544,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "nsw ";
if (getFlag(MachineInstr::IsExact))
OS << "exact ";
if (getFlag(MachineInstr::FPExcept))
OS << "fpexcept ";
// Print the opcode name.
if (TII)


@ -579,7 +579,8 @@ static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
/// Return true if the instruction causes a chain between memory
/// references before and after it.
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
return MI.isCall() || MI.hasUnmodeledSideEffects() ||
return MI.isCall() || MI.mayRaiseFPException() ||
MI.hasUnmodeledSideEffects() ||
(MI.hasOrderedMemoryRef() &&
(!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
}
@ -3238,6 +3239,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
// Assume ordered loads and stores may have a loop carried dependence.
if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
return true;


@ -1825,7 +1825,7 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
assert(Def->isBitcast() && "Invalid definition");
// Bail if there are effects that a plain copy will not expose.
if (Def->hasUnmodeledSideEffects())
if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
return ValueTrackerResult();
// Bitcasts with more than one def are not supported.


@ -712,6 +712,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
AAForDep = UseAA ? AA : nullptr;
BarrierChain = nullptr;
SUnit *FPBarrierChain = nullptr;
this->TrackLaneMasks = TrackLaneMasks;
MISUnitMap.clear();
@ -871,9 +872,21 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
addBarrierChain(NonAliasStores);
addBarrierChain(NonAliasLoads);
// Add dependency against previous FP barrier and reset FP barrier.
if (FPBarrierChain)
FPBarrierChain->addPredBarrier(BarrierChain);
FPBarrierChain = BarrierChain;
continue;
}
// Instructions that may raise FP exceptions depend on each other.
if (MI.mayRaiseFPException()) {
if (FPBarrierChain)
FPBarrierChain->addPredBarrier(SU);
FPBarrierChain = SU;
}
// If it's not a store or a variant load, we're done.
if (!MI.mayStore() &&
!(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))


@ -883,6 +883,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (Flags.hasExact())
MI->setFlag(MachineInstr::MIFlag::IsExact);
if (Flags.hasFPExcept())
MI->setFlag(MachineInstr::MIFlag::FPExcept);
}
// Emit all of the actual operands of this instruction, adding them to the


@ -6955,6 +6955,13 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
{ Chain, getValue(FPI.getArgOperand(0)),
getValue(FPI.getArgOperand(1)) });
if (FPI.getExceptionBehavior() !=
ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) {
SDNodeFlags Flags;
Flags.setFPExcept(true);
Result->setFlags(Flags);
}
assert(Result.getNode()->getNumValues() == 2);
SDValue OutChain = Result.getValue(1);
DAG.setRoot(OutChain);


@ -1122,16 +1122,14 @@ void SelectionDAGISel::DoInstructionSelection() {
#endif
// When we are using non-default rounding modes or FP exception behavior
// FP operations are represented by StrictFP pseudo-operations. They
// need to be simplified here so that the target-specific instruction
// selectors know how to handle them.
//
// If the current node is a strict FP pseudo-op, the isStrictFPOp()
// function will provide the corresponding normal FP opcode to which the
// node should be mutated.
//
// FIXME: The backends need a way to handle FP constraints.
if (Node->isStrictFPOpcode())
// FP operations are represented by StrictFP pseudo-operations. For
// targets that do not (yet) understand strict FP operations directly,
// we convert them to normal FP opcodes instead at this point. This
// will allow them to be handled by existing target-specific instruction
// selectors.
if (Node->isStrictFPOpcode() &&
(TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0))
!= TargetLowering::Legal))
Node = CurDAG->mutateStrictFPToFP(Node);
LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";


@ -899,7 +899,8 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
return true;
// Avoid instructions obviously unsafe for remat.
if (MI.isNotDuplicable() || MI.mayStore() || MI.hasUnmodeledSideEffects())
if (MI.isNotDuplicable() || MI.mayStore() || MI.mayRaiseFPException() ||
MI.hasUnmodeledSideEffects())
return false;
// Don't remat inline asm. We have no idea how expensive it is


@ -663,6 +663,34 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
}
// Constrained floating-point operations default to expand.
setOperationAction(ISD::STRICT_FADD, VT, Expand);
setOperationAction(ISD::STRICT_FSUB, VT, Expand);
setOperationAction(ISD::STRICT_FMUL, VT, Expand);
setOperationAction(ISD::STRICT_FDIV, VT, Expand);
setOperationAction(ISD::STRICT_FREM, VT, Expand);
setOperationAction(ISD::STRICT_FMA, VT, Expand);
setOperationAction(ISD::STRICT_FSQRT, VT, Expand);
setOperationAction(ISD::STRICT_FPOW, VT, Expand);
setOperationAction(ISD::STRICT_FPOWI, VT, Expand);
setOperationAction(ISD::STRICT_FSIN, VT, Expand);
setOperationAction(ISD::STRICT_FCOS, VT, Expand);
setOperationAction(ISD::STRICT_FEXP, VT, Expand);
setOperationAction(ISD::STRICT_FEXP2, VT, Expand);
setOperationAction(ISD::STRICT_FLOG, VT, Expand);
setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
setOperationAction(ISD::STRICT_FRINT, VT, Expand);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
setOperationAction(ISD::STRICT_FCEIL, VT, Expand);
setOperationAction(ISD::STRICT_FFLOOR, VT, Expand);
setOperationAction(ISD::STRICT_FROUND, VT, Expand);
setOperationAction(ISD::STRICT_FTRUNC, VT, Expand);
setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand);
setOperationAction(ISD::STRICT_FMINNUM, VT, Expand);
setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand);
setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand);
// For most targets @llvm.get.dynamic.area.offset just returns 0.
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);


@ -401,6 +401,24 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
// Handle constrained floating-point operations.
setOperationAction(ISD::STRICT_FADD, VT, Legal);
setOperationAction(ISD::STRICT_FSUB, VT, Legal);
setOperationAction(ISD::STRICT_FMUL, VT, Legal);
setOperationAction(ISD::STRICT_FDIV, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
setOperationAction(ISD::STRICT_FRINT, VT, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
if (Subtarget.hasFPExtension()) {
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
setOperationAction(ISD::STRICT_FROUND, VT, Legal);
setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
}
}
}
@ -432,6 +450,20 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
// Handle constrained floating-point operations.
setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
}
// The vector enhancements facility 1 has instructions for these.
@ -475,6 +507,25 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
// Handle constrained floating-point operations.
setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
}
}
// We have fused multiply-addition for f32 and f64 but not f128.


@ -52,7 +52,8 @@ let isCodeGenOnly = 1 in
// Moves between two floating-point registers that also set the condition
// codes.
let Uses = [FPC], Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
let Uses = [FPC], mayRaiseFPException = 1,
Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>;
defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>;
defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>;
@ -68,7 +69,8 @@ let Predicates = [FeatureNoVector] in {
// Use a normal load-and-test for compare against zero in case of
// vector support (via a pseudo to simplify instruction selection).
let Uses = [FPC], Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
let Uses = [FPC], mayRaiseFPException = 1,
Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>;
def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>;
def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>;
@ -173,8 +175,8 @@ let SimpleBDXStore = 1, mayStore = 1 in {
// Convert floating-point values to narrower representations, rounding
// according to the current mode. The destination of LEXBR and LDXBR
// is a 128-bit value, but only the first register of the pair is used.
let Uses = [FPC] in {
def LEDBR : UnaryRRE<"ledbr", 0xB344, fpround, FP32, FP64>;
let Uses = [FPC], mayRaiseFPException = 1 in {
def LEDBR : UnaryRRE<"ledbr", 0xB344, any_fpround, FP32, FP64>;
def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>;
def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>;
@ -187,25 +189,25 @@ let Uses = [FPC] in {
}
let Predicates = [FeatureNoVectorEnhancements1] in {
def : Pat<(f32 (fpround FP128:$src)),
def : Pat<(f32 (any_fpround FP128:$src)),
(EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
def : Pat<(f64 (fpround FP128:$src)),
def : Pat<(f64 (any_fpround FP128:$src)),
(EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
}
// Extend register floating-point values to wider representations.
let Uses = [FPC] in {
def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>;
let Uses = [FPC], mayRaiseFPException = 1 in {
def LDEBR : UnaryRRE<"ldebr", 0xB304, any_fpextend, FP64, FP32>;
def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>;
def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>;
}
let Predicates = [FeatureNoVectorEnhancements1] in {
def : Pat<(f128 (fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>;
def : Pat<(f128 (fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>;
def : Pat<(f128 (any_fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>;
def : Pat<(f128 (any_fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>;
}
// Extend memory floating-point values to wider representations.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>;
def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>;
def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>;
@ -218,7 +220,7 @@ let Predicates = [FeatureNoVectorEnhancements1] in {
}
// Convert a signed integer register value to a floating-point one.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>;
def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>;
def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
@ -230,7 +232,7 @@ let Uses = [FPC] in {
// The FP extension feature provides versions of the above that allow
// specifying rounding mode and inexact-exception suppression flags.
let Uses = [FPC], Predicates = [FeatureFPExtension] in {
let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureFPExtension] in {
def CEFBRA : TernaryRRFe<"cefbra", 0xB394, FP32, GR32>;
def CDFBRA : TernaryRRFe<"cdfbra", 0xB395, FP64, GR32>;
def CXFBRA : TernaryRRFe<"cxfbra", 0xB396, FP128, GR32>;
@ -242,7 +244,7 @@ let Uses = [FPC], Predicates = [FeatureFPExtension] in {
// Convert an unsigned integer register value to a floating-point one.
let Predicates = [FeatureFPExtension] in {
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32, GR32>;
def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64, GR32>;
def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>;
@ -263,7 +265,7 @@ let Predicates = [FeatureFPExtension] in {
// Convert a floating-point register value to a signed integer value,
// with the second operand (modifier M3) specifying the rounding mode.
let Uses = [FPC], Defs = [CC] in {
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def CFEBR : BinaryRRFe<"cfebr", 0xB398, GR32, FP32>;
def CFDBR : BinaryRRFe<"cfdbr", 0xB399, GR32, FP64>;
def CFXBR : BinaryRRFe<"cfxbr", 0xB39A, GR32, FP128>;
@ -284,7 +286,8 @@ def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>;
// The FP extension feature provides versions of the above that allow
// also specifying the inexact-exception suppression flag.
let Uses = [FPC], Predicates = [FeatureFPExtension], Defs = [CC] in {
let Uses = [FPC], mayRaiseFPException = 1,
Predicates = [FeatureFPExtension], Defs = [CC] in {
def CFEBRA : TernaryRRFe<"cfebra", 0xB398, GR32, FP32>;
def CFDBRA : TernaryRRFe<"cfdbra", 0xB399, GR32, FP64>;
def CFXBRA : TernaryRRFe<"cfxbra", 0xB39A, GR32, FP128>;
@ -296,7 +299,7 @@ let Uses = [FPC], Predicates = [FeatureFPExtension], Defs = [CC] in {
// Convert a floating-point register value to an unsigned integer value.
let Predicates = [FeatureFPExtension] in {
let Uses = [FPC], Defs = [CC] in {
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def CLFEBR : TernaryRRFe<"clfebr", 0xB39C, GR32, FP32>;
def CLFDBR : TernaryRRFe<"clfdbr", 0xB39D, GR32, FP64>;
def CLFXBR : TernaryRRFe<"clfxbr", 0xB39E, GR32, FP128>;
@ -362,18 +365,18 @@ let isCodeGenOnly = 1 in
def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32, FP32>;
// Square root.
let Uses = [FPC] in {
def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>;
def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>;
def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>;
let Uses = [FPC], mayRaiseFPException = 1 in {
def SQEBR : UnaryRRE<"sqebr", 0xB314, any_fsqrt, FP32, FP32>;
def SQDBR : UnaryRRE<"sqdbr", 0xB315, any_fsqrt, FP64, FP64>;
def SQXBR : UnaryRRE<"sqxbr", 0xB316, any_fsqrt, FP128, FP128>;
def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32, 4>;
def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>;
def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<any_fsqrt>, FP32, 4>;
def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<any_fsqrt>, FP64, 8>;
}
// Round to an integer, with the second operand (modifier M3) specifying
// the rounding mode. These forms always check for inexact conditions.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32, FP32>;
def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64, FP64>;
def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>;
@ -381,46 +384,46 @@ let Uses = [FPC] in {
// frint rounds according to the current mode (modifier 0) and detects
// inexact conditions.
def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>;
def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>;
def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>;
def : Pat<(any_frint FP32:$src), (FIEBR 0, FP32:$src)>;
def : Pat<(any_frint FP64:$src), (FIDBR 0, FP64:$src)>;
def : Pat<(any_frint FP128:$src), (FIXBR 0, FP128:$src)>;
let Predicates = [FeatureFPExtension] in {
// Extended forms of the FIxBR instructions. M4 can be set to 4
// to suppress detection of inexact conditions.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32, FP32>;
def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64, FP64>;
def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>;
}
// fnearbyint is like frint but does not detect inexact conditions.
def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>;
def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>;
def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
def : Pat<(any_fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>;
def : Pat<(any_fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>;
def : Pat<(any_fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
// floor is no longer allowed to raise an inexact condition,
// so restrict it to the cases where the condition can be suppressed.
// Mode 7 is round towards -inf.
def : Pat<(ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>;
def : Pat<(ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>;
def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
def : Pat<(any_ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>;
def : Pat<(any_ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>;
def : Pat<(any_ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
// Same idea for ceil, where mode 6 is round towards +inf.
def : Pat<(fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>;
def : Pat<(fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>;
def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
def : Pat<(any_fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>;
def : Pat<(any_fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>;
def : Pat<(any_fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
// Same idea for trunc, where mode 5 is round towards zero.
def : Pat<(ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>;
def : Pat<(ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>;
def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
def : Pat<(any_ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>;
def : Pat<(any_ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>;
def : Pat<(any_ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
// Same idea for round, where mode 1 is round towards nearest with
// ties away from zero.
def : Pat<(fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>;
def : Pat<(fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>;
def : Pat<(fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
def : Pat<(any_fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>;
def : Pat<(any_fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>;
def : Pat<(any_fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
}
//===----------------------------------------------------------------------===//
@ -428,99 +431,103 @@ let Predicates = [FeatureFPExtension] in {
//===----------------------------------------------------------------------===//
// Addition.
let Uses = [FPC], Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
let Uses = [FPC], mayRaiseFPException = 1,
Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
let isCommutable = 1 in {
def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>;
def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>;
def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>;
def AEBR : BinaryRRE<"aebr", 0xB30A, any_fadd, FP32, FP32>;
def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>;
def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>;
}
def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>;
def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>;
def AEB : BinaryRXE<"aeb", 0xED0A, any_fadd, FP32, load, 4>;
def ADB : BinaryRXE<"adb", 0xED1A, any_fadd, FP64, load, 8>;
}
// Subtraction.
let Uses = [FPC], Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>;
def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>;
def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>;
let Uses = [FPC], mayRaiseFPException = 1,
Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
def SEBR : BinaryRRE<"sebr", 0xB30B, any_fsub, FP32, FP32>;
def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>;
def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>;
def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>;
def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>;
def SEB : BinaryRXE<"seb", 0xED0B, any_fsub, FP32, load, 4>;
def SDB : BinaryRXE<"sdb", 0xED1B, any_fsub, FP64, load, 8>;
}
// Multiplication.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isCommutable = 1 in {
def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>;
def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>;
def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>;
def MEEBR : BinaryRRE<"meebr", 0xB317, any_fmul, FP32, FP32>;
def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>;
def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>;
}
def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>;
def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>;
def MEEB : BinaryRXE<"meeb", 0xED17, any_fmul, FP32, load, 4>;
def MDB : BinaryRXE<"mdb", 0xED1C, any_fmul, FP64, load, 8>;
}
// f64 multiplication of two FP32 registers.
let Uses = [FPC] in
let Uses = [FPC], mayRaiseFPException = 1 in
def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))),
def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
(f64 (fpextend FP32:$src2))),
(MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
FP32:$src1, subreg_h32), FP32:$src2)>;
// f64 multiplication of an FP32 register and an f32 memory.
let Uses = [FPC] in
let Uses = [FPC], mayRaiseFPException = 1 in
def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
def : Pat<(fmul (f64 (fpextend FP32:$src1)),
(f64 (extloadf32 bdxaddr12only:$addr))),
def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
(f64 (extloadf32 bdxaddr12only:$addr))),
(MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
bdxaddr12only:$addr)>;
// f128 multiplication of two FP64 registers.
let Uses = [FPC] in
let Uses = [FPC], mayRaiseFPException = 1 in
def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
let Predicates = [FeatureNoVectorEnhancements1] in
def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))),
def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
(f128 (fpextend FP64:$src2))),
(MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
FP64:$src1, subreg_h64), FP64:$src2)>;
// f128 multiplication of an FP64 register and an f64 memory.
let Uses = [FPC] in
let Uses = [FPC], mayRaiseFPException = 1 in
def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
let Predicates = [FeatureNoVectorEnhancements1] in
def : Pat<(fmul (f128 (fpextend FP64:$src1)),
(f128 (extloadf64 bdxaddr12only:$addr))),
def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
(f128 (extloadf64 bdxaddr12only:$addr))),
(MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
bdxaddr12only:$addr)>;
// Fused multiply-add.
let Uses = [FPC] in {
def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>;
def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64, FP64>;
let Uses = [FPC], mayRaiseFPException = 1 in {
def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;
def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>;
def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>;
def MAEB : TernaryRXF<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
def MADB : TernaryRXF<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
}
// Fused multiply-subtract.
let Uses = [FPC] in {
def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>;
def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64, FP64>;
let Uses = [FPC], mayRaiseFPException = 1 in {
def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;
def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>;
def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>;
def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>;
def MSEB : TernaryRXF<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
def MSDB : TernaryRXF<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
}
// Division.
let Uses = [FPC] in {
def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>;
def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>;
def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>;
let Uses = [FPC], mayRaiseFPException = 1 in {
def DEBR : BinaryRRE<"debr", 0xB30D, any_fdiv, FP32, FP32>;
def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64, FP64>;
def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>;
def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>;
def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>;
def DEB : BinaryRXE<"deb", 0xED0D, any_fdiv, FP32, load, 4>;
def DDB : BinaryRXE<"ddb", 0xED1D, any_fdiv, FP64, load, 8>;
}
// Divide to integer.
let Uses = [FPC], Defs = [CC] in {
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def DIEBR : TernaryRRFb<"diebr", 0xB353, FP32, FP32, FP32>;
def DIDBR : TernaryRRFb<"didbr", 0xB35B, FP64, FP64, FP64>;
}
@ -529,7 +536,7 @@ let Uses = [FPC], Defs = [CC] in {
// Comparisons
//===----------------------------------------------------------------------===//
let Uses = [FPC], Defs = [CC], CCValues = 0xF in {
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in {
def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32, FP32>;
def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64, FP64>;
def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>;
@ -570,7 +577,7 @@ let hasSideEffects = 1 in {
}
}
let Defs = [FPC] in {
let Defs = [FPC], mayRaiseFPException = 1 in {
def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>;
def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>;
}


@ -924,29 +924,29 @@ let Predicates = [FeatureVector] in {
// See comments in SystemZInstrFP.td for the suppression flags and
// rounding modes.
multiclass VectorRounding<Instruction insn, TypedReg tr> {
def : FPConversion<insn, frint, tr, tr, 0, 0>;
def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>;
def : FPConversion<insn, ffloor, tr, tr, 4, 7>;
def : FPConversion<insn, fceil, tr, tr, 4, 6>;
def : FPConversion<insn, ftrunc, tr, tr, 4, 5>;
def : FPConversion<insn, fround, tr, tr, 4, 1>;
def : FPConversion<insn, any_frint, tr, tr, 0, 0>;
def : FPConversion<insn, any_fnearbyint, tr, tr, 4, 0>;
def : FPConversion<insn, any_ffloor, tr, tr, 4, 7>;
def : FPConversion<insn, any_fceil, tr, tr, 4, 6>;
def : FPConversion<insn, any_ftrunc, tr, tr, 4, 5>;
def : FPConversion<insn, any_fround, tr, tr, 4, 1>;
}
let Predicates = [FeatureVector] in {
// Add.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
def VFADB : BinaryVRRc<"vfadb", 0xE7E3, any_fadd, v128db, v128db, 3, 0>;
def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>;
def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>;
def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>;
def VFASB : BinaryVRRc<"vfasb", 0xE7E3, any_fadd, v128sb, v128sb, 2, 0>;
def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8>;
def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, any_fadd, v128xb, v128xb, 4, 8>;
}
}
// Convert from fixed 64-bit.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
@ -954,7 +954,7 @@ let Predicates = [FeatureVector] in {
def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
// Convert from logical 64-bit.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>;
def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
@ -962,7 +962,7 @@ let Predicates = [FeatureVector] in {
def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
// Convert to fixed 64-bit.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>;
def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
@ -971,7 +971,7 @@ let Predicates = [FeatureVector] in {
def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
// Convert to logical 64-bit.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>;
def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
@ -980,19 +980,19 @@ let Predicates = [FeatureVector] in {
def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
// Divide.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, any_fdiv, v128db, v128db, 3, 0>;
def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>;
def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>;
def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>;
def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, any_fdiv, v128sb, v128sb, 2, 0>;
def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8>;
def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, any_fdiv, v128xb, v128xb, 4, 8>;
}
}
// Load FP integer.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
@ -1000,7 +1000,7 @@ let Predicates = [FeatureVector] in {
defm : VectorRounding<VFIDB, v128db>;
defm : VectorRounding<WFIDB, v64db>;
let Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>;
def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>;
def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>;
@ -1011,34 +1011,34 @@ let Predicates = [FeatureVector] in {
}
// Load lengthened.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>;
def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>;
}
let Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in {
def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>;
def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>;
def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>;
}
def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>;
def WFLLD : UnaryVRRa<"wflld", 0xE7C4, any_fpextend, v128xb, v64db, 3, 8>;
}
def : Pat<(f128 (fpextend (f32 VR32:$src))),
def : Pat<(f128 (any_fpextend (f32 VR32:$src))),
(WFLLD (WLDEB VR32:$src))>;
}
// Load rounded.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
}
def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
def : FPConversion<WLEDB, fpround, v32sb, v64db, 0, 0>;
def : FPConversion<WLEDB, any_fpround, v32sb, v64db, 0, 0>;
let Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in {
def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>;
def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
@ -1046,18 +1046,18 @@ let Predicates = [FeatureVector] in {
}
def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>;
}
def : FPConversion<WFLRX, fpround, v64db, v128xb, 0, 0>;
def : Pat<(f32 (fpround (f128 VR128:$src))),
def : FPConversion<WFLRX, any_fpround, v64db, v128xb, 0, 0>;
def : Pat<(f32 (any_fpround (f128 VR128:$src))),
(WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>;
}
// Maximum.
multiclass VectorMax<Instruction insn, TypedReg tr> {
def : FPMinMax<insn, fmaxnum, tr, 4>;
def : FPMinMax<insn, any_fmaxnum, tr, 4>;
def : FPMinMax<insn, fmaximum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb,
v128db, v128db, 3, 0>;
@ -1079,11 +1079,11 @@ let Predicates = [FeatureVector] in {
// Minimum.
multiclass VectorMin<Instruction insn, TypedReg tr> {
def : FPMinMax<insn, fminnum, tr, 4>;
def : FPMinMax<insn, any_fminnum, tr, 4>;
def : FPMinMax<insn, fminimum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb,
v128db, v128db, 3, 0>;
@ -1104,59 +1104,61 @@ let Predicates = [FeatureVector] in {
}
// Multiply.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, any_fmul, v128db, v128db, 3, 0>;
def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>;
def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>;
def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>;
def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, any_fmul, v128sb, v128sb, 2, 0>;
def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8>;
def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, any_fmul, v128xb, v128xb, 4, 8>;
}
}
// Multiply and add.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, any_fma, v128db, v128db, 0, 3>;
def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>;
def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>;
def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>;
def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, any_fma, v128sb, v128sb, 0, 2>;
def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2>;
def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, any_fma, v128xb, v128xb, 8, 4>;
}
}
// Multiply and subtract.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, any_fms, v128db, v128db, 0, 3>;
def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>;
def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>;
def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>;
def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, any_fms, v128sb, v128sb, 0, 2>;
def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2>;
def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, any_fms, v128xb, v128xb, 8, 4>;
}
}
// Negative multiply and add.
let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC], mayRaiseFPException = 1,
Predicates = [FeatureVectorEnhancements1] in {
def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>;
def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>;
def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>;
def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>;
def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>;
def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>;
def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, any_fnma, v128db, v128db, 0, 3>;
def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, any_fnma, v64db, v64db, 8, 3>;
def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, any_fnma, v128sb, v128sb, 0, 2>;
def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, any_fnma, v32sb, v32sb, 8, 2>;
def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, any_fnma, v128xb, v128xb, 8, 4>;
}
// Negative multiply and subtract.
let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC], mayRaiseFPException = 1,
Predicates = [FeatureVectorEnhancements1] in {
def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>;
def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>;
def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>;
def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>;
def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>;
def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>;
def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, any_fnms, v128db, v128db, 0, 3>;
def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, any_fnms, v64db, v64db, 8, 3>;
def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, any_fnms, v128sb, v128sb, 0, 2>;
def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, any_fnms, v32sb, v32sb, 8, 2>;
def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, any_fnms, v128xb, v128xb, 8, 4>;
}
// Perform sign operation.
@ -1197,26 +1199,26 @@ let Predicates = [FeatureVector] in {
}
// Square root.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, any_fsqrt, v128db, v128db, 3, 0>;
def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>;
def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>;
def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>;
def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, any_fsqrt, v128sb, v128sb, 2, 0>;
def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8>;
def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, any_fsqrt, v128xb, v128xb, 4, 8>;
}
}
// Subtract.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, any_fsub, v128db, v128db, 3, 0>;
def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>;
def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>;
def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>;
def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, any_fsub, v128sb, v128sb, 2, 0>;
def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8>;
def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, any_fsub, v128xb, v128xb, 4, 8>;
}
}
@ -1239,7 +1241,7 @@ let Predicates = [FeatureVector] in {
let Predicates = [FeatureVector] in {
// Compare scalar.
let Uses = [FPC], Defs = [CC] in {
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
@ -1249,7 +1251,7 @@ let Predicates = [FeatureVector] in {
}
// Compare and signal scalar.
let Uses = [FPC], Defs = [CC] in {
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
@ -1259,7 +1261,7 @@ let Predicates = [FeatureVector] in {
}
// Compare equal.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>;
defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
v128g, v128db, 3, 0>;
@ -1276,7 +1278,8 @@ let Predicates = [FeatureVector] in {
}
// Compare and signal equal.
let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC], mayRaiseFPException = 1,
Predicates = [FeatureVectorEnhancements1] in {
defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag,
v128g, v128db, 3, 4>;
defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag,
@ -1290,7 +1293,7 @@ let Predicates = [FeatureVector] in {
}
// Compare high.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
v128g, v128db, 3, 0>;
@ -1307,7 +1310,8 @@ let Predicates = [FeatureVector] in {
}
// Compare and signal high.
let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC], mayRaiseFPException = 1,
Predicates = [FeatureVectorEnhancements1] in {
defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag,
v128g, v128db, 3, 4>;
defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag,
@ -1321,7 +1325,7 @@ let Predicates = [FeatureVector] in {
}
// Compare high or equal.
let Uses = [FPC] in {
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
v128g, v128db, 3, 0>;
@ -1338,7 +1342,8 @@ let Predicates = [FeatureVector] in {
}
// Compare and signal high or equal.
let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
let Uses = [FPC], mayRaiseFPException = 1,
Predicates = [FeatureVectorEnhancements1] in {
defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag,
v128g, v128db, 3, 4>;
defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag,


@ -662,21 +662,21 @@ def z_usub : PatFrags<(ops node:$src1, node:$src2),
(sub node:$src1, node:$src2)]>;
// Fused multiply-subtract, using the natural operand order.
def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(fma node:$src1, node:$src2, (fneg node:$src3))>;
def any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(any_fma node:$src1, node:$src2, (fneg node:$src3))>;
// Fused multiply-add and multiply-subtract, but with the order of the
// operands matching SystemZ's MA and MS instructions.
def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(fma node:$src2, node:$src3, node:$src1)>;
def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(fma node:$src2, node:$src3, (fneg node:$src1))>;
def z_any_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(any_fma node:$src2, node:$src3, node:$src1)>;
def z_any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(any_fma node:$src2, node:$src3, (fneg node:$src1))>;
// Negative fused multiply-add and multiply-subtract.
def fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(fneg (fma node:$src1, node:$src2, node:$src3))>;
def fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(fneg (fms node:$src1, node:$src2, node:$src3))>;
def any_fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(fneg (any_fma node:$src1, node:$src2, node:$src3))>;
def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(fneg (any_fms node:$src1, node:$src2, node:$src3))>;
// Floating-point negative absolute.
def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
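// Note: the any_* fragments referenced above are not defined in this file;
// they are two-way PatFrags that match either the STRICT_ or the regular
// form of a node, so one .td pattern covers both variants. A minimal
// sketch of such a fragment (assumed to mirror the common-code definition)
// is:
//
//   def any_fma : PatFrags<(ops node:$src1, node:$src2, node:$src3),
//                          [(strict_fma node:$src1, node:$src2, node:$src3),
//                           (fma node:$src1, node:$src2, node:$src3)]>;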


@ -0,0 +1,173 @@
; Test 32-bit floating-point strict addition.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare float @foo()
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
; Check register addition.
define float @f1(float %f1, float %f2) {
; CHECK-LABEL: f1:
; CHECK: aebr %f0, %f2
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.fadd.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
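; For contrast, the ordinary non-strict form of the same operation is a
; plain fadd with no rounding-mode or exception metadata; a minimal sketch
; (not part of the committed test) would be:
define float @f1_nonstrict(float %f1, float %f2) {
  %res = fadd float %f1, %f2
  ret float %res
}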
; Check the low end of the AEB range.
define float @f2(float %f1, float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: aeb %f0, 0(%r2)
; CHECK: br %r14
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fadd.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check the high end of the aligned AEB range.
define float @f3(float %f1, float *%base) {
; CHECK-LABEL: f3:
; CHECK: aeb %f0, 4092(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1023
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fadd.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
define float @f4(float %f1, float *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: aeb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1024
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fadd.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check negative displacements, which also need separate address logic.
define float @f5(float %f1, float *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -4
; CHECK: aeb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 -1
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fadd.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check that AEB allows indices.
define float @f6(float %f1, float *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 2
; CHECK: aeb %f0, 400(%r1,%r2)
; CHECK: br %r14
%ptr1 = getelementptr float, float *%base, i64 %index
%ptr2 = getelementptr float, float *%ptr1, i64 100
%f2 = load float, float *%ptr2
%res = call float @llvm.experimental.constrained.fadd.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
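; (In f6 above, sllg shifts the index left by 2, i.e. scales it by 4 bytes
; per float, and the constant offset of 100 elements becomes the 400-byte
; displacement in the AEB address.)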
; Check that additions of spilled values can use AEB rather than AEBR.
define float @f7(float *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK-SCALAR: aeb %f0, 16{{[04]}}(%r15)
; CHECK: br %r14
%ptr1 = getelementptr float, float *%ptr0, i64 2
%ptr2 = getelementptr float, float *%ptr0, i64 4
%ptr3 = getelementptr float, float *%ptr0, i64 6
%ptr4 = getelementptr float, float *%ptr0, i64 8
%ptr5 = getelementptr float, float *%ptr0, i64 10
%ptr6 = getelementptr float, float *%ptr0, i64 12
%ptr7 = getelementptr float, float *%ptr0, i64 14
%ptr8 = getelementptr float, float *%ptr0, i64 16
%ptr9 = getelementptr float, float *%ptr0, i64 18
%ptr10 = getelementptr float, float *%ptr0, i64 20
%val0 = load float, float *%ptr0
%val1 = load float, float *%ptr1
%val2 = load float, float *%ptr2
%val3 = load float, float *%ptr3
%val4 = load float, float *%ptr4
%val5 = load float, float *%ptr5
%val6 = load float, float *%ptr6
%val7 = load float, float *%ptr7
%val8 = load float, float *%ptr8
%val9 = load float, float *%ptr9
%val10 = load float, float *%ptr10
%ret = call float @foo()
%add0 = call float @llvm.experimental.constrained.fadd.f32(
float %ret, float %val0,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add1 = call float @llvm.experimental.constrained.fadd.f32(
float %add0, float %val1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add2 = call float @llvm.experimental.constrained.fadd.f32(
float %add1, float %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add3 = call float @llvm.experimental.constrained.fadd.f32(
float %add2, float %val3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add4 = call float @llvm.experimental.constrained.fadd.f32(
float %add3, float %val4,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add5 = call float @llvm.experimental.constrained.fadd.f32(
float %add4, float %val5,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add6 = call float @llvm.experimental.constrained.fadd.f32(
float %add5, float %val6,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add7 = call float @llvm.experimental.constrained.fadd.f32(
float %add6, float %val7,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add8 = call float @llvm.experimental.constrained.fadd.f32(
float %add7, float %val8,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add9 = call float @llvm.experimental.constrained.fadd.f32(
float %add8, float %val9,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add10 = call float @llvm.experimental.constrained.fadd.f32(
float %add9, float %val10,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %add10
}


@ -0,0 +1,172 @@
; Test strict 64-bit floating-point addition.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
declare double @foo()
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
; Check register addition.
define double @f1(double %f1, double %f2) {
; CHECK-LABEL: f1:
; CHECK: adbr %f0, %f2
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.fadd.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the low end of the ADB range.
define double @f2(double %f1, double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: adb %f0, 0(%r2)
; CHECK: br %r14
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fadd.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the high end of the aligned ADB range.
define double @f3(double %f1, double *%base) {
; CHECK-LABEL: f3:
; CHECK: adb %f0, 4088(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 511
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fadd.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the next doubleword up, which needs separate address logic.
; Other sequences besides this one would be OK.
define double @f4(double %f1, double *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: adb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 512
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fadd.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check negative displacements, which also need separate address logic.
define double @f5(double %f1, double *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -8
; CHECK: adb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 -1
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fadd.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check that ADB allows indices.
define double @f6(double %f1, double *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 3
; CHECK: adb %f0, 800(%r1,%r2)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%base, i64 %index
%ptr2 = getelementptr double, double *%ptr1, i64 100
%f2 = load double, double *%ptr2
%res = call double @llvm.experimental.constrained.fadd.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check that additions of spilled values can use ADB rather than ADBR.
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK-SCALAR: adb %f0, 160(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4
%ptr3 = getelementptr double, double *%ptr0, i64 6
%ptr4 = getelementptr double, double *%ptr0, i64 8
%ptr5 = getelementptr double, double *%ptr0, i64 10
%ptr6 = getelementptr double, double *%ptr0, i64 12
%ptr7 = getelementptr double, double *%ptr0, i64 14
%ptr8 = getelementptr double, double *%ptr0, i64 16
%ptr9 = getelementptr double, double *%ptr0, i64 18
%ptr10 = getelementptr double, double *%ptr0, i64 20
%val0 = load double, double *%ptr0
%val1 = load double, double *%ptr1
%val2 = load double, double *%ptr2
%val3 = load double, double *%ptr3
%val4 = load double, double *%ptr4
%val5 = load double, double *%ptr5
%val6 = load double, double *%ptr6
%val7 = load double, double *%ptr7
%val8 = load double, double *%ptr8
%val9 = load double, double *%ptr9
%val10 = load double, double *%ptr10
%ret = call double @foo()
%add0 = call double @llvm.experimental.constrained.fadd.f64(
double %ret, double %val0,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add1 = call double @llvm.experimental.constrained.fadd.f64(
double %add0, double %val1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add2 = call double @llvm.experimental.constrained.fadd.f64(
double %add1, double %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add3 = call double @llvm.experimental.constrained.fadd.f64(
double %add2, double %val3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add4 = call double @llvm.experimental.constrained.fadd.f64(
double %add3, double %val4,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add5 = call double @llvm.experimental.constrained.fadd.f64(
double %add4, double %val5,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add6 = call double @llvm.experimental.constrained.fadd.f64(
double %add5, double %val6,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add7 = call double @llvm.experimental.constrained.fadd.f64(
double %add6, double %val7,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add8 = call double @llvm.experimental.constrained.fadd.f64(
double %add7, double %val8,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add9 = call double @llvm.experimental.constrained.fadd.f64(
double %add8, double %val9,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%add10 = call double @llvm.experimental.constrained.fadd.f64(
double %add9, double %val10,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %add10
}


@ -0,0 +1,25 @@
; Test strict 128-bit floating-point addition.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata)
; There is no memory form of 128-bit addition.
define void @f1(fp128 *%ptr, float %f2) {
; CHECK-LABEL: f1:
; CHECK-DAG: lxebr %f0, %f0
; CHECK-DAG: ld %f1, 0(%r2)
; CHECK-DAG: ld %f3, 8(%r2)
; CHECK: axbr %f0, %f1
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
%f1 = load fp128, fp128 *%ptr
%f2x = fpext float %f2 to fp128
%sum = call fp128 @llvm.experimental.constrained.fadd.f128(
fp128 %f1, fp128 %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %sum, fp128 *%ptr
ret void
}


@ -0,0 +1,22 @@
; Test strict 128-bit floating-point addition on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata)
define void @f1(fp128 *%ptr1, fp128 *%ptr2) {
; CHECK-LABEL: f1:
; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
; CHECK: wfaxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
; CHECK: vst [[RES]], 0(%r2)
; CHECK: br %r14
%f1 = load fp128, fp128 *%ptr1
%f2 = load fp128, fp128 *%ptr2
%sum = call fp128 @llvm.experimental.constrained.fadd.f128(
fp128 %f1, fp128 %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %sum, fp128 *%ptr1
ret void
}
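; (On z14, fp128 values live in vector registers, so the addition is a
; single wfaxb on the two loaded vectors rather than an AXBR on
; floating-point register pairs.)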


@ -0,0 +1,140 @@
; Verify that strict FP operations are not rescheduled.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
declare float @llvm.sqrt.f32(float)
declare void @llvm.s390.sfpc(i32)
; For non-strict operations, we expect the post-RA scheduler to
; separate the two square root instructions on z13.
define void @f1(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
; CHECK-LABEL: f1:
; CHECK: sqebr
; CHECK: {{aebr|sebr}}
; CHECK: sqebr
; CHECK: br %r14
%add = fadd float %f1, %f2
%sub = fsub float %f3, %f4
%sqrt1 = call float @llvm.sqrt.f32(float %f2)
%sqrt2 = call float @llvm.sqrt.f32(float %f4)
%ptr1 = getelementptr float, float *%ptr0, i64 1
%ptr2 = getelementptr float, float *%ptr0, i64 2
%ptr3 = getelementptr float, float *%ptr0, i64 3
store float %add, float *%ptr0
store float %sub, float *%ptr1
store float %sqrt1, float *%ptr2
store float %sqrt2, float *%ptr3
ret void
}
; But for strict operations, this must not happen.
define void @f2(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
; CHECK-LABEL: f2:
; CHECK: {{aebr|sebr}}
; CHECK: {{aebr|sebr}}
; CHECK: sqebr
; CHECK: sqebr
; CHECK: br %r14
%add = call float @llvm.experimental.constrained.fadd.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub = call float @llvm.experimental.constrained.fsub.f32(
float %f3, float %f4,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
float %f4,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%ptr1 = getelementptr float, float *%ptr0, i64 1
%ptr2 = getelementptr float, float *%ptr0, i64 2
%ptr3 = getelementptr float, float *%ptr0, i64 3
store float %add, float *%ptr0
store float %sub, float *%ptr1
store float %sqrt1, float *%ptr2
store float %sqrt2, float *%ptr3
ret void
}
; On the other hand, strict operations that use the fpexcept.ignore
; exception behaviour should be scheduled freely.
define void @f3(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
; CHECK-LABEL: f3:
; CHECK: sqebr
; CHECK: {{aebr|sebr}}
; CHECK: sqebr
; CHECK: br %r14
%add = call float @llvm.experimental.constrained.fadd.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.ignore")
%sub = call float @llvm.experimental.constrained.fsub.f32(
float %f3, float %f4,
metadata !"round.dynamic",
metadata !"fpexcept.ignore")
%sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.ignore")
%sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
float %f4,
metadata !"round.dynamic",
metadata !"fpexcept.ignore")
%ptr1 = getelementptr float, float *%ptr0, i64 1
%ptr2 = getelementptr float, float *%ptr0, i64 2
%ptr3 = getelementptr float, float *%ptr0, i64 3
store float %add, float *%ptr0
store float %sub, float *%ptr1
store float %sqrt1, float *%ptr2
store float %sqrt2, float *%ptr3
ret void
}
; However, even non-strict operations must not be scheduled across an SFPC.
define void @f4(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
; CHECK-LABEL: f4:
; CHECK: {{aebr|sebr}}
; CHECK: {{aebr|sebr}}
; CHECK: sfpc
; CHECK: sqebr
; CHECK: sqebr
; CHECK: br %r14
%add = fadd float %f1, %f2
%sub = fsub float %f3, %f4
call void @llvm.s390.sfpc(i32 0)
%sqrt1 = call float @llvm.sqrt.f32(float %f2)
%sqrt2 = call float @llvm.sqrt.f32(float %f4)
%ptr1 = getelementptr float, float *%ptr0, i64 1
%ptr2 = getelementptr float, float *%ptr0, i64 2
%ptr3 = getelementptr float, float *%ptr0, i64 3
store float %add, float *%ptr0
store float %sub, float *%ptr1
store float %sqrt1, float *%ptr2
store float %sqrt2, float *%ptr3
ret void
}


@ -0,0 +1,95 @@
; Test strict floating-point truncations.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata)
declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
; Test f64->f32.
define float @f1(double %d1, double %d2) {
; CHECK-LABEL: f1:
; CHECK-SCALAR: ledbr %f0, %f2
; CHECK-VECTOR: ledbra %f0, 0, %f2, 0
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.fptrunc.f32.f64(
double %d2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
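; (On vector-enabled targets the truncation uses LEDBRA, whose extra
; operands encode rounding controls; the 0 fields appear to select
; rounding according to the current FPC mode, matching round.dynamic.)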
; Test f128->f32.
define float @f2(fp128 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: lexbr %f0, %f0
; CHECK: br %r14
%val = load fp128, fp128 *%ptr
%res = call float @llvm.experimental.constrained.fptrunc.f32.f128(
fp128 %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Make sure that we don't use %f0 as the destination of LEXBR when %f2
; is still live.
define void @f3(float *%dst, fp128 *%ptr, float %d1, float %d2) {
; CHECK-LABEL: f3:
; CHECK: lexbr %f1, %f1
; CHECK: aebr %f1, %f2
; CHECK: ste %f1, 0(%r2)
; CHECK: br %r14
%val = load fp128, fp128 *%ptr
%conv = call float @llvm.experimental.constrained.fptrunc.f32.f128(
fp128 %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%res = call float @llvm.experimental.constrained.fadd.f32(
float %conv, float %d2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store float %res, float *%dst
ret void
}
; Test f128->f64.
define double @f4(fp128 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: ldxbr %f0, %f0
; CHECK: br %r14
%val = load fp128, fp128 *%ptr
%res = call double @llvm.experimental.constrained.fptrunc.f64.f128(
fp128 %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Like f3, but for f128->f64.
define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) {
; CHECK-LABEL: f5:
; CHECK: ldxbr %f1, %f1
; CHECK-SCALAR: adbr %f1, %f2
; CHECK-SCALAR: std %f1, 0(%r2)
; CHECK-VECTOR: wfadb [[REG:%f[0-9]+]], %f1, %f2
; CHECK-VECTOR: std [[REG]], 0(%r2)
; CHECK: br %r14
%val = load fp128, fp128 *%ptr
%conv = call double @llvm.experimental.constrained.fptrunc.f64.f128(
fp128 %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%res = call double @llvm.experimental.constrained.fadd.f64(
double %conv, double %d2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store double %res, double *%dst
ret void
}


@ -0,0 +1,33 @@
; Test strict extensions of f32 to f64.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
; Check register extension.
define double @f1(float %val) {
; CHECK-LABEL: f1:
; CHECK: ldebr %f0, %f0
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
metadata !"fpexcept.strict")
ret double %res
}
; Check extension from memory.
; FIXME: This should really use LDEB, but there is no strict "extload" yet.
define double @f2(float *%ptr) {
; CHECK-LABEL: f2:
; CHECK-SCALAR: le %f0, 0(%r2)
; CHECK-VECTOR: lde %f0, 0(%r2)
; CHECK: ldebr %f0, %f0
; CHECK: br %r14
%val = load float, float *%ptr
%res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
metadata !"fpexcept.strict")
ret double %res
}
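; The non-strict counterpart folds the load into LDEB itself, which is
; what the FIXME above refers to; a minimal sketch (not part of the
; committed test) would be:
define double @f2_nonstrict(float *%ptr) {
  %val = load float, float *%ptr
  %res = fpext float %val to double
  ret double %res
}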


@ -0,0 +1,35 @@
; Test strict extensions of f32 to f128.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata)
; Check register extension.
define void @f1(fp128 *%dst, float %val) {
; CHECK-LABEL: f1:
; CHECK: lxebr %f0, %f0
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
; Check extension from memory.
; FIXME: This should really use LXEB, but there is no strict "extload" yet.
define void @f2(fp128 *%dst, float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: le %f0, 0(%r3)
; CHECK: lxebr %f0, %f0
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
%val = load float, float *%ptr
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}


@ -0,0 +1,35 @@
; Test strict extensions of f64 to f128.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata)
; Check register extension.
define void @f1(fp128 *%dst, double %val) {
; CHECK-LABEL: f1:
; CHECK: lxdbr %f0, %f0
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
; Check extension from memory.
; FIXME: This should really use LXDB, but there is no strict "extload" yet.
define void @f2(fp128 *%dst, double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: ld %f0, 0(%r3)
; CHECK: lxdbr %f0, %f0
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
%val = load double, double *%ptr
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}


@ -0,0 +1,64 @@
; Test f128 floating-point strict truncations/extensions on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata)
declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata)
declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata)
; Test f128->f64.
define double @f1(fp128 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
; CHECK: wflrx %f0, [[REG]], 0, 0
; CHECK: br %r14
%val = load fp128, fp128 *%ptr
%res = call double @llvm.experimental.constrained.fptrunc.f64.f128(
fp128 %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test f128->f32.
define float @f2(fp128 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
; CHECK: wflrx %f0, [[REG]], 0, 3
; CHECK: ledbra %f0, 0, %f0, 0
; CHECK: br %r14
%val = load fp128, fp128 *%ptr
%res = call float @llvm.experimental.constrained.fptrunc.f32.f128(
fp128 %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test f64->f128.
define void @f3(fp128 *%dst, double %val) {
; CHECK-LABEL: f3:
; CHECK: wflld [[RES:%v[0-9]+]], %f0
; CHECK: vst [[RES]], 0(%r2)
; CHECK: br %r14
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
; Test f32->f128.
define void @f4(fp128 *%dst, float %val) {
; CHECK-LABEL: f4:
; CHECK: ldebr %f0, %f0
; CHECK: wflld [[RES:%v[0-9]+]], %f0
; CHECK: vst [[RES]], 0(%r2)
; CHECK: br %r14
%res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}


@ -0,0 +1,173 @@
; Test strict 32-bit floating-point division.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare float @foo()
declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
; Check register division.
define float @f1(float %f1, float %f2) {
; CHECK-LABEL: f1:
; CHECK: debr %f0, %f2
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.fdiv.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check the low end of the DEB range.
define float @f2(float %f1, float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: deb %f0, 0(%r2)
; CHECK: br %r14
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fdiv.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check the high end of the aligned DEB range.
define float @f3(float %f1, float *%base) {
; CHECK-LABEL: f3:
; CHECK: deb %f0, 4092(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1023
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fdiv.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
define float @f4(float %f1, float *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: deb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1024
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fdiv.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check negative displacements, which also need separate address logic.
define float @f5(float %f1, float *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -4
; CHECK: deb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 -1
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fdiv.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check that DEB allows indices.
define float @f6(float %f1, float *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 2
; CHECK: deb %f0, 400(%r1,%r2)
; CHECK: br %r14
%ptr1 = getelementptr float, float *%base, i64 %index
%ptr2 = getelementptr float, float *%ptr1, i64 100
%f2 = load float, float *%ptr2
%res = call float @llvm.experimental.constrained.fdiv.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check that divisions of spilled values can use DEB rather than DEBR.
define float @f7(float *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK-SCALAR: deb %f0, 16{{[04]}}(%r15)
; CHECK: br %r14
%ptr1 = getelementptr float, float *%ptr0, i64 2
%ptr2 = getelementptr float, float *%ptr0, i64 4
%ptr3 = getelementptr float, float *%ptr0, i64 6
%ptr4 = getelementptr float, float *%ptr0, i64 8
%ptr5 = getelementptr float, float *%ptr0, i64 10
%ptr6 = getelementptr float, float *%ptr0, i64 12
%ptr7 = getelementptr float, float *%ptr0, i64 14
%ptr8 = getelementptr float, float *%ptr0, i64 16
%ptr9 = getelementptr float, float *%ptr0, i64 18
%ptr10 = getelementptr float, float *%ptr0, i64 20
%val0 = load float, float *%ptr0
%val1 = load float, float *%ptr1
%val2 = load float, float *%ptr2
%val3 = load float, float *%ptr3
%val4 = load float, float *%ptr4
%val5 = load float, float *%ptr5
%val6 = load float, float *%ptr6
%val7 = load float, float *%ptr7
%val8 = load float, float *%ptr8
%val9 = load float, float *%ptr9
%val10 = load float, float *%ptr10
%ret = call float @foo()
%div0 = call float @llvm.experimental.constrained.fdiv.f32(
float %ret, float %val0,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div1 = call float @llvm.experimental.constrained.fdiv.f32(
float %div0, float %val1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div2 = call float @llvm.experimental.constrained.fdiv.f32(
float %div1, float %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div3 = call float @llvm.experimental.constrained.fdiv.f32(
float %div2, float %val3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div4 = call float @llvm.experimental.constrained.fdiv.f32(
float %div3, float %val4,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div5 = call float @llvm.experimental.constrained.fdiv.f32(
float %div4, float %val5,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div6 = call float @llvm.experimental.constrained.fdiv.f32(
float %div5, float %val6,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div7 = call float @llvm.experimental.constrained.fdiv.f32(
float %div6, float %val7,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div8 = call float @llvm.experimental.constrained.fdiv.f32(
float %div7, float %val8,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div9 = call float @llvm.experimental.constrained.fdiv.f32(
float %div8, float %val9,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div10 = call float @llvm.experimental.constrained.fdiv.f32(
float %div9, float %val10,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %div10
}


@ -0,0 +1,173 @@
; Test strict 64-bit floating-point division.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
; Check register division.
define double @f1(double %f1, double %f2) {
; CHECK-LABEL: f1:
; CHECK: ddbr %f0, %f2
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.fdiv.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the low end of the DDB range.
define double @f2(double %f1, double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: ddb %f0, 0(%r2)
; CHECK: br %r14
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fdiv.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the high end of the aligned DDB range.
define double @f3(double %f1, double *%base) {
; CHECK-LABEL: f3:
; CHECK: ddb %f0, 4088(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 511
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fdiv.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the next doubleword up, which needs separate address logic.
; Other sequences besides this one would be OK.
define double @f4(double %f1, double *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: ddb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 512
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fdiv.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check negative displacements, which also need separate address logic.
define double @f5(double %f1, double *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -8
; CHECK: ddb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 -1
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fdiv.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check that DDB allows indices.
define double @f6(double %f1, double *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 3
; CHECK: ddb %f0, 800(%r1,%r2)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%base, i64 %index
%ptr2 = getelementptr double, double *%ptr1, i64 100
%f2 = load double, double *%ptr2
%res = call double @llvm.experimental.constrained.fdiv.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check that divisions of spilled values can use DDB rather than DDBR.
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK-SCALAR: ddb %f0, 160(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4
%ptr3 = getelementptr double, double *%ptr0, i64 6
%ptr4 = getelementptr double, double *%ptr0, i64 8
%ptr5 = getelementptr double, double *%ptr0, i64 10
%ptr6 = getelementptr double, double *%ptr0, i64 12
%ptr7 = getelementptr double, double *%ptr0, i64 14
%ptr8 = getelementptr double, double *%ptr0, i64 16
%ptr9 = getelementptr double, double *%ptr0, i64 18
%ptr10 = getelementptr double, double *%ptr0, i64 20
%val0 = load double, double *%ptr0
%val1 = load double, double *%ptr1
%val2 = load double, double *%ptr2
%val3 = load double, double *%ptr3
%val4 = load double, double *%ptr4
%val5 = load double, double *%ptr5
%val6 = load double, double *%ptr6
%val7 = load double, double *%ptr7
%val8 = load double, double *%ptr8
%val9 = load double, double *%ptr9
%val10 = load double, double *%ptr10
%ret = call double @foo()
%div0 = call double @llvm.experimental.constrained.fdiv.f64(
double %ret, double %val0,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div1 = call double @llvm.experimental.constrained.fdiv.f64(
double %div0, double %val1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div2 = call double @llvm.experimental.constrained.fdiv.f64(
double %div1, double %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div3 = call double @llvm.experimental.constrained.fdiv.f64(
double %div2, double %val3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div4 = call double @llvm.experimental.constrained.fdiv.f64(
double %div3, double %val4,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div5 = call double @llvm.experimental.constrained.fdiv.f64(
double %div4, double %val5,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div6 = call double @llvm.experimental.constrained.fdiv.f64(
double %div5, double %val6,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div7 = call double @llvm.experimental.constrained.fdiv.f64(
double %div6, double %val7,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div8 = call double @llvm.experimental.constrained.fdiv.f64(
double %div7, double %val8,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div9 = call double @llvm.experimental.constrained.fdiv.f64(
double %div8, double %val9,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%div10 = call double @llvm.experimental.constrained.fdiv.f64(
double %div9, double %val10,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %div10
}


@ -0,0 +1,25 @@
; Test strict 128-bit floating-point division.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata)
; There is no memory form of 128-bit division.
define void @f1(fp128 *%ptr, float %f2) {
; CHECK-LABEL: f1:
; CHECK-DAG: lxebr %f0, %f0
; CHECK-DAG: ld %f1, 0(%r2)
; CHECK-DAG: ld %f3, 8(%r2)
; CHECK: dxbr %f1, %f0
; CHECK: std %f1, 0(%r2)
; CHECK: std %f3, 8(%r2)
; CHECK: br %r14
%f1 = load fp128, fp128 *%ptr
%f2x = fpext float %f2 to fp128
%sum = call fp128 @llvm.experimental.constrained.fdiv.f128(
fp128 %f1, fp128 %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %sum, fp128 *%ptr
ret void
}


@ -0,0 +1,22 @@
; Test strict 128-bit floating-point division on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata)
define void @f1(fp128 *%ptr1, fp128 *%ptr2) {
; CHECK-LABEL: f1:
; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
; CHECK: wfdxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
; CHECK: vst [[RES]], 0(%r2)
; CHECK: br %r14
%f1 = load fp128, fp128 *%ptr1
%f2 = load fp128, fp128 *%ptr2
%sum = call fp128 @llvm.experimental.constrained.fdiv.f128(
fp128 %f1, fp128 %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %sum, fp128 *%ptr1
ret void
}


@ -0,0 +1,173 @@
; Test strict multiplication of two f32s, producing an f32 result.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare float @foo()
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
; Check register multiplication.
define float @f1(float %f1, float %f2) {
; CHECK-LABEL: f1:
; CHECK: meebr %f0, %f2
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.fmul.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check the low end of the MEEB range.
define float @f2(float %f1, float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: meeb %f0, 0(%r2)
; CHECK: br %r14
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fmul.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check the high end of the aligned MEEB range.
define float @f3(float %f1, float *%base) {
; CHECK-LABEL: f3:
; CHECK: meeb %f0, 4092(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1023
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fmul.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
define float @f4(float %f1, float *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: meeb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1024
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fmul.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check negative displacements, which also need separate address logic.
define float @f5(float %f1, float *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -4
; CHECK: meeb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 -1
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fmul.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check that MEEB allows indices.
define float @f6(float %f1, float *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 2
; CHECK: meeb %f0, 400(%r1,%r2)
; CHECK: br %r14
%ptr1 = getelementptr float, float *%base, i64 %index
%ptr2 = getelementptr float, float *%ptr1, i64 100
%f2 = load float, float *%ptr2
%res = call float @llvm.experimental.constrained.fmul.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check that multiplications of spilled values can use MEEB rather than MEEBR.
define float @f7(float *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK-SCALAR: meeb %f0, 16{{[04]}}(%r15)
; CHECK: br %r14
%ptr1 = getelementptr float, float *%ptr0, i64 2
%ptr2 = getelementptr float, float *%ptr0, i64 4
%ptr3 = getelementptr float, float *%ptr0, i64 6
%ptr4 = getelementptr float, float *%ptr0, i64 8
%ptr5 = getelementptr float, float *%ptr0, i64 10
%ptr6 = getelementptr float, float *%ptr0, i64 12
%ptr7 = getelementptr float, float *%ptr0, i64 14
%ptr8 = getelementptr float, float *%ptr0, i64 16
%ptr9 = getelementptr float, float *%ptr0, i64 18
%ptr10 = getelementptr float, float *%ptr0, i64 20
%val0 = load float, float *%ptr0
%val1 = load float, float *%ptr1
%val2 = load float, float *%ptr2
%val3 = load float, float *%ptr3
%val4 = load float, float *%ptr4
%val5 = load float, float *%ptr5
%val6 = load float, float *%ptr6
%val7 = load float, float *%ptr7
%val8 = load float, float *%ptr8
%val9 = load float, float *%ptr9
%val10 = load float, float *%ptr10
%ret = call float @foo()
%mul0 = call float @llvm.experimental.constrained.fmul.f32(
float %ret, float %val0,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul1 = call float @llvm.experimental.constrained.fmul.f32(
float %mul0, float %val1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul2 = call float @llvm.experimental.constrained.fmul.f32(
float %mul1, float %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul3 = call float @llvm.experimental.constrained.fmul.f32(
float %mul2, float %val3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul4 = call float @llvm.experimental.constrained.fmul.f32(
float %mul3, float %val4,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul5 = call float @llvm.experimental.constrained.fmul.f32(
float %mul4, float %val5,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul6 = call float @llvm.experimental.constrained.fmul.f32(
float %mul5, float %val6,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul7 = call float @llvm.experimental.constrained.fmul.f32(
float %mul6, float %val7,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul8 = call float @llvm.experimental.constrained.fmul.f32(
float %mul7, float %val8,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul9 = call float @llvm.experimental.constrained.fmul.f32(
float %mul8, float %val9,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul10 = call float @llvm.experimental.constrained.fmul.f32(
float %mul9, float %val10,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %mul10
}


@ -0,0 +1,283 @@
; Test strict multiplication of two f32s, producing an f64 result.
; FIXME: we do not have a strict version of fpext yet
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare float @foo()
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
; Check register multiplication.
define double @f1(float %f1, float %f2) {
; CHECK-LABEL: f1:
; CHECK: mdebr %f0, %f2
; CHECK: br %r14
%f1x = fpext float %f1 to double
%f2x = fpext float %f2 to double
%res = call double @llvm.experimental.constrained.fmul.f64(
double %f1x, double %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
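; (The fpext/fmul pair above is selected as a single MDEBR, which takes
; two f32 operands and produces their exact f64 product directly.)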
; Check the low end of the MDEB range.
define double @f2(float %f1, float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: mdeb %f0, 0(%r2)
; CHECK: br %r14
%f2 = load float, float *%ptr
%f1x = fpext float %f1 to double
%f2x = fpext float %f2 to double
%res = call double @llvm.experimental.constrained.fmul.f64(
double %f1x, double %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the high end of the aligned MDEB range.
define double @f3(float %f1, float *%base) {
; CHECK-LABEL: f3:
; CHECK: mdeb %f0, 4092(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1023
%f2 = load float, float *%ptr
%f1x = fpext float %f1 to double
%f2x = fpext float %f2 to double
%res = call double @llvm.experimental.constrained.fmul.f64(
double %f1x, double %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
define double @f4(float %f1, float *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: mdeb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1024
%f2 = load float, float *%ptr
%f1x = fpext float %f1 to double
%f2x = fpext float %f2 to double
%res = call double @llvm.experimental.constrained.fmul.f64(
double %f1x, double %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check negative displacements, which also need separate address logic.
define double @f5(float %f1, float *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -4
; CHECK: mdeb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 -1
%f2 = load float, float *%ptr
%f1x = fpext float %f1 to double
%f2x = fpext float %f2 to double
%res = call double @llvm.experimental.constrained.fmul.f64(
double %f1x, double %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check that MDEB allows indices.
define double @f6(float %f1, float *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 2
; CHECK: mdeb %f0, 400(%r1,%r2)
; CHECK: br %r14
%ptr1 = getelementptr float, float *%base, i64 %index
%ptr2 = getelementptr float, float *%ptr1, i64 100
%f2 = load float, float *%ptr2
%f1x = fpext float %f1 to double
%f2x = fpext float %f2 to double
%res = call double @llvm.experimental.constrained.fmul.f64(
double %f1x, double %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check that multiplications of spilled values can use MDEB rather than MDEBR.
define float @f7(float *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK: mdeb %f0, 16{{[04]}}(%r15)
; CHECK: br %r14
%ptr1 = getelementptr float, float *%ptr0, i64 2
%ptr2 = getelementptr float, float *%ptr0, i64 4
%ptr3 = getelementptr float, float *%ptr0, i64 6
%ptr4 = getelementptr float, float *%ptr0, i64 8
%ptr5 = getelementptr float, float *%ptr0, i64 10
%ptr6 = getelementptr float, float *%ptr0, i64 12
%ptr7 = getelementptr float, float *%ptr0, i64 14
%ptr8 = getelementptr float, float *%ptr0, i64 16
%ptr9 = getelementptr float, float *%ptr0, i64 18
%ptr10 = getelementptr float, float *%ptr0, i64 20
%val0 = load float, float *%ptr0
%val1 = load float, float *%ptr1
%val2 = load float, float *%ptr2
%val3 = load float, float *%ptr3
%val4 = load float, float *%ptr4
%val5 = load float, float *%ptr5
%val6 = load float, float *%ptr6
%val7 = load float, float *%ptr7
%val8 = load float, float *%ptr8
%val9 = load float, float *%ptr9
%val10 = load float, float *%ptr10
%frob0 = fadd float %val0, %val0
%frob1 = fadd float %val1, %val1
%frob2 = fadd float %val2, %val2
%frob3 = fadd float %val3, %val3
%frob4 = fadd float %val4, %val4
%frob5 = fadd float %val5, %val5
%frob6 = fadd float %val6, %val6
%frob7 = fadd float %val7, %val7
%frob8 = fadd float %val8, %val8
%frob9 = fadd float %val9, %val9
%frob10 = fadd float %val9, %val10
store float %frob0, float *%ptr0
store float %frob1, float *%ptr1
store float %frob2, float *%ptr2
store float %frob3, float *%ptr3
store float %frob4, float *%ptr4
store float %frob5, float *%ptr5
store float %frob6, float *%ptr6
store float %frob7, float *%ptr7
store float %frob8, float *%ptr8
store float %frob9, float *%ptr9
store float %frob10, float *%ptr10
%ret = call float @foo()
%accext0 = fpext float %ret to double
%ext0 = fpext float %frob0 to double
%mul0 = call double @llvm.experimental.constrained.fmul.f64(
double %accext0, double %ext0,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%extra0 = call double @llvm.experimental.constrained.fmul.f64(
double %mul0, double 1.01,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc0 = fptrunc double %extra0 to float
%accext1 = fpext float %trunc0 to double
%ext1 = fpext float %frob1 to double
%mul1 = call double @llvm.experimental.constrained.fmul.f64(
double %accext1, double %ext1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%extra1 = call double @llvm.experimental.constrained.fmul.f64(
double %mul1, double 1.11,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc1 = fptrunc double %extra1 to float
%accext2 = fpext float %trunc1 to double
%ext2 = fpext float %frob2 to double
%mul2 = call double @llvm.experimental.constrained.fmul.f64(
double %accext2, double %ext2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%extra2 = call double @llvm.experimental.constrained.fmul.f64(
double %mul2, double 1.21,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc2 = fptrunc double %extra2 to float
%accext3 = fpext float %trunc2 to double
%ext3 = fpext float %frob3 to double
%mul3 = call double @llvm.experimental.constrained.fmul.f64(
double %accext3, double %ext3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%extra3 = call double @llvm.experimental.constrained.fmul.f64(
double %mul3, double 1.31,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc3 = fptrunc double %extra3 to float
%accext4 = fpext float %trunc3 to double
%ext4 = fpext float %frob4 to double
%mul4 = call double @llvm.experimental.constrained.fmul.f64(
double %accext4, double %ext4,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%extra4 = call double @llvm.experimental.constrained.fmul.f64(
double %mul4, double 1.41,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc4 = fptrunc double %extra4 to float
%accext5 = fpext float %trunc4 to double
%ext5 = fpext float %frob5 to double
%mul5 = call double @llvm.experimental.constrained.fmul.f64(
double %accext5, double %ext5,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%extra5 = call double @llvm.experimental.constrained.fmul.f64(
double %mul5, double 1.51,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc5 = fptrunc double %extra5 to float
%accext6 = fpext float %trunc5 to double
%ext6 = fpext float %frob6 to double
%mul6 = call double @llvm.experimental.constrained.fmul.f64(
double %accext6, double %ext6,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%extra6 = call double @llvm.experimental.constrained.fmul.f64(
double %mul6, double 1.61,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc6 = fptrunc double %extra6 to float
%accext7 = fpext float %trunc6 to double
%ext7 = fpext float %frob7 to double
%mul7 = call double @llvm.experimental.constrained.fmul.f64(
double %accext7, double %ext7,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%extra7 = call double @llvm.experimental.constrained.fmul.f64(
double %mul7, double 1.71,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc7 = fptrunc double %extra7 to float
%accext8 = fpext float %trunc7 to double
%ext8 = fpext float %frob8 to double
%mul8 = call double @llvm.experimental.constrained.fmul.f64(
double %accext8, double %ext8,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%extra8 = call double @llvm.experimental.constrained.fmul.f64(
double %mul8, double 1.81,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc8 = fptrunc double %extra8 to float
%accext9 = fpext float %trunc8 to double
%ext9 = fpext float %frob9 to double
%mul9 = call double @llvm.experimental.constrained.fmul.f64(
double %accext9, double %ext9,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%extra9 = call double @llvm.experimental.constrained.fmul.f64(
double %mul9, double 1.91,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc9 = fptrunc double %extra9 to float
ret float %trunc9
}


@ -0,0 +1,173 @@
; Test strict multiplication of two f64s, producing an f64 result.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
; Check register multiplication.
define double @f1(double %f1, double %f2) {
; CHECK-LABEL: f1:
; CHECK: mdbr %f0, %f2
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.fmul.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the low end of the MDB range.
define double @f2(double %f1, double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: mdb %f0, 0(%r2)
; CHECK: br %r14
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fmul.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the high end of the aligned MDB range.
define double @f3(double %f1, double *%base) {
; CHECK-LABEL: f3:
; CHECK: mdb %f0, 4088(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 511
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fmul.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the next doubleword up, which needs separate address logic.
; Other sequences besides this one would be OK.
define double @f4(double %f1, double *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: mdb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 512
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fmul.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
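; Editorial note on the addressing in f3/f4: mdb encodes a 12-bit unsigned
; displacement, covering byte offsets 0..4095. Element 511 * 8 = 4088 still
; fits, while element 512 * 8 = 4096 does not, so f4 must first adjust the
; base register with aghi.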
; Check negative displacements, which also need separate address logic.
define double @f5(double %f1, double *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -8
; CHECK: mdb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 -1
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fmul.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check that MDB allows indices.
define double @f6(double %f1, double *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 3
; CHECK: mdb %f0, 800(%r1,%r2)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%base, i64 %index
%ptr2 = getelementptr double, double *%ptr1, i64 100
%f2 = load double, double *%ptr2
%res = call double @llvm.experimental.constrained.fmul.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check that multiplications of spilled values can use MDB rather than MDBR.
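; (Editorial note: the call to @foo clobbers the call-clobbered FPRs, forcing
; the values loaded before it to be spilled. Folding each reload into mdb's
; memory operand saves a separate load instruction; 160(%r15) is the first
; spill slot above the 160-byte register save area of the s390x ELF ABI.)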
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK-SCALAR: mdb %f0, 160(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4
%ptr3 = getelementptr double, double *%ptr0, i64 6
%ptr4 = getelementptr double, double *%ptr0, i64 8
%ptr5 = getelementptr double, double *%ptr0, i64 10
%ptr6 = getelementptr double, double *%ptr0, i64 12
%ptr7 = getelementptr double, double *%ptr0, i64 14
%ptr8 = getelementptr double, double *%ptr0, i64 16
%ptr9 = getelementptr double, double *%ptr0, i64 18
%ptr10 = getelementptr double, double *%ptr0, i64 20
%val0 = load double, double *%ptr0
%val1 = load double, double *%ptr1
%val2 = load double, double *%ptr2
%val3 = load double, double *%ptr3
%val4 = load double, double *%ptr4
%val5 = load double, double *%ptr5
%val6 = load double, double *%ptr6
%val7 = load double, double *%ptr7
%val8 = load double, double *%ptr8
%val9 = load double, double *%ptr9
%val10 = load double, double *%ptr10
%ret = call double @foo()
%mul0 = call double @llvm.experimental.constrained.fmul.f64(
double %ret, double %val0,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul1 = call double @llvm.experimental.constrained.fmul.f64(
double %mul0, double %val1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul2 = call double @llvm.experimental.constrained.fmul.f64(
double %mul1, double %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul3 = call double @llvm.experimental.constrained.fmul.f64(
double %mul2, double %val3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul4 = call double @llvm.experimental.constrained.fmul.f64(
double %mul3, double %val4,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul5 = call double @llvm.experimental.constrained.fmul.f64(
double %mul4, double %val5,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul6 = call double @llvm.experimental.constrained.fmul.f64(
double %mul5, double %val6,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul7 = call double @llvm.experimental.constrained.fmul.f64(
double %mul6, double %val7,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul8 = call double @llvm.experimental.constrained.fmul.f64(
double %mul7, double %val8,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul9 = call double @llvm.experimental.constrained.fmul.f64(
double %mul8, double %val9,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%mul10 = call double @llvm.experimental.constrained.fmul.f64(
double %mul9, double %val10,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %mul10
}

View File

@ -0,0 +1,314 @@
; Test strict multiplication of two f64s, producing an f128 result.
; FIXME: we do not have a strict version of fpext yet
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata)
declare double @foo()
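; Editorial sketch for the FIXME above: once a strict fpext is available, the
; extensions could plausibly be written as below (assumed signature with a
; single exception-behavior metadata operand; hypothetical at this point):
;
;   %f1x = call fp128 @llvm.experimental.constrained.fpext.f128.f64(
;                       double %f1, metadata !"fpexcept.strict")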
; Check register multiplication. "mxdbr %f0, %f2" is not valid from LLVM's
; point of view, because %f2 is the low register of the FP128 %f0. Pass the
; multiplier in %f4 instead.
define void @f1(double %f1, double %dummy, double %f2, fp128 *%dst) {
; CHECK-LABEL: f1:
; CHECK: mxdbr %f0, %f4
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
%f1x = fpext double %f1 to fp128
%f2x = fpext double %f2 to fp128
%res = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %f1x, fp128 %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
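; (Editorial note: an fp128 value occupies an FPR pair such as %f0/%f2 or
; %f4/%f6, with the first register holding the high-order doubleword. That
; pairing is why the result is stored with two std instructions and why the
; scalar multiplier must stay out of %f2 here.)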
; Check the low end of the MXDB range.
define void @f2(double %f1, double *%ptr, fp128 *%dst) {
; CHECK-LABEL: f2:
; CHECK: mxdb %f0, 0(%r2)
; CHECK: std %f0, 0(%r3)
; CHECK: std %f2, 8(%r3)
; CHECK: br %r14
%f2 = load double, double *%ptr
%f1x = fpext double %f1 to fp128
%f2x = fpext double %f2 to fp128
%res = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %f1x, fp128 %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
; Check the high end of the aligned MXDB range.
define void @f3(double %f1, double *%base, fp128 *%dst) {
; CHECK-LABEL: f3:
; CHECK: mxdb %f0, 4088(%r2)
; CHECK: std %f0, 0(%r3)
; CHECK: std %f2, 8(%r3)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 511
%f2 = load double, double *%ptr
%f1x = fpext double %f1 to fp128
%f2x = fpext double %f2 to fp128
%res = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %f1x, fp128 %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
; Check the next doubleword up, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f4(double %f1, double *%base, fp128 *%dst) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: mxdb %f0, 0(%r2)
; CHECK: std %f0, 0(%r3)
; CHECK: std %f2, 8(%r3)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 512
%f2 = load double, double *%ptr
%f1x = fpext double %f1 to fp128
%f2x = fpext double %f2 to fp128
%res = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %f1x, fp128 %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
; Check negative displacements, which also need separate address logic.
define void @f5(double %f1, double *%base, fp128 *%dst) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -8
; CHECK: mxdb %f0, 0(%r2)
; CHECK: std %f0, 0(%r3)
; CHECK: std %f2, 8(%r3)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 -1
%f2 = load double, double *%ptr
%f1x = fpext double %f1 to fp128
%f2x = fpext double %f2 to fp128
%res = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %f1x, fp128 %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
; Check that MXDB allows indices.
define void @f6(double %f1, double *%base, i64 %index, fp128 *%dst) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 3
; CHECK: mxdb %f0, 800(%r1,%r2)
; CHECK: std %f0, 0(%r4)
; CHECK: std %f2, 8(%r4)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%base, i64 %index
%ptr2 = getelementptr double, double *%ptr1, i64 100
%f2 = load double, double *%ptr2
%f1x = fpext double %f1 to fp128
%f2x = fpext double %f2 to fp128
%res = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %f1x, fp128 %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
; Check that multiplications of spilled values can use MXDB rather than MXDBR.
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK: mxdb %f0, 160(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4
%ptr3 = getelementptr double, double *%ptr0, i64 6
%ptr4 = getelementptr double, double *%ptr0, i64 8
%ptr5 = getelementptr double, double *%ptr0, i64 10
%ptr6 = getelementptr double, double *%ptr0, i64 12
%ptr7 = getelementptr double, double *%ptr0, i64 14
%ptr8 = getelementptr double, double *%ptr0, i64 16
%ptr9 = getelementptr double, double *%ptr0, i64 18
%ptr10 = getelementptr double, double *%ptr0, i64 20
%val0 = load double, double *%ptr0
%val1 = load double, double *%ptr1
%val2 = load double, double *%ptr2
%val3 = load double, double *%ptr3
%val4 = load double, double *%ptr4
%val5 = load double, double *%ptr5
%val6 = load double, double *%ptr6
%val7 = load double, double *%ptr7
%val8 = load double, double *%ptr8
%val9 = load double, double *%ptr9
%val10 = load double, double *%ptr10
%frob0 = fadd double %val0, %val0
%frob1 = fadd double %val1, %val1
%frob2 = fadd double %val2, %val2
%frob3 = fadd double %val3, %val3
%frob4 = fadd double %val4, %val4
%frob5 = fadd double %val5, %val5
%frob6 = fadd double %val6, %val6
%frob7 = fadd double %val7, %val7
%frob8 = fadd double %val8, %val8
%frob9 = fadd double %val9, %val9
%frob10 = fadd double %val10, %val10
store double %frob0, double *%ptr0
store double %frob1, double *%ptr1
store double %frob2, double *%ptr2
store double %frob3, double *%ptr3
store double %frob4, double *%ptr4
store double %frob5, double *%ptr5
store double %frob6, double *%ptr6
store double %frob7, double *%ptr7
store double %frob8, double *%ptr8
store double %frob9, double *%ptr9
store double %frob10, double *%ptr10
%ret = call double @foo()
%accext0 = fpext double %ret to fp128
%ext0 = fpext double %frob0 to fp128
%mul0 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %accext0, fp128 %ext0,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%const0 = fpext double 1.01 to fp128
%extra0 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %mul0, fp128 %const0,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc0 = fptrunc fp128 %extra0 to double
%accext1 = fpext double %trunc0 to fp128
%ext1 = fpext double %frob1 to fp128
%mul1 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %accext1, fp128 %ext1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%const1 = fpext double 1.11 to fp128
%extra1 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %mul1, fp128 %const1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc1 = fptrunc fp128 %extra1 to double
%accext2 = fpext double %trunc1 to fp128
%ext2 = fpext double %frob2 to fp128
%mul2 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %accext2, fp128 %ext2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%const2 = fpext double 1.21 to fp128
%extra2 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %mul2, fp128 %const2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc2 = fptrunc fp128 %extra2 to double
%accext3 = fpext double %trunc2 to fp128
%ext3 = fpext double %frob3 to fp128
%mul3 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %accext3, fp128 %ext3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%const3 = fpext double 1.31 to fp128
%extra3 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %mul3, fp128 %const3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc3 = fptrunc fp128 %extra3 to double
%accext4 = fpext double %trunc3 to fp128
%ext4 = fpext double %frob4 to fp128
%mul4 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %accext4, fp128 %ext4,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%const4 = fpext double 1.41 to fp128
%extra4 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %mul4, fp128 %const4,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc4 = fptrunc fp128 %extra4 to double
%accext5 = fpext double %trunc4 to fp128
%ext5 = fpext double %frob5 to fp128
%mul5 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %accext5, fp128 %ext5,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%const5 = fpext double 1.51 to fp128
%extra5 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %mul5, fp128 %const5,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc5 = fptrunc fp128 %extra5 to double
%accext6 = fpext double %trunc5 to fp128
%ext6 = fpext double %frob6 to fp128
%mul6 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %accext6, fp128 %ext6,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%const6 = fpext double 1.61 to fp128
%extra6 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %mul6, fp128 %const6,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc6 = fptrunc fp128 %extra6 to double
%accext7 = fpext double %trunc6 to fp128
%ext7 = fpext double %frob7 to fp128
%mul7 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %accext7, fp128 %ext7,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%const7 = fpext double 1.71 to fp128
%extra7 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %mul7, fp128 %const7,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc7 = fptrunc fp128 %extra7 to double
%accext8 = fpext double %trunc7 to fp128
%ext8 = fpext double %frob8 to fp128
%mul8 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %accext8, fp128 %ext8,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%const8 = fpext double 1.81 to fp128
%extra8 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %mul8, fp128 %const8,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc8 = fptrunc fp128 %extra8 to double
%accext9 = fpext double %trunc8 to fp128
%ext9 = fpext double %frob9 to fp128
%mul9 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %accext9, fp128 %ext9,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%const9 = fpext double 1.91 to fp128
%extra9 = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %mul9, fp128 %const9,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%trunc9 = fptrunc fp128 %extra9 to double
ret double %trunc9
}

View File

@ -0,0 +1,25 @@
; Test strict multiplication of two f128s.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata)
; There is no memory form of 128-bit multiplication.
define void @f1(fp128 *%ptr, float %f2) {
; CHECK-LABEL: f1:
; CHECK-DAG: lxebr %f0, %f0
; CHECK-DAG: ld %f1, 0(%r2)
; CHECK-DAG: ld %f3, 8(%r2)
; CHECK: mxbr %f0, %f1
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
%f1 = load fp128, fp128 *%ptr
%f2x = fpext float %f2 to fp128
%diff = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %f1, fp128 %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %diff, fp128 *%ptr
ret void
}

View File

@ -0,0 +1,137 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
define float @f1(float %f1, float %f2, float %acc) {
; CHECK-LABEL: f1:
; CHECK-SCALAR: maebr %f4, %f0, %f2
; CHECK-SCALAR: ler %f0, %f4
; CHECK-VECTOR: wfmasb %f0, %f0, %f2, %f4
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
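; For comparison, an editorial sketch (not part of the original test) of the
; non-strict equivalent, which selects the same fused instructions via the
; regular fma intrinsic:
define float @f1_nonstrict(float %f1, float %f2, float %acc) {
%res = call float @llvm.fma.f32(float %f1, float %f2, float %acc)
ret float %res
}
declare float @llvm.fma.f32(float, float, float)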
define float @f2(float %f1, float *%ptr, float %acc) {
; CHECK-LABEL: f2:
; CHECK: maeb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f3(float %f1, float *%base, float %acc) {
; CHECK-LABEL: f3:
; CHECK: maeb %f2, %f0, 4092(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1023
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f4(float %f1, float *%base, float %acc) {
; The important thing here is that we don't generate an out-of-range
; displacement. Other sequences besides this one would be OK.
;
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: maeb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1024
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f5(float %f1, float *%base, float %acc) {
; Here too the important thing is that we don't generate an out-of-range
; displacement. Other sequences besides this one would be OK.
;
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -4
; CHECK: maeb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 -1
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f6(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 2
; CHECK: maeb %f2, %f0, 0(%r1,%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 %index
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f7(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f7:
; CHECK: sllg %r1, %r3, 2
; CHECK: maeb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 1023
%ptr = getelementptr float, float *%base, i64 %index2
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f8(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f8:
; CHECK: sllg %r1, %r3, 2
; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
; CHECK: maeb %f2, %f0, 0(%r1)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 1024
%ptr = getelementptr float, float *%base, i64 %index2
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}

View File

@ -0,0 +1,130 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare double @llvm.experimental.constrained.fma.f64(double %f1, double %f2, double %f3, metadata, metadata)
define double @f1(double %f1, double %f2, double %acc) {
; CHECK-LABEL: f1:
; CHECK-SCALAR: madbr %f4, %f0, %f2
; CHECK-SCALAR: ldr %f0, %f4
; CHECK-VECTOR: wfmadb %f0, %f0, %f2, %f4
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f2(double %f1, double *%ptr, double %acc) {
; CHECK-LABEL: f2:
; CHECK: madb %f2, %f0, 0(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f3(double %f1, double *%base, double %acc) {
; CHECK-LABEL: f3:
; CHECK: madb %f2, %f0, 4088(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 511
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f4(double %f1, double *%base, double %acc) {
; The important thing here is that we don't generate an out-of-range
; displacement. Other sequences besides this one would be OK.
;
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: madb %f2, %f0, 0(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 512
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f5(double %f1, double *%base, double %acc) {
; Here too the important thing is that we don't generate an out-of-range
; displacement. Other sequences besides this one would be OK.
;
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -8
; CHECK: madb %f2, %f0, 0(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 -1
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f6(double %f1, double *%base, i64 %index, double %acc) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 3
; CHECK: madb %f2, %f0, 0(%r1,%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 %index
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f7(double %f1, double *%base, i64 %index, double %acc) {
; CHECK-LABEL: f7:
; CHECK: sllg %r1, %r3, 3
; CHECK: madb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}})
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 511
%ptr = getelementptr double, double *%base, i64 %index2
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f8(double %f1, double *%base, i64 %index, double %acc) {
; CHECK-LABEL: f8:
; CHECK: sllg %r1, %r3, 3
; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
; CHECK: madb %f2, %f0, 0(%r1)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 512
%ptr = getelementptr double, double *%base, i64 %index2
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}

View File

@ -0,0 +1,145 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare float @llvm.experimental.constrained.fma.f32(float %f1, float %f2, float %f3, metadata, metadata)
define float @f1(float %f1, float %f2, float %acc) {
; CHECK-LABEL: f1:
; CHECK-SCALAR: msebr %f4, %f0, %f2
; CHECK-SCALAR: ler %f0, %f4
; CHECK-VECTOR: wfmssb %f0, %f0, %f2, %f4
; CHECK: br %r14
%negacc = fsub float -0.0, %acc
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
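; (Editorial note: fma(%f1, %f2, -%acc) computes %f1 * %f2 - %acc, which is
; exactly what the multiply-and-subtract instructions provide. The
; "fsub float -0.0, %acc" is the canonical IR spelling of a float negation
; here, so it folds into mseb/msebr instead of costing an instruction.)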
define float @f2(float %f1, float *%ptr, float %acc) {
; CHECK-LABEL: f2:
; CHECK: mseb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
%f2 = load float, float *%ptr
%negacc = fsub float -0.0, %acc
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f3(float %f1, float *%base, float %acc) {
; CHECK-LABEL: f3:
; CHECK: mseb %f2, %f0, 4092(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1023
%f2 = load float, float *%ptr
%negacc = fsub float -0.0, %acc
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f4(float %f1, float *%base, float %acc) {
; The important thing here is that we don't generate an out-of-range
; displacement. Other sequences besides this one would be OK.
;
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: mseb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1024
%f2 = load float, float *%ptr
%negacc = fsub float -0.0, %acc
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f5(float %f1, float *%base, float %acc) {
; Here too the important thing is that we don't generate an out-of-range
; displacement. Other sequences besides this one would be OK.
;
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -4
; CHECK: mseb %f2, %f0, 0(%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 -1
%f2 = load float, float *%ptr
%negacc = fsub float -0.0, %acc
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f6(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 2
; CHECK: mseb %f2, %f0, 0(%r1,%r2)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 %index
%f2 = load float, float *%ptr
%negacc = fsub float -0.0, %acc
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f7(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f7:
; CHECK: sllg %r1, %r3, 2
; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 1023
%ptr = getelementptr float, float *%base, i64 %index2
%f2 = load float, float *%ptr
%negacc = fsub float -0.0, %acc
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f8(float %f1, float *%base, i64 %index, float %acc) {
; CHECK-LABEL: f8:
; CHECK: sllg %r1, %r3, 2
; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
; CHECK: mseb %f2, %f0, 0(%r1)
; CHECK-SCALAR: ler %f0, %f2
; CHECK-VECTOR: ldr %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 1024
%ptr = getelementptr float, float *%base, i64 %index2
%f2 = load float, float *%ptr
%negacc = fsub float -0.0, %acc
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}

View File

@ -0,0 +1,138 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare double @llvm.experimental.constrained.fma.f64(double %f1, double %f2, double %f3, metadata, metadata)
define double @f1(double %f1, double %f2, double %acc) {
; CHECK-LABEL: f1:
; CHECK-SCALAR: msdbr %f4, %f0, %f2
; CHECK-SCALAR: ldr %f0, %f4
; CHECK-VECTOR: wfmsdb %f0, %f0, %f2, %f4
; CHECK: br %r14
%negacc = fsub double -0.0, %acc
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f2(double %f1, double *%ptr, double %acc) {
; CHECK-LABEL: f2:
; CHECK: msdb %f2, %f0, 0(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%f2 = load double, double *%ptr
%negacc = fsub double -0.0, %acc
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f3(double %f1, double *%base, double %acc) {
; CHECK-LABEL: f3:
; CHECK: msdb %f2, %f0, 4088(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 511
%f2 = load double, double *%ptr
%negacc = fsub double -0.0, %acc
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f4(double %f1, double *%base, double %acc) {
; The important thing here is that we don't generate an out-of-range
; displacement. Other sequences besides this one would be OK.
;
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: msdb %f2, %f0, 0(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 512
%f2 = load double, double *%ptr
%negacc = fsub double -0.0, %acc
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f5(double %f1, double *%base, double %acc) {
; Here too the important thing is that we don't generate an out-of-range
; displacement. Other sequences besides this one would be OK.
;
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -8
; CHECK: msdb %f2, %f0, 0(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 -1
%f2 = load double, double *%ptr
%negacc = fsub double -0.0, %acc
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f6(double %f1, double *%base, i64 %index, double %acc) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 3
; CHECK: msdb %f2, %f0, 0(%r1,%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 %index
%f2 = load double, double *%ptr
%negacc = fsub double -0.0, %acc
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f7(double %f1, double *%base, i64 %index, double %acc) {
; CHECK-LABEL: f7:
; CHECK: sllg %r1, %r3, 3
; CHECK: msdb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}})
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 511
%ptr = getelementptr double, double *%base, i64 %index2
%f2 = load double, double *%ptr
%negacc = fsub double -0.0, %acc
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f8(double %f1, double *%base, i64 %index, double %acc) {
; CHECK-LABEL: f8:
; CHECK: sllg %r1, %r3, 3
; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
; CHECK: msdb %f2, %f0, 0(%r1)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 512
%ptr = getelementptr double, double *%base, i64 %index2
%f2 = load double, double *%ptr
%negacc = fsub double -0.0, %acc
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}

View File

@ -0,0 +1,55 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare double @llvm.experimental.constrained.fma.f64(double %f1, double %f2, double %f3, metadata, metadata)
declare float @llvm.experimental.constrained.fma.f32(float %f1, float %f2, float %f3, metadata, metadata)
define double @f1(double %f1, double %f2, double %acc) {
; CHECK-LABEL: f1:
; CHECK: wfnmadb %f0, %f0, %f2, %f4
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%negres = fsub double -0.0, %res
ret double %negres
}
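; (Editorial note: negating the fma result gives -(%f1 * %f2 + %acc), which
; maps to the z14 vector negative-multiply-and-add; combining it with a
; negated accumulator yields the four wfnma*/wfnms* forms tested here.)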
define double @f2(double %f1, double %f2, double %acc) {
; CHECK-LABEL: f2:
; CHECK: wfnmsdb %f0, %f0, %f2, %f4
; CHECK: br %r14
%negacc = fsub double -0.0, %acc
%res = call double @llvm.experimental.constrained.fma.f64 (
double %f1, double %f2, double %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%negres = fsub double -0.0, %res
ret double %negres
}
define float @f3(float %f1, float %f2, float %acc) {
; CHECK-LABEL: f3:
; CHECK: wfnmasb %f0, %f0, %f2, %f4
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %acc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%negres = fsub float -0.0, %res
ret float %negres
}
define float @f4(float %f1, float %f2, float %acc) {
; CHECK-LABEL: f4:
; CHECK: wfnmssb %f0, %f0, %f2, %f4
; CHECK: br %r14
%negacc = fsub float -0.0, %acc
%res = call float @llvm.experimental.constrained.fma.f32 (
float %f1, float %f2, float %negacc,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%negres = fsub float -0.0, %res
ret float %negres
}

View File

@ -0,0 +1,40 @@
; Test strict 128-bit floating-point multiplication on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata)
define void @f1(fp128 *%ptr1, fp128 *%ptr2) {
; CHECK-LABEL: f1:
; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
; CHECK: wfmxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
; CHECK: vst [[RES]], 0(%r2)
; CHECK: br %r14
%f1 = load fp128, fp128 *%ptr1
%f2 = load fp128, fp128 *%ptr2
%sum = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %f1, fp128 %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %sum, fp128 *%ptr1
ret void
}
define void @f2(double %f1, double %f2, fp128 *%dst) {
; CHECK-LABEL: f2:
; CHECK-DAG: wflld [[REG1:%v[0-9]+]], %f0
; CHECK-DAG: wflld [[REG2:%v[0-9]+]], %f2
; CHECK: wfmxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
; CHECK: vst [[RES]], 0(%r2)
; CHECK: br %r14
%f1x = fpext double %f1 to fp128
%f2x = fpext double %f2 to fp128
%res = call fp128 @llvm.experimental.constrained.fmul.f128(
fp128 %f1x, fp128 %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%dst
ret void
}
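; (Editorial note: wflld lengthens an f64 held in an FPR to an f128 held in
; a single vector register, so no FPR pair is needed on z14. CHECK-DAG is
; used above because the two extensions may be emitted in either order.)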

View File

@ -0,0 +1,250 @@
; Test strict rounding functions for z10.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; Test rint for f32.
declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
define float @f1(float %f) {
; CHECK-LABEL: f1:
; CHECK: fiebr %f0, 0, %f0
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.rint.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test rint for f64.
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
define double @f2(double %f) {
; CHECK-LABEL: f2:
; CHECK: fidbr %f0, 0, %f0
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.rint.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test rint for f128.
declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)
define void @f3(fp128 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: fixbr %f0, 0, %f0
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.rint.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test nearbyint for f32.
declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
define float @f4(float %f) {
; CHECK-LABEL: f4:
; CHECK: brasl %r14, nearbyintf@PLT
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.nearbyint.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
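; (Editorial note: fiebr/fidbr may raise an inexact exception, which rint is
; supposed to signal but nearbyint must not. z10 cannot suppress inexact
; (the XxC operand only arrives with the z196 fiebra forms), so nearbyint
; and the static-rounding functions below become libcalls under strict
; semantics.)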
; Test nearbyint for f64.
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
define double @f5(double %f) {
; CHECK-LABEL: f5:
; CHECK: brasl %r14, nearbyint@PLT
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.nearbyint.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test nearbyint for f128.
declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
define void @f6(fp128 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: brasl %r14, nearbyintl@PLT
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.nearbyint.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test floor for f32.
declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata)
define float @f7(float %f) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, floorf@PLT
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.floor.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test floor for f64.
declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata)
define double @f8(double %f) {
; CHECK-LABEL: f8:
; CHECK: brasl %r14, floor@PLT
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.floor.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test floor for f128.
declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata, metadata)
define void @f9(fp128 *%ptr) {
; CHECK-LABEL: f9:
; CHECK: brasl %r14, floorl@PLT
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.floor.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test ceil for f32.
declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata)
define float @f10(float %f) {
; CHECK-LABEL: f10:
; CHECK: brasl %r14, ceilf@PLT
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.ceil.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test ceil for f64.
declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata)
define double @f11(double %f) {
; CHECK-LABEL: f11:
; CHECK: brasl %r14, ceil@PLT
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.ceil.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test ceil for f128.
declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata, metadata)
define void @f12(fp128 *%ptr) {
; CHECK-LABEL: f12:
; CHECK: brasl %r14, ceill@PLT
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.ceil.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test trunc for f32.
declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata)
define float @f13(float %f) {
; CHECK-LABEL: f13:
; CHECK: brasl %r14, truncf@PLT
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.trunc.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test trunc for f64.
declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata)
define double @f14(double %f) {
; CHECK-LABEL: f14:
; CHECK: brasl %r14, trunc@PLT
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.trunc.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test trunc for f128.
declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata)
define void @f15(fp128 *%ptr) {
; CHECK-LABEL: f15:
; CHECK: brasl %r14, truncl@PLT
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.trunc.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test round for f32.
declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata)
define float @f16(float %f) {
; CHECK-LABEL: f16:
; CHECK: brasl %r14, roundf@PLT
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.round.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test round for f64.
declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata)
define double @f17(double %f) {
; CHECK-LABEL: f17:
; CHECK: brasl %r14, round@PLT
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.round.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test round for f128.
declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadata)
define void @f18(fp128 *%ptr) {
; CHECK-LABEL: f18:
; CHECK: brasl %r14, roundl@PLT
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.round.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}

View File

@ -0,0 +1,254 @@
; Test strict rounding functions for z196 and above.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
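; (Editorial note on the operands below: the third operand of
; fiebra/fidbra/fixbra is the rounding mode: 0 = current FPC mode,
; 1 = nearest with ties away from zero (round), 5 = toward zero (trunc),
; 6 = toward +infinity (ceil), 7 = toward -infinity (floor). A final
; operand of 4 sets XxC, suppressing the inexact exception; rint alone
; keeps it at 0 because it is expected to signal inexact.)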
; Test rint for f32.
declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
define float @f1(float %f) {
; CHECK-LABEL: f1:
; CHECK: fiebr %f0, 0, %f0
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.rint.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test rint for f64.
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
define double @f2(double %f) {
; CHECK-LABEL: f2:
; CHECK-SCALAR: fidbr %f0, 0, %f0
; CHECK-VECTOR: fidbra %f0, 0, %f0, 0
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.rint.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test rint for f128.
declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)
define void @f3(fp128 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: fixbr %f0, 0, %f0
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.rint.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test nearbyint for f32.
declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
define float @f4(float %f) {
; CHECK-LABEL: f4:
; CHECK: fiebra %f0, 0, %f0, 4
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.nearbyint.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test nearbyint for f64.
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
define double @f5(double %f) {
; CHECK-LABEL: f5:
; CHECK: fidbra %f0, 0, %f0, 4
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.nearbyint.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test nearbyint for f128.
declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
define void @f6(fp128 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: fixbra %f0, 0, %f0, 4
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.nearbyint.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test floor for f32.
declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata)
define float @f7(float %f) {
; CHECK-LABEL: f7:
; CHECK: fiebra %f0, 7, %f0, 4
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.floor.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test floor for f64.
declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata)
define double @f8(double %f) {
; CHECK-LABEL: f8:
; CHECK: fidbra %f0, 7, %f0, 4
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.floor.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test floor for f128.
declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata, metadata)
define void @f9(fp128 *%ptr) {
; CHECK-LABEL: f9:
; CHECK: fixbra %f0, 7, %f0, 4
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.floor.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test ceil for f32.
declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata)
define float @f10(float %f) {
; CHECK-LABEL: f10:
; CHECK: fiebra %f0, 6, %f0, 4
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.ceil.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test ceil for f64.
declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata)
define double @f11(double %f) {
; CHECK-LABEL: f11:
; CHECK: fidbra %f0, 6, %f0, 4
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.ceil.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test ceil for f128.
declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata, metadata)
define void @f12(fp128 *%ptr) {
; CHECK-LABEL: f12:
; CHECK: fixbra %f0, 6, %f0, 4
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.ceil.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test trunc for f32.
declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata)
define float @f13(float %f) {
; CHECK-LABEL: f13:
; CHECK: fiebra %f0, 5, %f0, 4
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.trunc.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test trunc for f64.
declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata)
define double @f14(double %f) {
; CHECK-LABEL: f14:
; CHECK: fidbra %f0, 5, %f0, 4
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.trunc.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test trunc for f128.
declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata)
define void @f15(fp128 *%ptr) {
; CHECK-LABEL: f15:
; CHECK: fixbra %f0, 5, %f0, 4
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.trunc.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test round for f32.
declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata)
define float @f16(float %f) {
; CHECK-LABEL: f16:
; CHECK: fiebra %f0, 1, %f0, 4
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.round.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test round for f64.
declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata)
define double @f17(double %f) {
; CHECK-LABEL: f17:
; CHECK: fidbra %f0, 1, %f0, 4
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.round.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test round for f128.
declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadata)
define void @f18(fp128 *%ptr) {
; CHECK-LABEL: f18:
; CHECK: fixbra %f0, 1, %f0, 4
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.round.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}

View File

@ -0,0 +1,262 @@
; Test strict rounding functions for z14 and above.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
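; (Editorial note: the z14 vector-enhancements facility provides 128-bit FP
; arithmetic in vector registers, so the f128 cases below use vl/wfixb/vst
; on a single VR instead of the fixbr FPR-pair form used on older cores.)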
; Test rint for f32.
declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
define float @f1(float %f) {
; CHECK-LABEL: f1:
; CHECK: fiebra %f0, 0, %f0, 0
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.rint.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test rint for f64.
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
define double @f2(double %f) {
; CHECK-LABEL: f2:
; CHECK: fidbra %f0, 0, %f0, 0
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.rint.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test rint for f128.
declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)
define void @f3(fp128 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 0, 0
; CHECK: vst [[RES]], 0(%r2)
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.rint.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test nearbyint for f32.
declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
define float @f4(float %f) {
; CHECK-LABEL: f4:
; CHECK: fiebra %f0, 0, %f0, 4
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.nearbyint.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test nearbyint for f64.
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
define double @f5(double %f) {
; CHECK-LABEL: f5:
; CHECK: fidbra %f0, 0, %f0, 4
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.nearbyint.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test nearbyint for f128.
declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
define void @f6(fp128 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 0
; CHECK: vst [[RES]], 0(%r2)
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.nearbyint.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test floor for f32.
declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata)
define float @f7(float %f) {
; CHECK-LABEL: f7:
; CHECK: fiebra %f0, 7, %f0, 4
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.floor.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test floor for f64.
declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata)
define double @f8(double %f) {
; CHECK-LABEL: f8:
; CHECK: fidbra %f0, 7, %f0, 4
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.floor.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test floor for f128.
declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata, metadata)
define void @f9(fp128 *%ptr) {
; CHECK-LABEL: f9:
; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 7
; CHECK: vst [[RES]], 0(%r2)
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.floor.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test ceil for f32.
declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata)
define float @f10(float %f) {
; CHECK-LABEL: f10:
; CHECK: fiebra %f0, 6, %f0, 4
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.ceil.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test ceil for f64.
declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata)
define double @f11(double %f) {
; CHECK-LABEL: f11:
; CHECK: fidbra %f0, 6, %f0, 4
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.ceil.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test ceil for f128.
declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata, metadata)
define void @f12(fp128 *%ptr) {
; CHECK-LABEL: f12:
; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 6
; CHECK: vst [[RES]], 0(%r2)
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.ceil.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test trunc for f32.
declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata)
define float @f13(float %f) {
; CHECK-LABEL: f13:
; CHECK: fiebra %f0, 5, %f0, 4
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.trunc.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test trunc for f64.
declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata)
define double @f14(double %f) {
; CHECK-LABEL: f14:
; CHECK: fidbra %f0, 5, %f0, 4
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.trunc.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test trunc for f128.
declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata)
define void @f15(fp128 *%ptr) {
; CHECK-LABEL: f15:
; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 5
; CHECK: vst [[RES]], 0(%r2)
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.trunc.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}
; Test round for f32.
declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata)
define float @f16(float %f) {
; CHECK-LABEL: f16:
; CHECK: fiebra %f0, 1, %f0, 4
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.round.f32(
float %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Test round for f64.
declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata)
define double @f17(double %f) {
; CHECK-LABEL: f17:
; CHECK: fidbra %f0, 1, %f0, 4
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.round.f64(
double %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Test round for f128.
declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadata)
define void @f18(fp128 *%ptr) {
; CHECK-LABEL: f18:
; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 1
; CHECK: vst [[RES]], 0(%r2)
; CHECK: br %r14
%src = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.round.f128(
fp128 %src,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}

View File

@ -0,0 +1,94 @@
; Test strict 32-bit square root.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
; Check register square root.
define float @f1(float %val) {
; CHECK-LABEL: f1:
; CHECK: sqebr %f0, %f0
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.sqrt.f32(
float %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
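; For comparison, an editorial sketch (not part of the original test) of the
; non-strict equivalent, which selects the same sqebr via the regular sqrt
; intrinsic:
define float @f1_nonstrict(float %val) {
%res = call float @llvm.sqrt.f32(float %val)
ret float %res
}
declare float @llvm.sqrt.f32(float)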
; Check the low end of the SQEB range.
define float @f2(float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: sqeb %f0, 0(%r2)
; CHECK: br %r14
%val = load float, float *%ptr
%res = call float @llvm.experimental.constrained.sqrt.f32(
float %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check the high end of the aligned SQEB range.
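; (SQEB has a 12-bit unsigned displacement, so 4092 is the highest
; word-aligned offset it can encode directly.)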
define float @f3(float *%base) {
; CHECK-LABEL: f3:
; CHECK: sqeb %f0, 4092(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1023
%val = load float, float *%ptr
%res = call float @llvm.experimental.constrained.sqrt.f32(
float %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
define float @f4(float *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: sqeb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1024
%val = load float, float *%ptr
%res = call float @llvm.experimental.constrained.sqrt.f32(
float %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check negative displacements, which also need separate address logic.
define float @f5(float *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -4
; CHECK: sqeb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 -1
%val = load float, float *%ptr
%res = call float @llvm.experimental.constrained.sqrt.f32(
float %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check that SQEB allows indices.
define float @f6(float *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 2
; CHECK: sqeb %f0, 400(%r1,%r2)
; CHECK: br %r14
%ptr1 = getelementptr float, float *%base, i64 %index
%ptr2 = getelementptr float, float *%ptr1, i64 100
%val = load float, float *%ptr2
%res = call float @llvm.experimental.constrained.sqrt.f32(
float %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}

View File

@ -0,0 +1,94 @@
; Test strict 64-bit square root.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
; Check register square root.
define double @f1(double %val) {
; CHECK-LABEL: f1:
; CHECK: sqdbr %f0, %f0
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.sqrt.f64(
double %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the low end of the SQDB range.
define double @f2(double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: sqdb %f0, 0(%r2)
; CHECK: br %r14
%val = load double, double *%ptr
%res = call double @llvm.experimental.constrained.sqrt.f64(
double %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the high end of the aligned SQDB range.
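; (The 12-bit displacement limit makes 4088 the highest doubleword-aligned
; offset SQDB can encode directly.)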
define double @f3(double *%base) {
; CHECK-LABEL: f3:
; CHECK: sqdb %f0, 4088(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 511
%val = load double, double *%ptr
%res = call double @llvm.experimental.constrained.sqrt.f64(
double %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the next doubleword up, which needs separate address logic.
; Other sequences besides this one would be OK.
define double @f4(double *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: sqdb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 512
%val = load double, double *%ptr
%res = call double @llvm.experimental.constrained.sqrt.f64(
double %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check negative displacements, which also need separate address logic.
define double @f5(double *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -8
; CHECK: sqdb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 -1
%val = load double, double *%ptr
%res = call double @llvm.experimental.constrained.sqrt.f64(
double %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check that SQDB allows indices.
define double @f6(double *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 3
; CHECK: sqdb %f0, 800(%r1,%r2)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%base, i64 %index
%ptr2 = getelementptr double, double *%ptr1, i64 100
%val = load double, double *%ptr2
%res = call double @llvm.experimental.constrained.sqrt.f64(
double %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}

View File

@ -0,0 +1,23 @@
; Test strict 128-bit square root.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata)
; There's no memory form of SQXBR.
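; (An fp128 value is held in an even/odd floating-point register pair
; such as %f0/%f2, hence the paired loads and stores.)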
define void @f1(fp128 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: ld %f0, 0(%r2)
; CHECK: ld %f2, 8(%r2)
; CHECK: sqxbr %f0, %f0
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
%orig = load fp128, fp128 *%ptr
%sqrt = call fp128 @llvm.experimental.constrained.sqrt.f128(
fp128 %orig,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %sqrt, fp128 *%ptr
ret void
}

View File

@ -0,0 +1,20 @@
; Test strict 128-bit floating-point square root on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata)
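; (With the vector enhancements facility on z14, an fp128 value lives in
; a single vector register, so the square root is a wfsqxb on a VR.)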
define void @f1(fp128 *%ptr) {
; CHECK-LABEL: f1:
; CHECK-DAG: vl [[REG:%v[0-9]+]], 0(%r2)
; CHECK: wfsqxb [[RES:%v[0-9]+]], [[REG]]
; CHECK: vst [[RES]], 0(%r2)
; CHECK: br %r14
%f = load fp128, fp128 *%ptr
%res = call fp128 @llvm.experimental.constrained.sqrt.f128(
fp128 %f,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128 *%ptr
ret void
}

View File

@ -0,0 +1,173 @@
; Test strict 32-bit floating-point subtraction.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare float @foo()
declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
; Check register subtraction.
define float @f1(float %f1, float %f2) {
; CHECK-LABEL: f1:
; CHECK: sebr %f0, %f2
; CHECK: br %r14
%res = call float @llvm.experimental.constrained.fsub.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check the low end of the SEB range.
define float @f2(float %f1, float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: seb %f0, 0(%r2)
; CHECK: br %r14
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fsub.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check the high end of the aligned SEB range.
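; (SEB's 12-bit unsigned displacement tops out at 4095, so 4092 is the
; highest word-aligned offset it can encode.)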
define float @f3(float %f1, float *%base) {
; CHECK-LABEL: f3:
; CHECK: seb %f0, 4092(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1023
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fsub.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
define float @f4(float %f1, float *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: seb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 1024
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fsub.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check negative displacements, which also need separate address logic.
define float @f5(float %f1, float *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -4
; CHECK: seb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr float, float *%base, i64 -1
%f2 = load float, float *%ptr
%res = call float @llvm.experimental.constrained.fsub.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check that SEB allows indices.
define float @f6(float %f1, float *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 2
; CHECK: seb %f0, 400(%r1,%r2)
; CHECK: br %r14
%ptr1 = getelementptr float, float *%base, i64 %index
%ptr2 = getelementptr float, float *%ptr1, i64 100
%f2 = load float, float *%ptr2
%res = call float @llvm.experimental.constrained.fsub.f32(
float %f1, float %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
; Check that subtractions of spilled values can use SEB rather than SEBR.
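; The intervening call clobbers the call-clobbered FP registers, forcing
; the loaded values to be spilled; a reload can then be folded directly
; into SEB's memory operand.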
define float @f7(float *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK-SCALAR: seb %f0, 16{{[04]}}(%r15)
; CHECK: br %r14
%ptr1 = getelementptr float, float *%ptr0, i64 2
%ptr2 = getelementptr float, float *%ptr0, i64 4
%ptr3 = getelementptr float, float *%ptr0, i64 6
%ptr4 = getelementptr float, float *%ptr0, i64 8
%ptr5 = getelementptr float, float *%ptr0, i64 10
%ptr6 = getelementptr float, float *%ptr0, i64 12
%ptr7 = getelementptr float, float *%ptr0, i64 14
%ptr8 = getelementptr float, float *%ptr0, i64 16
%ptr9 = getelementptr float, float *%ptr0, i64 18
%ptr10 = getelementptr float, float *%ptr0, i64 20
%val0 = load float, float *%ptr0
%val1 = load float, float *%ptr1
%val2 = load float, float *%ptr2
%val3 = load float, float *%ptr3
%val4 = load float, float *%ptr4
%val5 = load float, float *%ptr5
%val6 = load float, float *%ptr6
%val7 = load float, float *%ptr7
%val8 = load float, float *%ptr8
%val9 = load float, float *%ptr9
%val10 = load float, float *%ptr10
%ret = call float @foo()
%sub0 = call float @llvm.experimental.constrained.fsub.f32(
float %ret, float %val0,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub1 = call float @llvm.experimental.constrained.fsub.f32(
float %sub0, float %val1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub2 = call float @llvm.experimental.constrained.fsub.f32(
float %sub1, float %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub3 = call float @llvm.experimental.constrained.fsub.f32(
float %sub2, float %val3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub4 = call float @llvm.experimental.constrained.fsub.f32(
float %sub3, float %val4,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub5 = call float @llvm.experimental.constrained.fsub.f32(
float %sub4, float %val5,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub6 = call float @llvm.experimental.constrained.fsub.f32(
float %sub5, float %val6,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub7 = call float @llvm.experimental.constrained.fsub.f32(
float %sub6, float %val7,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub8 = call float @llvm.experimental.constrained.fsub.f32(
float %sub7, float %val8,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub9 = call float @llvm.experimental.constrained.fsub.f32(
float %sub8, float %val9,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub10 = call float @llvm.experimental.constrained.fsub.f32(
float %sub9, float %val10,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %sub10
}

View File

@ -0,0 +1,173 @@
; Test strict 64-bit floating-point subtraction.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
; Check register subtraction.
define double @f1(double %f1, double %f2) {
; CHECK-LABEL: f1:
; CHECK: sdbr %f0, %f2
; CHECK: br %r14
%res = call double @llvm.experimental.constrained.fsub.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the low end of the SDB range.
define double @f2(double %f1, double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: sdb %f0, 0(%r2)
; CHECK: br %r14
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fsub.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the high end of the aligned SDB range.
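; (SDB's 12-bit displacement makes 4088 the highest doubleword-aligned
; offset it can encode.)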
define double @f3(double %f1, double *%base) {
; CHECK-LABEL: f3:
; CHECK: sdb %f0, 4088(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 511
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fsub.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check the next doubleword up, which needs separate address logic.
; Other sequences besides this one would be OK.
define double @f4(double %f1, double *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: sdb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 512
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fsub.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check negative displacements, which also need separate address logic.
define double @f5(double %f1, double *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r2, -8
; CHECK: sdb %f0, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr double, double *%base, i64 -1
%f2 = load double, double *%ptr
%res = call double @llvm.experimental.constrained.fsub.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check that SDB allows indices.
define double @f6(double %f1, double *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r3, 3
; CHECK: sdb %f0, 800(%r1,%r2)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%base, i64 %index
%ptr2 = getelementptr double, double *%ptr1, i64 100
%f2 = load double, double *%ptr2
%res = call double @llvm.experimental.constrained.fsub.f64(
double %f1, double %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
; Check that subtractions of spilled values can use SDB rather than SDBR.
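; The call forces the loaded values to be spilled across it; a reload is
; then folded into SDB's memory operand.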
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK-SCALAR: sdb %f0, 16{{[04]}}(%r15)
; CHECK: br %r14
%ptr1 = getelementptr double, double *%ptr0, i64 2
%ptr2 = getelementptr double, double *%ptr0, i64 4
%ptr3 = getelementptr double, double *%ptr0, i64 6
%ptr4 = getelementptr double, double *%ptr0, i64 8
%ptr5 = getelementptr double, double *%ptr0, i64 10
%ptr6 = getelementptr double, double *%ptr0, i64 12
%ptr7 = getelementptr double, double *%ptr0, i64 14
%ptr8 = getelementptr double, double *%ptr0, i64 16
%ptr9 = getelementptr double, double *%ptr0, i64 18
%ptr10 = getelementptr double, double *%ptr0, i64 20
%val0 = load double, double *%ptr0
%val1 = load double, double *%ptr1
%val2 = load double, double *%ptr2
%val3 = load double, double *%ptr3
%val4 = load double, double *%ptr4
%val5 = load double, double *%ptr5
%val6 = load double, double *%ptr6
%val7 = load double, double *%ptr7
%val8 = load double, double *%ptr8
%val9 = load double, double *%ptr9
%val10 = load double, double *%ptr10
%ret = call double @foo()
%sub0 = call double @llvm.experimental.constrained.fsub.f64(
double %ret, double %val0,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub1 = call double @llvm.experimental.constrained.fsub.f64(
double %sub0, double %val1,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub2 = call double @llvm.experimental.constrained.fsub.f64(
double %sub1, double %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub3 = call double @llvm.experimental.constrained.fsub.f64(
double %sub2, double %val3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub4 = call double @llvm.experimental.constrained.fsub.f64(
double %sub3, double %val4,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub5 = call double @llvm.experimental.constrained.fsub.f64(
double %sub4, double %val5,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub6 = call double @llvm.experimental.constrained.fsub.f64(
double %sub5, double %val6,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub7 = call double @llvm.experimental.constrained.fsub.f64(
double %sub6, double %val7,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub8 = call double @llvm.experimental.constrained.fsub.f64(
double %sub7, double %val8,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub9 = call double @llvm.experimental.constrained.fsub.f64(
double %sub8, double %val9,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%sub10 = call double @llvm.experimental.constrained.fsub.f64(
double %sub9, double %val10,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %sub10
}

View File

@ -0,0 +1,25 @@
; Test strict 128-bit floating-point subtraction.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare fp128 @llvm.experimental.constrained.fsub.f128(fp128, fp128, metadata, metadata)
; There is no memory form of 128-bit subtraction.
define void @f1(fp128 *%ptr, float %f2) {
; CHECK-LABEL: f1:
; CHECK-DAG: lxebr %f0, %f0
; CHECK-DAG: ld %f1, 0(%r2)
; CHECK-DAG: ld %f3, 8(%r2)
; CHECK: sxbr %f1, %f0
; CHECK: std %f1, 0(%r2)
; CHECK: std %f3, 8(%r2)
; CHECK: br %r14
%f1 = load fp128, fp128 *%ptr
%f2x = fpext float %f2 to fp128
%diff = call fp128 @llvm.experimental.constrained.fsub.f128(
fp128 %f1, fp128 %f2x,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %diff, fp128 *%ptr
ret void
}

View File

@ -0,0 +1,22 @@
; Test strict 128-bit floating-point subtraction on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare fp128 @llvm.experimental.constrained.fsub.f128(fp128, fp128, metadata, metadata)
define void @f1(fp128 *%ptr1, fp128 *%ptr2) {
; CHECK-LABEL: f1:
; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
; CHECK: wfsxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
; CHECK: vst [[RES]], 0(%r2)
; CHECK: br %r14
%f1 = load fp128, fp128 *%ptr1
%f2 = load fp128, fp128 *%ptr2
%diff = call fp128 @llvm.experimental.constrained.fsub.f128(
fp128 %f1, fp128 %f2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %diff, fp128 *%ptr1
ret void
}

View File

@ -0,0 +1,33 @@
; Test strict vector addition.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
; Test a v2f64 addition.
define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
<2 x double> %val2) {
; CHECK-LABEL: f5:
; CHECK: vfadb %v24, %v26, %v28
; CHECK: br %r14
%ret = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
<2 x double> %val1, <2 x double> %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %ret
}
; Test an f64 addition that uses vector registers.
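; (The w-prefixed scalar forms operate only on element 0 of a vector
; register, so the extracted element needs no separate move.)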
define double @f6(<2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f6:
; CHECK: wfadb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <2 x double> %val1, i32 0
%scalar2 = extractelement <2 x double> %val2, i32 0
%ret = call double @llvm.experimental.constrained.fadd.f64(
double %scalar1, double %scalar2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %ret
}

View File

@ -0,0 +1,33 @@
; Test strict vector addition on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
; Test a v4f32 addition.
define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
<4 x float> %val2) {
; CHECK-LABEL: f1:
; CHECK: vfasb %v24, %v26, %v28
; CHECK: br %r14
%ret = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(
<4 x float> %val1, <4 x float> %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %ret
}
; Test an f32 addition that uses vector registers.
define float @f2(<4 x float> %val1, <4 x float> %val2) {
; CHECK-LABEL: f2:
; CHECK: wfasb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <4 x float> %val1, i32 0
%scalar2 = extractelement <4 x float> %val2, i32 0
%ret = call float @llvm.experimental.constrained.fadd.f32(
float %scalar1, float %scalar2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %ret
}

View File

@ -0,0 +1,33 @@
; Test strict vector division.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
; Test a v2f64 division.
define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
<2 x double> %val2) {
; CHECK-LABEL: f5:
; CHECK: vfddb %v24, %v26, %v28
; CHECK: br %r14
%ret = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
<2 x double> %val1, <2 x double> %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %ret
}
; Test an f64 division that uses vector registers.
define double @f6(<2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f6:
; CHECK: wfddb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <2 x double> %val1, i32 0
%scalar2 = extractelement <2 x double> %val2, i32 0
%ret = call double @llvm.experimental.constrained.fdiv.f64(
double %scalar1, double %scalar2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %ret
}

View File

@ -0,0 +1,33 @@
; Test strict vector division on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata)
; Test a v4f32 division.
define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
<4 x float> %val2) {
; CHECK-LABEL: f1:
; CHECK: vfdsb %v24, %v26, %v28
; CHECK: br %r14
%ret = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(
<4 x float> %val1, <4 x float> %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %ret
}
; Test an f32 division that uses vector registers.
define float @f2(<4 x float> %val1, <4 x float> %val2) {
; CHECK-LABEL: f2:
; CHECK: wfdsb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <4 x float> %val1, i32 0
%scalar2 = extractelement <4 x float> %val2, i32 0
%ret = call float @llvm.experimental.constrained.fdiv.f32(
float %scalar1, float %scalar2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %ret
}

View File

@ -0,0 +1,80 @@
; Test strict vector maximum on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare double @llvm.experimental.constrained.maxnum.f64(double, double, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare float @llvm.experimental.constrained.maxnum.f32(float, float, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float>, <4 x float>, metadata, metadata)
declare fp128 @llvm.experimental.constrained.maxnum.f128(fp128, fp128, metadata, metadata)
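; The trailing 4 in the checks below is the M6 modifier, which selects
; among the architecture's several maximum semantics; 4 is the variant
; whose NaN handling matches llvm.experimental.constrained.maxnum.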
; Test the f64 maxnum intrinsic.
define double @f1(double %dummy, double %val1, double %val2) {
; CHECK-LABEL: f1:
; CHECK: wfmaxdb %f0, %f2, %f4, 4
; CHECK: br %r14
%ret = call double @llvm.experimental.constrained.maxnum.f64(
double %val1, double %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %ret
}
; Test the v2f64 maxnum intrinsic.
define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
<2 x double> %val2) {
; CHECK-LABEL: f2:
; CHECK: vfmaxdb %v24, %v26, %v28, 4
; CHECK: br %r14
%ret = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
<2 x double> %val1, <2 x double> %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %ret
}
; Test the f32 maxnum intrinsic.
define float @f3(float %dummy, float %val1, float %val2) {
; CHECK-LABEL: f3:
; CHECK: wfmaxsb %f0, %f2, %f4, 4
; CHECK: br %r14
%ret = call float @llvm.experimental.constrained.maxnum.f32(
float %val1, float %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %ret
}
; Test the v4f32 maxnum intrinsic.
define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1,
<4 x float> %val2) {
; CHECK-LABEL: f4:
; CHECK: vfmaxsb %v24, %v26, %v28, 4
; CHECK: br %r14
%ret = call <4 x float> @llvm.experimental.constrained.maxnum.v4f32(
<4 x float> %val1, <4 x float> %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %ret
}
; Test the f128 maxnum intrinsic.
define void @f5(fp128 *%ptr1, fp128 *%ptr2, fp128 *%dst) {
; CHECK-LABEL: f5:
; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
; CHECK: wfmaxxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4
; CHECK: vst [[RES]], 0(%r4)
; CHECK: br %r14
%val1 = load fp128, fp128* %ptr1
%val2 = load fp128, fp128* %ptr2
%res = call fp128 @llvm.experimental.constrained.maxnum.f128(
fp128 %val1, fp128 %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128* %dst
ret void
}

View File

@ -0,0 +1,80 @@
; Test strict vector minimum on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare double @llvm.experimental.constrained.minnum.f64(double, double, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare float @llvm.experimental.constrained.minnum.f32(float, float, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float>, <4 x float>, metadata, metadata)
declare fp128 @llvm.experimental.constrained.minnum.f128(fp128, fp128, metadata, metadata)
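; The trailing 4 is again the M6 modifier, selecting the minimum
; semantics that match llvm.experimental.constrained.minnum.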
; Test the f64 minnum intrinsic.
define double @f1(double %dummy, double %val1, double %val2) {
; CHECK-LABEL: f1:
; CHECK: wfmindb %f0, %f2, %f4, 4
; CHECK: br %r14
%ret = call double @llvm.experimental.constrained.minnum.f64(
double %val1, double %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %ret
}
; Test the v2f64 minnum intrinsic.
define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
<2 x double> %val2) {
; CHECK-LABEL: f2:
; CHECK: vfmindb %v24, %v26, %v28, 4
; CHECK: br %r14
%ret = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
<2 x double> %val1, <2 x double> %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %ret
}
; Test the f32 minnum intrinsic.
define float @f3(float %dummy, float %val1, float %val2) {
; CHECK-LABEL: f3:
; CHECK: wfminsb %f0, %f2, %f4, 4
; CHECK: br %r14
%ret = call float @llvm.experimental.constrained.minnum.f32(
float %val1, float %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %ret
}
; Test the v4f32 minnum intrinsic.
define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1,
<4 x float> %val2) {
; CHECK-LABEL: f4:
; CHECK: vfminsb %v24, %v26, %v28, 4
; CHECK: br %r14
%ret = call <4 x float> @llvm.experimental.constrained.minnum.v4f32(
<4 x float> %val1, <4 x float> %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %ret
}
; Test the f128 minnum intrinsic.
define void @f5(fp128 *%ptr1, fp128 *%ptr2, fp128 *%dst) {
; CHECK-LABEL: f5:
; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
; CHECK: wfminxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4
; CHECK: vst [[RES]], 0(%r4)
; CHECK: br %r14
%val1 = load fp128, fp128* %ptr1
%val2 = load fp128, fp128* %ptr2
%res = call fp128 @llvm.experimental.constrained.minnum.f128(
fp128 %val1, fp128 %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
store fp128 %res, fp128* %dst
ret void
}

View File

@ -0,0 +1,33 @@
; Test strict vector multiplication.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
; Test a v2f64 multiplication.
define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
<2 x double> %val2) {
; CHECK-LABEL: f5:
; CHECK: vfmdb %v24, %v26, %v28
; CHECK: br %r14
%ret = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
<2 x double> %val1, <2 x double> %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %ret
}
; Test an f64 multiplication that uses vector registers.
define double @f6(<2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f6:
; CHECK: wfmdb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <2 x double> %val1, i32 0
%scalar2 = extractelement <2 x double> %val2, i32 0
%ret = call double @llvm.experimental.constrained.fmul.f64(
double %scalar1, double %scalar2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %ret
}

View File

@ -0,0 +1,36 @@
; Test strict vector multiply-and-add.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
; Test a v2f64 multiply-and-add.
define <2 x double> @f4(<2 x double> %dummy, <2 x double> %val1,
<2 x double> %val2, <2 x double> %val3) {
; CHECK-LABEL: f4:
; CHECK: vfmadb %v24, %v26, %v28, %v30
; CHECK: br %r14
%ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64 (
<2 x double> %val1,
<2 x double> %val2,
<2 x double> %val3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %ret
}
; Test a v2f64 multiply-and-subtract.
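; (%val3 is negated via an fsub from -0.0 -- the IR fneg idiom -- which
; lets the FMA select as a fused multiply-and-subtract.)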
define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
<2 x double> %val2, <2 x double> %val3) {
; CHECK-LABEL: f5:
; CHECK: vfmsdb %v24, %v26, %v28, %v30
; CHECK: br %r14
%negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3
%ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64 (
<2 x double> %val1,
<2 x double> %val2,
<2 x double> %negval3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %ret
}

View File

@ -0,0 +1,33 @@
; Test strict vector multiplication on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata)
; Test a v4f32 multiplication.
define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
<4 x float> %val2) {
; CHECK-LABEL: f1:
; CHECK: vfmsb %v24, %v26, %v28
; CHECK: br %r14
%ret = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(
<4 x float> %val1, <4 x float> %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %ret
}
; Test an f32 multiplication that uses vector registers.
define float @f2(<4 x float> %val1, <4 x float> %val2) {
; CHECK-LABEL: f2:
; CHECK: wfmsb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <4 x float> %val1, i32 0
%scalar2 = extractelement <4 x float> %val2, i32 0
%ret = call float @llvm.experimental.constrained.fmul.f32(
float %scalar1, float %scalar2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %ret
}

View File

@ -0,0 +1,37 @@
; Test strict vector multiply-and-add on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
; Test a v4f32 multiply-and-add.
define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
<4 x float> %val2, <4 x float> %val3) {
; CHECK-LABEL: f1:
; CHECK: vfmasb %v24, %v26, %v28, %v30
; CHECK: br %r14
%ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32 (
<4 x float> %val1,
<4 x float> %val2,
<4 x float> %val3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %ret
}
; Test a v4f32 multiply-and-subtract.
define <4 x float> @f2(<4 x float> %dummy, <4 x float> %val1,
<4 x float> %val2, <4 x float> %val3) {
; CHECK-LABEL: f2:
; CHECK: vfmssb %v24, %v26, %v28, %v30
; CHECK: br %r14
%negval3 = fsub <4 x float> <float -0.0, float -0.0,
float -0.0, float -0.0>, %val3
%ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32 (
<4 x float> %val1,
<4 x float> %val2,
<4 x float> %negval3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %ret
}

View File

@ -0,0 +1,75 @@
; Test strict vector negative multiply-and-add on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
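; These tests apply the fneg idiom (fsub from -0.0) to the FMA result,
; and for the subtract forms also to the addend, so selection can form
; the negated vfnma*/vfnms* instructions.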
; Test a v2f64 negative multiply-and-add.
define <2 x double> @f1(<2 x double> %dummy, <2 x double> %val1,
<2 x double> %val2, <2 x double> %val3) {
; CHECK-LABEL: f1:
; CHECK: vfnmadb %v24, %v26, %v28, %v30
; CHECK: br %r14
%ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64 (
<2 x double> %val1,
<2 x double> %val2,
<2 x double> %val3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%negret = fsub <2 x double> <double -0.0, double -0.0>, %ret
ret <2 x double> %negret
}
; Test a v2f64 negative multiply-and-subtract.
define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
<2 x double> %val2, <2 x double> %val3) {
; CHECK-LABEL: f2:
; CHECK: vfnmsdb %v24, %v26, %v28, %v30
; CHECK: br %r14
%negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3
%ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64 (
<2 x double> %val1,
<2 x double> %val2,
<2 x double> %negval3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%negret = fsub <2 x double> <double -0.0, double -0.0>, %ret
ret <2 x double> %negret
}
; Test a v4f32 negative multiply-and-add.
define <4 x float> @f3(<4 x float> %dummy, <4 x float> %val1,
<4 x float> %val2, <4 x float> %val3) {
; CHECK-LABEL: f3:
; CHECK: vfnmasb %v24, %v26, %v28, %v30
; CHECK: br %r14
%ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32 (
<4 x float> %val1,
<4 x float> %val2,
<4 x float> %val3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%negret = fsub <4 x float> <float -0.0, float -0.0,
float -0.0, float -0.0>, %ret
ret <4 x float> %negret
}
; Test a v4f32 negative multiply-and-subtract.
define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1,
<4 x float> %val2, <4 x float> %val3) {
; CHECK-LABEL: f4:
; CHECK: vfnmssb %v24, %v26, %v28, %v30
; CHECK: br %r14
%negval3 = fsub <4 x float> <float -0.0, float -0.0,
float -0.0, float -0.0>, %val3
%ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32 (
<4 x float> %val1,
<4 x float> %val2,
<4 x float> %negval3,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
%negret = fsub <4 x float> <float -0.0, float -0.0,
float -0.0, float -0.0>, %ret
ret <4 x float> %negret
}

View File

@ -0,0 +1,155 @@
; Test strict v2f64 rounding.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
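; In VFIDB/WFIDB the third operand is the M4 field (4 suppresses the
; IEEE inexact exception) and the fourth is the rounding method:
; 0 = current FPC mode, 1 = nearest with ties away from zero,
; 5 = toward zero, 6 = toward +infinity, 7 = toward -infinity.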
define <2 x double> @f1(<2 x double> %val) {
; CHECK-LABEL: f1:
; CHECK: vfidb %v24, %v24, 0, 0
; CHECK: br %r14
%res = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
<2 x double> %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %res
}
define <2 x double> @f2(<2 x double> %val) {
; CHECK-LABEL: f2:
; CHECK: vfidb %v24, %v24, 4, 0
; CHECK: br %r14
%res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
<2 x double> %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %res
}
define <2 x double> @f3(<2 x double> %val) {
; CHECK-LABEL: f3:
; CHECK: vfidb %v24, %v24, 4, 7
; CHECK: br %r14
%res = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
<2 x double> %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %res
}
define <2 x double> @f4(<2 x double> %val) {
; CHECK-LABEL: f4:
; CHECK: vfidb %v24, %v24, 4, 6
; CHECK: br %r14
%res = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
<2 x double> %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %res
}
define <2 x double> @f5(<2 x double> %val) {
; CHECK-LABEL: f5:
; CHECK: vfidb %v24, %v24, 4, 5
; CHECK: br %r14
%res = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
<2 x double> %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %res
}
define <2 x double> @f6(<2 x double> %val) {
; CHECK-LABEL: f6:
; CHECK: vfidb %v24, %v24, 4, 1
; CHECK: br %r14
%res = call <2 x double> @llvm.experimental.constrained.round.v2f64(
<2 x double> %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %res
}
define double @f7(<2 x double> %val) {
; CHECK-LABEL: f7:
; CHECK: wfidb %f0, %v24, 0, 0
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.experimental.constrained.rint.f64(
double %scalar,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f8(<2 x double> %val) {
; CHECK-LABEL: f8:
; CHECK: wfidb %f0, %v24, 4, 0
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.experimental.constrained.nearbyint.f64(
double %scalar,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f9(<2 x double> %val) {
; CHECK-LABEL: f9:
; CHECK: wfidb %f0, %v24, 4, 7
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.experimental.constrained.floor.f64(
double %scalar,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f10(<2 x double> %val) {
; CHECK-LABEL: f10:
; CHECK: wfidb %f0, %v24, 4, 6
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.experimental.constrained.ceil.f64(
double %scalar,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f11(<2 x double> %val) {
; CHECK-LABEL: f11:
; CHECK: wfidb %f0, %v24, 4, 5
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.experimental.constrained.trunc.f64(
double %scalar,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}
define double @f12(<2 x double> %val) {
; CHECK-LABEL: f12:
; CHECK: wfidb %f0, %v24, 4, 1
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%res = call double @llvm.experimental.constrained.round.f64(
double %scalar,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %res
}

View File

@ -0,0 +1,154 @@
; Test strict v4f32 rounding on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, metadata, metadata)
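; VFISB/WFISB encode the same M4 (inexact-suppression) and
; rounding-method operands as the f64 VFIDB forms.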
define <4 x float> @f1(<4 x float> %val) {
; CHECK-LABEL: f1:
; CHECK: vfisb %v24, %v24, 0, 0
; CHECK: br %r14
%res = call <4 x float> @llvm.experimental.constrained.rint.v4f32(
<4 x float> %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %res
}
define <4 x float> @f2(<4 x float> %val) {
; CHECK-LABEL: f2:
; CHECK: vfisb %v24, %v24, 4, 0
; CHECK: br %r14
%res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(
<4 x float> %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %res
}
define <4 x float> @f3(<4 x float> %val) {
; CHECK-LABEL: f3:
; CHECK: vfisb %v24, %v24, 4, 7
; CHECK: br %r14
%res = call <4 x float> @llvm.experimental.constrained.floor.v4f32(
<4 x float> %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %res
}
define <4 x float> @f4(<4 x float> %val) {
; CHECK-LABEL: f4:
; CHECK: vfisb %v24, %v24, 4, 6
; CHECK: br %r14
%res = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(
<4 x float> %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %res
}
define <4 x float> @f5(<4 x float> %val) {
; CHECK-LABEL: f5:
; CHECK: vfisb %v24, %v24, 4, 5
; CHECK: br %r14
%res = call <4 x float> @llvm.experimental.constrained.trunc.v4f32(
<4 x float> %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %res
}
define <4 x float> @f6(<4 x float> %val) {
; CHECK-LABEL: f6:
; CHECK: vfisb %v24, %v24, 4, 1
; CHECK: br %r14
%res = call <4 x float> @llvm.experimental.constrained.round.v4f32(
<4 x float> %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %res
}
define float @f7(<4 x float> %val) {
; CHECK-LABEL: f7:
; CHECK: wfisb %f0, %v24, 0, 0
; CHECK: br %r14
%scalar = extractelement <4 x float> %val, i32 0
%res = call float @llvm.experimental.constrained.rint.f32(
float %scalar,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f8(<4 x float> %val) {
; CHECK-LABEL: f8:
; CHECK: wfisb %f0, %v24, 4, 0
; CHECK: br %r14
%scalar = extractelement <4 x float> %val, i32 0
%res = call float @llvm.experimental.constrained.nearbyint.f32(
float %scalar,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f9(<4 x float> %val) {
; CHECK-LABEL: f9:
; CHECK: wfisb %f0, %v24, 4, 7
; CHECK: br %r14
%scalar = extractelement <4 x float> %val, i32 0
%res = call float @llvm.experimental.constrained.floor.f32(
float %scalar,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f10(<4 x float> %val) {
; CHECK-LABEL: f10:
; CHECK: wfisb %f0, %v24, 4, 6
; CHECK: br %r14
%scalar = extractelement <4 x float> %val, i32 0
%res = call float @llvm.experimental.constrained.ceil.f32(
float %scalar,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f11(<4 x float> %val) {
; CHECK-LABEL: f11:
; CHECK: wfisb %f0, %v24, 4, 5
; CHECK: br %r14
%scalar = extractelement <4 x float> %val, i32 0
%res = call float @llvm.experimental.constrained.trunc.f32(
float %scalar,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}
define float @f12(<4 x float> %val) {
; CHECK-LABEL: f12:
; CHECK: wfisb %f0, %v24, 4, 1
; CHECK: br %r14
%scalar = extractelement <4 x float> %val, i32 0
%res = call float @llvm.experimental.constrained.round.f32(
float %scalar,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %res
}

View File

@ -0,0 +1,29 @@
; Test strict f64 and v2f64 square root.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
define <2 x double> @f1(<2 x double> %val) {
; CHECK-LABEL: f1:
; CHECK: vfsqdb %v24, %v24
; CHECK: br %r14
%ret = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
<2 x double> %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %ret
}
define double @f2(<2 x double> %val) {
; CHECK-LABEL: f2:
; CHECK: wfsqdb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <2 x double> %val, i32 0
%ret = call double @llvm.experimental.constrained.sqrt.f64(
double %scalar,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %ret
}

View File

@ -0,0 +1,29 @@
; Test strict f32 and v4f32 square root on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
define <4 x float> @f1(<4 x float> %val) {
; CHECK-LABEL: f1:
; CHECK: vfsqsb %v24, %v24
; CHECK: br %r14
%ret = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
<4 x float> %val,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %ret
}
define float @f2(<4 x float> %val) {
; CHECK-LABEL: f2:
; CHECK: wfsqsb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <4 x float> %val, i32 0
%ret = call float @llvm.experimental.constrained.sqrt.f32(
float %scalar,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %ret
}

View File

@ -0,0 +1,34 @@
; Test strict vector subtraction.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
; Test a v2f64 subtraction.
define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1,
<2 x double> %val2) {
; CHECK-LABEL: f6:
; CHECK: vfsdb %v24, %v26, %v28
; CHECK: br %r14
%ret = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
<2 x double> %val1, <2 x double> %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <2 x double> %ret
}
; Test an f64 subtraction that uses vector registers.
define double @f7(<2 x double> %val1, <2 x double> %val2) {
; CHECK-LABEL: f7:
; CHECK: wfsdb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <2 x double> %val1, i32 0
%scalar2 = extractelement <2 x double> %val2, i32 0
%ret = call double @llvm.experimental.constrained.fsub.f64(
double %scalar1, double %scalar2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret double %ret
}

View File

@ -0,0 +1,33 @@
; Test strict vector subtraction on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata)
; Test a v4f32 subtraction.
define <4 x float> @f6(<4 x float> %dummy, <4 x float> %val1,
<4 x float> %val2) {
; CHECK-LABEL: f6:
; CHECK: vfssb %v24, %v26, %v28
; CHECK: br %r14
%ret = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(
<4 x float> %val1, <4 x float> %val2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret <4 x float> %ret
}
; Test an f32 subtraction that uses vector registers.
define float @f7(<4 x float> %val1, <4 x float> %val2) {
; CHECK-LABEL: f7:
; CHECK: wfssb %f0, %v24, %v26
; CHECK: br %r14
%scalar1 = extractelement <4 x float> %val1, i32 0
%scalar2 = extractelement <4 x float> %val2, i32 0
%ret = call float @llvm.experimental.constrained.fsub.f32(
float %scalar1, float %scalar2,
metadata !"round.dynamic",
metadata !"fpexcept.strict")
ret float %ret
}

View File

@ -33,11 +33,11 @@ define <2 x double> @constrained_vector_fdiv_v2f64() {
; S390X-NEXT: larl %r1, .LCPI1_0
; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI1_1
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI1_2
; S390X-NEXT: ldeb %f2, 0(%r1)
; S390X-NEXT: ddbr %f0, %f1
; S390X-NEXT: larl %r1, .LCPI1_2
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: ddbr %f2, %f1
; S390X-NEXT: ddbr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fdiv_v2f64:
@ -63,14 +63,14 @@ define <3 x float> @constrained_vector_fdiv_v3f32() {
; S390X-NEXT: larl %r1, .LCPI2_0
; S390X-NEXT: le %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI2_1
; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: le %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI2_2
; S390X-NEXT: le %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI2_3
; S390X-NEXT: le %f4, 0(%r1)
; S390X-NEXT: debr %f0, %f1
; S390X-NEXT: debr %f2, %f1
; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: debr %f4, %f1
; S390X-NEXT: debr %f2, %f1
; S390X-NEXT: debr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fdiv_v3f32:
@ -100,30 +100,32 @@ entry:
define void @constrained_vector_fdiv_v3f64(<3 x double>* %a) {
; S390X-LABEL: constrained_vector_fdiv_v3f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI3_1
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI3_2
; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ld %f0, 16(%r2)
; S390X-NEXT: ld %f1, 8(%r2)
; S390X-NEXT: larl %r1, .LCPI3_0
; S390X-NEXT: ldeb %f2, 0(%r1)
; S390X-NEXT: ddb %f1, 16(%r2)
; S390X-NEXT: ddb %f0, 8(%r2)
; S390X-NEXT: larl %r1, .LCPI3_1
; S390X-NEXT: ldeb %f3, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI3_2
; S390X-NEXT: ldeb %f4, 0(%r1)
; S390X-NEXT: ddb %f2, 0(%r2)
; S390X-NEXT: std %f1, 16(%r2)
; S390X-NEXT: std %f0, 8(%r2)
; S390X-NEXT: ddbr %f3, %f1
; S390X-NEXT: ddbr %f4, %f0
; S390X-NEXT: std %f4, 16(%r2)
; S390X-NEXT: std %f3, 8(%r2)
; S390X-NEXT: std %f2, 0(%r2)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fdiv_v3f64:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: larl %r1, .LCPI3_0
; SZ13-NEXT: vl %v0, 0(%r2)
; SZ13-NEXT: vl %v1, 0(%r1)
; SZ13-NEXT: vfddb %v0, %v1, %v0
; SZ13-NEXT: larl %r1, .LCPI3_1
; SZ13-NEXT: ldeb %f1, 0(%r1)
; SZ13-NEXT: ddb %f1, 16(%r2)
; SZ13-NEXT: larl %r1, .LCPI3_1
; SZ13-NEXT: vl %v0, 0(%r2)
; SZ13-NEXT: vl %v2, 0(%r1)
; SZ13-NEXT: std %f1, 16(%r2)
; SZ13-NEXT: vfddb %v0, %v2, %v0
; SZ13-NEXT: vst %v0, 0(%r2)
; SZ13-NEXT: br %r14
entry:
@ -143,17 +145,17 @@ define <4 x double> @constrained_vector_fdiv_v4f64() {
; S390X-NEXT: larl %r1, .LCPI4_0
; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI4_1
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI4_2
; S390X-NEXT: ldeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI4_3
; S390X-NEXT: ldeb %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI4_4
; S390X-NEXT: ldeb %f6, 0(%r1)
; S390X-NEXT: ddbr %f0, %f1
; S390X-NEXT: ddbr %f2, %f1
; S390X-NEXT: ddbr %f4, %f1
; S390X-NEXT: larl %r1, .LCPI4_2
; S390X-NEXT: ldeb %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI4_3
; S390X-NEXT: ldeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI4_4
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: ddbr %f6, %f1
; S390X-NEXT: ddbr %f4, %f1
; S390X-NEXT: ddbr %f2, %f1
; S390X-NEXT: ddbr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fdiv_v4f64:
@ -162,10 +164,10 @@ define <4 x double> @constrained_vector_fdiv_v4f64() {
; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: larl %r1, .LCPI4_1
; SZ13-NEXT: vl %v1, 0(%r1)
; SZ13-NEXT: vfddb %v24, %v1, %v0
; SZ13-NEXT: vfddb %v26, %v1, %v0
; SZ13-NEXT: larl %r1, .LCPI4_2
; SZ13-NEXT: vl %v1, 0(%r1)
; SZ13-NEXT: vfddb %v26, %v1, %v0
; SZ13-NEXT: vfddb %v24, %v1, %v0
; SZ13-NEXT: br %r14
entry:
%div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
@ -589,13 +591,13 @@ define <2 x double> @constrained_vector_fmul_v2f64() {
; S390X-LABEL: constrained_vector_fmul_v2f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI11_0
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: ldeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI11_1
; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI11_2
; S390X-NEXT: ldeb %f2, 0(%r1)
; S390X-NEXT: mdbr %f0, %f1
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: mdbr %f2, %f1
; S390X-NEXT: mdbr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fmul_v2f64:
@ -619,15 +621,15 @@ define <3 x float> @constrained_vector_fmul_v3f32() {
; S390X-LABEL: constrained_vector_fmul_v3f32:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI12_0
; S390X-NEXT: le %f4, 0(%r1)
; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI12_1
; S390X-NEXT: ler %f0, %f4
; S390X-NEXT: meeb %f0, 0(%r1)
; S390X-NEXT: ler %f4, %f0
; S390X-NEXT: meeb %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI12_2
; S390X-NEXT: ler %f2, %f4
; S390X-NEXT: ler %f2, %f0
; S390X-NEXT: meeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI12_3
; S390X-NEXT: meeb %f4, 0(%r1)
; S390X-NEXT: meeb %f0, 0(%r1)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fmul_v3f32:
@ -659,25 +661,26 @@ define void @constrained_vector_fmul_v3f64(<3 x double>* %a) {
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI13_0
; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: ldr %f1, %f0
; S390X-NEXT: ldr %f2, %f0
; S390X-NEXT: mdb %f0, 16(%r2)
; S390X-NEXT: mdb %f2, 8(%r2)
; S390X-NEXT: mdb %f1, 0(%r2)
; S390X-NEXT: std %f0, 16(%r2)
; S390X-NEXT: std %f2, 8(%r2)
; S390X-NEXT: std %f1, 0(%r2)
; S390X-NEXT: ld %f1, 8(%r2)
; S390X-NEXT: ld %f2, 16(%r2)
; S390X-NEXT: ldr %f3, %f0
; S390X-NEXT: mdb %f3, 0(%r2)
; S390X-NEXT: mdbr %f1, %f0
; S390X-NEXT: mdbr %f2, %f0
; S390X-NEXT: std %f2, 16(%r2)
; S390X-NEXT: std %f1, 8(%r2)
; S390X-NEXT: std %f3, 0(%r2)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fmul_v3f64:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: larl %r1, .LCPI13_0
; SZ13-NEXT: vl %v0, 0(%r2)
; SZ13-NEXT: vl %v1, 0(%r1)
; SZ13-NEXT: larl %r1, .LCPI13_1
; SZ13-NEXT: vfmdb %v0, %v1, %v0
; SZ13-NEXT: ld %f1, 0(%r1)
; SZ13-NEXT: larl %r1, .LCPI13_1
; SZ13-NEXT: vl %v0, 0(%r2)
; SZ13-NEXT: vl %v2, 0(%r1)
; SZ13-NEXT: mdb %f1, 16(%r2)
; SZ13-NEXT: vfmdb %v0, %v2, %v0
; SZ13-NEXT: std %f1, 16(%r2)
; SZ13-NEXT: vst %v0, 0(%r2)
; SZ13-NEXT: br %r14
@ -697,19 +700,19 @@ define <4 x double> @constrained_vector_fmul_v4f64() {
; S390X-LABEL: constrained_vector_fmul_v4f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI14_0
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: ldeb %f6, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI14_1
; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI14_2
; S390X-NEXT: ldeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI14_3
; S390X-NEXT: ldeb %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI14_3
; S390X-NEXT: ldeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI14_4
; S390X-NEXT: ldeb %f6, 0(%r1)
; S390X-NEXT: mdbr %f0, %f1
; S390X-NEXT: mdbr %f2, %f1
; S390X-NEXT: mdbr %f4, %f1
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: mdbr %f6, %f1
; S390X-NEXT: mdbr %f4, %f1
; S390X-NEXT: mdbr %f2, %f1
; S390X-NEXT: mdbr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fmul_v4f64:
@ -719,9 +722,9 @@ define <4 x double> @constrained_vector_fmul_v4f64() {
; SZ13-NEXT: larl %r1, .LCPI14_1
; SZ13-NEXT: vl %v1, 0(%r1)
; SZ13-NEXT: larl %r1, .LCPI14_2
; SZ13-NEXT: vfmdb %v24, %v1, %v0
; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: vfmdb %v26, %v1, %v0
; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: vfmdb %v24, %v1, %v0
; SZ13-NEXT: br %r14
entry:
%mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
@ -763,12 +766,13 @@ define <2 x double> @constrained_vector_fadd_v2f64() {
; S390X-LABEL: constrained_vector_fadd_v2f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI16_0
; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI16_2
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI16_1
; S390X-NEXT: ld %f2, 0(%r1)
; S390X-NEXT: adbr %f0, %f2
; S390X-NEXT: larl %r1, .LCPI16_2
; S390X-NEXT: ldr %f2, %f1
; S390X-NEXT: adb %f2, 0(%r1)
; S390X-NEXT: adbr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fadd_v2f64:
@ -792,15 +796,14 @@ define <3 x float> @constrained_vector_fadd_v3f32() {
; S390X-LABEL: constrained_vector_fadd_v3f32:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI17_0
; S390X-NEXT: le %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI17_1
; S390X-NEXT: ler %f2, %f1
; S390X-NEXT: ler %f0, %f1
; S390X-NEXT: aeb %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI17_2
; S390X-NEXT: aeb %f2, 0(%r1)
; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: lzer %f4
; S390X-NEXT: aebr %f4, %f1
; S390X-NEXT: aebr %f4, %f0
; S390X-NEXT: larl %r1, .LCPI17_1
; S390X-NEXT: ler %f2, %f0
; S390X-NEXT: aeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI17_2
; S390X-NEXT: aeb %f0, 0(%r1)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fadd_v3f32:
@ -831,25 +834,26 @@ define void @constrained_vector_fadd_v3f64(<3 x double>* %a) {
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI18_0
; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: ldr %f1, %f0
; S390X-NEXT: ldr %f2, %f0
; S390X-NEXT: adb %f0, 16(%r2)
; S390X-NEXT: adb %f2, 8(%r2)
; S390X-NEXT: adb %f1, 0(%r2)
; S390X-NEXT: std %f0, 16(%r2)
; S390X-NEXT: std %f2, 8(%r2)
; S390X-NEXT: std %f1, 0(%r2)
; S390X-NEXT: ld %f1, 8(%r2)
; S390X-NEXT: ld %f2, 16(%r2)
; S390X-NEXT: ldr %f3, %f0
; S390X-NEXT: adb %f3, 0(%r2)
; S390X-NEXT: adbr %f1, %f0
; S390X-NEXT: adbr %f2, %f0
; S390X-NEXT: std %f2, 16(%r2)
; S390X-NEXT: std %f1, 8(%r2)
; S390X-NEXT: std %f3, 0(%r2)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fadd_v3f64:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: larl %r1, .LCPI18_0
; SZ13-NEXT: vl %v0, 0(%r2)
; SZ13-NEXT: vl %v1, 0(%r1)
; SZ13-NEXT: larl %r1, .LCPI18_1
; SZ13-NEXT: vfadb %v0, %v1, %v0
; SZ13-NEXT: ld %f1, 0(%r1)
; SZ13-NEXT: larl %r1, .LCPI18_1
; SZ13-NEXT: vl %v0, 0(%r2)
; SZ13-NEXT: vl %v2, 0(%r1)
; SZ13-NEXT: adb %f1, 16(%r2)
; SZ13-NEXT: vfadb %v0, %v2, %v0
; SZ13-NEXT: std %f1, 16(%r2)
; SZ13-NEXT: vst %v0, 0(%r2)
; SZ13-NEXT: br %r14
@ -869,18 +873,19 @@ define <4 x double> @constrained_vector_fadd_v4f64() {
; S390X-LABEL: constrained_vector_fadd_v4f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI19_0
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI19_1
; S390X-NEXT: ld %f6, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI19_3
; S390X-NEXT: ldeb %f4, 0(%r1)
; S390X-NEXT: adbr %f0, %f6
; S390X-NEXT: larl %r1, .LCPI19_2
; S390X-NEXT: ldr %f2, %f6
; S390X-NEXT: adb %f2, 0(%r1)
; S390X-NEXT: adbr %f4, %f6
; S390X-NEXT: larl %r1, .LCPI19_4
; S390X-NEXT: ldr %f2, %f1
; S390X-NEXT: ldr %f6, %f1
; S390X-NEXT: adb %f6, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI19_2
; S390X-NEXT: ldeb %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI19_4
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI19_3
; S390X-NEXT: adb %f2, 0(%r1)
; S390X-NEXT: adbr %f4, %f1
; S390X-NEXT: adbr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fadd_v4f64:
@ -890,9 +895,9 @@ define <4 x double> @constrained_vector_fadd_v4f64() {
; SZ13-NEXT: larl %r1, .LCPI19_1
; SZ13-NEXT: vl %v1, 0(%r1)
; SZ13-NEXT: larl %r1, .LCPI19_2
; SZ13-NEXT: vfadb %v24, %v1, %v0
; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: vfadb %v26, %v1, %v0
; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: vfadb %v24, %v1, %v0
; SZ13-NEXT: br %r14
entry:
%add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
@ -933,12 +938,12 @@ entry:
define <2 x double> @constrained_vector_fsub_v2f64() {
; S390X-LABEL: constrained_vector_fsub_v2f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI21_1
; S390X-NEXT: ld %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI21_0
; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ldr %f0, %f2
; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI21_2
; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI21_1
; S390X-NEXT: ldr %f2, %f0
; S390X-NEXT: sdb %f2, 0(%r1)
; S390X-NEXT: sdbr %f0, %f1
; S390X-NEXT: br %r14
@ -963,15 +968,15 @@ define <3 x float> @constrained_vector_fsub_v3f32() {
; S390X-LABEL: constrained_vector_fsub_v3f32:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI22_0
; S390X-NEXT: le %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI22_1
; S390X-NEXT: ler %f0, %f4
; S390X-NEXT: seb %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI22_2
; S390X-NEXT: ler %f2, %f4
; S390X-NEXT: seb %f2, 0(%r1)
; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: lzer %f1
; S390X-NEXT: ler %f4, %f0
; S390X-NEXT: sebr %f4, %f1
; S390X-NEXT: larl %r1, .LCPI22_1
; S390X-NEXT: ler %f2, %f0
; S390X-NEXT: seb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI22_2
; S390X-NEXT: seb %f0, 0(%r1)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fsub_v3f32:
@ -1004,23 +1009,26 @@ define void @constrained_vector_fsub_v3f64(<3 x double>* %a) {
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI23_0
; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: ldr %f1, %f0
; S390X-NEXT: ldr %f2, %f0
; S390X-NEXT: sdb %f0, 16(%r2)
; S390X-NEXT: sdb %f2, 8(%r2)
; S390X-NEXT: sdb %f1, 0(%r2)
; S390X-NEXT: ld %f1, 8(%r2)
; S390X-NEXT: ld %f2, 16(%r2)
; S390X-NEXT: ldr %f3, %f0
; S390X-NEXT: sdb %f3, 0(%r2)
; S390X-NEXT: ldr %f4, %f0
; S390X-NEXT: sdbr %f4, %f1
; S390X-NEXT: sdbr %f0, %f2
; S390X-NEXT: std %f0, 16(%r2)
; S390X-NEXT: std %f2, 8(%r2)
; S390X-NEXT: std %f1, 0(%r2)
; S390X-NEXT: std %f4, 8(%r2)
; S390X-NEXT: std %f3, 0(%r2)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fsub_v3f64:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: vl %v0, 0(%r2)
; SZ13-NEXT: vgmg %v2, 12, 10
; SZ13-NEXT: sdb %f2, 16(%r2)
; SZ13-NEXT: vgmg %v1, 12, 10
; SZ13-NEXT: vfsdb %v0, %v1, %v0
; SZ13-NEXT: sdb %f1, 16(%r2)
; SZ13-NEXT: std %f1, 16(%r2)
; SZ13-NEXT: std %f2, 16(%r2)
; SZ13-NEXT: vst %v0, 0(%r2)
; SZ13-NEXT: br %r14
entry:
@ -1038,21 +1046,21 @@ entry:
define <4 x double> @constrained_vector_fsub_v4f64() {
; S390X-LABEL: constrained_vector_fsub_v4f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI24_1
; S390X-NEXT: ld %f6, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI24_0
; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: ldr %f0, %f6
; S390X-NEXT: larl %r1, .LCPI24_2
; S390X-NEXT: ldr %f2, %f6
; S390X-NEXT: sdb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI24_3
; S390X-NEXT: ldeb %f3, 0(%r1)
; S390X-NEXT: ldr %f4, %f6
; S390X-NEXT: larl %r1, .LCPI24_4
; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI24_1
; S390X-NEXT: ldr %f6, %f0
; S390X-NEXT: sdb %f6, 0(%r1)
; S390X-NEXT: sdbr %f0, %f1
; S390X-NEXT: sdbr %f4, %f3
; S390X-NEXT: larl %r1, .LCPI24_2
; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI24_4
; S390X-NEXT: ldeb %f3, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI24_3
; S390X-NEXT: ldr %f2, %f0
; S390X-NEXT: sdb %f2, 0(%r1)
; S390X-NEXT: ldr %f4, %f0
; S390X-NEXT: sdbr %f4, %f1
; S390X-NEXT: sdbr %f0, %f3
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fsub_v4f64:
@ -1061,9 +1069,9 @@ define <4 x double> @constrained_vector_fsub_v4f64() {
; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: vgmg %v1, 12, 10
; SZ13-NEXT: larl %r1, .LCPI24_1
; SZ13-NEXT: vfsdb %v24, %v1, %v0
; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: vfsdb %v26, %v1, %v0
; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: vfsdb %v24, %v1, %v0
; SZ13-NEXT: br %r14
entry:
%sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
@ -1125,11 +1133,11 @@ define <3 x float> @constrained_vector_sqrt_v3f32() {
; S390X-LABEL: constrained_vector_sqrt_v3f32:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI27_0
; S390X-NEXT: sqeb %f0, 0(%r1)
; S390X-NEXT: sqeb %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI27_1
; S390X-NEXT: sqeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI27_2
; S390X-NEXT: sqeb %f4, 0(%r1)
; S390X-NEXT: sqeb %f0, 0(%r1)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_sqrt_v3f32:
@ -1155,11 +1163,13 @@ entry:
define void @constrained_vector_sqrt_v3f64(<3 x double>* %a) {
; S390X-LABEL: constrained_vector_sqrt_v3f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: sqdb %f0, 16(%r2)
; S390X-NEXT: sqdb %f1, 8(%r2)
; S390X-NEXT: ld %f0, 8(%r2)
; S390X-NEXT: ld %f1, 16(%r2)
; S390X-NEXT: sqdb %f2, 0(%r2)
; S390X-NEXT: std %f0, 16(%r2)
; S390X-NEXT: std %f1, 8(%r2)
; S390X-NEXT: sqdbr %f0, %f0
; S390X-NEXT: sqdbr %f1, %f1
; S390X-NEXT: std %f1, 16(%r2)
; S390X-NEXT: std %f0, 8(%r2)
; S390X-NEXT: std %f2, 0(%r2)
; S390X-NEXT: br %r14
;
@ -1185,13 +1195,13 @@ define <4 x double> @constrained_vector_sqrt_v4f64() {
; S390X-LABEL: constrained_vector_sqrt_v4f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI29_0
; S390X-NEXT: sqdb %f2, 0(%r1)
; S390X-NEXT: sqdb %f6, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI29_1
; S390X-NEXT: sqdb %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI29_3
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI29_2
; S390X-NEXT: sqdb %f6, 0(%r1)
; S390X-NEXT: sqdb %f2, 0(%r1)
; S390X-NEXT: sqdbr %f0, %f0
; S390X-NEXT: br %r14
;
@ -1199,10 +1209,10 @@ define <4 x double> @constrained_vector_sqrt_v4f64() {
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: larl %r1, .LCPI29_0
; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: vfsqdb %v24, %v0
; SZ13-NEXT: vfsqdb %v26, %v0
; SZ13-NEXT: larl %r1, .LCPI29_1
; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: vfsqdb %v26, %v0
; SZ13-NEXT: vfsqdb %v24, %v0
; SZ13-NEXT: br %r14
entry:
%sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
@ -4220,11 +4230,11 @@ define <2 x double> @constrained_vector_rint_v2f64() {
; S390X-LABEL: constrained_vector_rint_v2f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI76_0
; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI76_1
; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: fidbr %f0, 0, %f0
; S390X-NEXT: fidbr %f2, 0, %f1
; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: fidbr %f2, 0, %f0
; S390X-NEXT: fidbr %f0, 0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_rint_v2f64:
@ -4250,9 +4260,9 @@ define <3 x float> @constrained_vector_rint_v3f32() {
; S390X-NEXT: le %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI77_2
; S390X-NEXT: le %f3, 0(%r1)
; S390X-NEXT: fiebr %f0, 0, %f0
; S390X-NEXT: fiebr %f4, 0, %f0
; S390X-NEXT: fiebr %f2, 0, %f1
; S390X-NEXT: fiebr %f4, 0, %f3
; S390X-NEXT: fiebr %f0, 0, %f3
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_rint_v3f32:
@ -4319,13 +4329,13 @@ define <4 x double> @constrained_vector_rint_v4f64() {
; S390X-NEXT: larl %r1, .LCPI79_1
; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI79_2
; S390X-NEXT: ld %f3, 0(%r1)
; S390X-NEXT: ld %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI79_3
; S390X-NEXT: ld %f5, 0(%r1)
; S390X-NEXT: fidbr %f0, 0, %f0
; S390X-NEXT: fidbr %f2, 0, %f1
; S390X-NEXT: fidbr %f4, 0, %f3
; S390X-NEXT: fidbr %f6, 0, %f5
; S390X-NEXT: ld %f3, 0(%r1)
; S390X-NEXT: fidbr %f6, 0, %f0
; S390X-NEXT: fidbr %f4, 0, %f1
; S390X-NEXT: fidbr %f2, 0, %f2
; S390X-NEXT: fidbr %f0, 0, %f3
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_rint_v4f64:
@ -5382,8 +5392,8 @@ define <2 x float> @constrained_vector_fptrunc_v2f64() {
; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI96_1
; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: ledbr %f0, %f0
; S390X-NEXT: ledbr %f2, %f1
; S390X-NEXT: ledbr %f2, %f0
; S390X-NEXT: ledbr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fptrunc_v2f64:
@ -5454,13 +5464,13 @@ define <4 x float> @constrained_vector_fptrunc_v4f64() {
; S390X-NEXT: larl %r1, .LCPI98_1
; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI98_2
; S390X-NEXT: ld %f3, 0(%r1)
; S390X-NEXT: ld %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI98_3
; S390X-NEXT: ld %f5, 0(%r1)
; S390X-NEXT: ledbr %f0, %f0
; S390X-NEXT: ledbr %f2, %f1
; S390X-NEXT: ledbr %f4, %f3
; S390X-NEXT: ledbr %f6, %f5
; S390X-NEXT: ld %f3, 0(%r1)
; S390X-NEXT: ledbr %f6, %f0
; S390X-NEXT: ledbr %f4, %f1
; S390X-NEXT: ledbr %f2, %f2
; S390X-NEXT: ledbr %f0, %f3
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fptrunc_v4f64:
@ -5518,8 +5528,8 @@ define <2 x double> @constrained_vector_fpext_v2f32() {
; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI100_1
; S390X-NEXT: le %f1, 0(%r1)
; S390X-NEXT: ldebr %f0, %f0
; S390X-NEXT: ldebr %f2, %f1
; S390X-NEXT: ldebr %f2, %f0
; S390X-NEXT: ldebr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fpext_v2f32:
@ -5585,13 +5595,13 @@ define <4 x double> @constrained_vector_fpext_v4f32() {
; S390X-NEXT: larl %r1, .LCPI102_1
; S390X-NEXT: le %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI102_2
; S390X-NEXT: le %f3, 0(%r1)
; S390X-NEXT: le %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI102_3
; S390X-NEXT: le %f5, 0(%r1)
; S390X-NEXT: ldebr %f0, %f0
; S390X-NEXT: ldebr %f2, %f1
; S390X-NEXT: ldebr %f4, %f3
; S390X-NEXT: ldebr %f6, %f5
; S390X-NEXT: le %f3, 0(%r1)
; S390X-NEXT: ldebr %f6, %f0
; S390X-NEXT: ldebr %f4, %f1
; S390X-NEXT: ldebr %f2, %f2
; S390X-NEXT: ldebr %f0, %f3
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fpext_v4f32:
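
The churn in the vector test expectations above comes from the STRICT_ nodes now reaching instruction selection intact, which perturbs scheduling and register assignment. The tagging mechanism behind it is small; the following is a minimal sketch, assuming a simplified InstrEmitter-style hook (the function name and call site are illustrative, not this patch's actual code):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

// Sketch: when a MachineInstr is built from a MachineSDNode, carry the
// FPExcept bit from the node's SDNodeFlags over to the corresponding MI flag.
static void transferFPExceptFlag(const llvm::SDNode *Node,
                                 llvm::MachineInstr &MI) {
  llvm::SDNodeFlags Flags = Node->getFlags();
  if (Flags.hasFPExcept())
    MI.setFlag(llvm::MachineInstr::FPExcept);
}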

View File

@ -401,6 +401,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
mayLoad_Unset = Unset;
mayStore = R->getValueAsBitOrUnset("mayStore", Unset);
mayStore_Unset = Unset;
mayRaiseFPException = R->getValueAsBit("mayRaiseFPException");
hasSideEffects = R->getValueAsBitOrUnset("hasSideEffects", Unset);
hasSideEffects_Unset = Unset;

View File

@ -249,6 +249,7 @@ template <typename T> class ArrayRef;
bool mayLoad_Unset : 1;
bool mayStore : 1;
bool mayStore_Unset : 1;
bool mayRaiseFPException : 1;
bool isPredicable : 1;
bool isConvertibleToThreeAddress : 1;
bool isCommutable : 1;

View File

@ -603,6 +603,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
if (Inst.canFoldAsLoad) OS << "|(1ULL<<MCID::FoldableAsLoad)";
if (Inst.mayLoad) OS << "|(1ULL<<MCID::MayLoad)";
if (Inst.mayStore) OS << "|(1ULL<<MCID::MayStore)";
if (Inst.mayRaiseFPException) OS << "|(1ULL<<MCID::MayRaiseFPException)";
if (Inst.isPredicable) OS << "|(1ULL<<MCID::Predicable)";
if (Inst.isConvertibleToThreeAddress) OS << "|(1ULL<<MCID::ConvertibleTo3Addr)";
if (Inst.isCommutable) OS << "|(1ULL<<MCID::Commutable)";
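
The two TableGen changes above close the loop: the bit parsed from the instruction record becomes an MCID property in the generated instruction tables. A hedged sketch of a downstream MI-level consumer (isSafeToReorder is a hypothetical helper, not part of this patch):

#include "llvm/CodeGen/MachineInstr.h"

// Hypothetical helper: a scheduler-style query that refuses to reorder an
// instruction which might raise an FP exception. mayRaiseFPException() is
// only true when the MCID property and the FPExcept MI flag are both set.
static bool isSafeToReorder(const llvm::MachineInstr &MI) {
  return !MI.mayRaiseFPException() && !MI.mayLoadOrStore();
}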