From 10931e4f4167fd082ec7fb979db5fa555ae8a4a0 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 19 Aug 2016 17:17:06 +0000 Subject: [PATCH] GlobalISel: support overflow arithmetic intrinsics. Unsigned addition and subtraction can reuse the instructions created to legalize large width operations (i.e. both produce and consume a carry flag). Signed operations and multiplies get a dedicated op-with-overflow instruction. Once this is produced the two values are combined into a struct register (which will almost always be merged with a corresponding G_EXTRACT as part of legalization). llvm-svn: 279278 --- .../llvm/CodeGen/GlobalISel/IRTranslator.h | 2 + .../CodeGen/GlobalISel/MachineIRBuilder.h | 42 ++++++--- include/llvm/Target/GenericOpcodes.td | 57 ++++++++++-- include/llvm/Target/TargetOpcodes.def | 29 ++++++- lib/CodeGen/GlobalISel/IRTranslator.cpp | 38 ++++++++ lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 24 ++++-- .../GlobalISel/MachineLegalizeHelper.cpp | 16 ++-- .../AArch64/GlobalISel/arm64-irtranslator.ll | 86 +++++++++++++++++++ .../AArch64/GlobalISel/legalize-add.mir | 16 ++-- 9 files changed, 265 insertions(+), 45 deletions(-) diff --git a/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 4f45a0a08d4..0a5c9418378 100644 --- a/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -117,6 +117,8 @@ private: /// Translate an LLVM store instruction into generic IR. bool translateStore(const User &U); + bool translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID); + /// Translate call instruction. /// \pre \p U is a call instruction. 
bool translateCall(const User &U); diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 0de9e2ad32a..be7155717d3 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -131,18 +131,18 @@ public: MachineInstrBuilder buildAdd(LLT Ty, unsigned Res, unsigned Op0, unsigned Op1); - /// Build and insert \p Res, \p CarryOut = G_ADDE \p Ty \p Op0, \p Op1, + /// Build and insert \p Res, \p CarryOut = G_UADDE \p Ty \p Op0, \p Op1, /// \p CarryIn /// - /// G_ADDE sets \p Res to \p Op0 + \p Op1 + \p CarryIn (truncated to the bit - /// width) and sets \p CarryOut to 1 if the result overflowed in 2s-complement + /// G_UADDE sets \p Res to \p Op0 + \p Op1 + \p CarryIn (truncated to the bit + /// width) and sets \p CarryOut to 1 if the result overflowed in unsigned /// arithmetic. /// /// \pre setBasicBlock or setMI must have been called. /// /// \return The newly created instruction. - MachineInstrBuilder buildAdde(LLT Ty, unsigned Res, unsigned CarryOut, - unsigned Op0, unsigned Op1, unsigned CarryIn); + MachineInstrBuilder buildUAdde(LLT Ty, unsigned Res, unsigned CarryOut, + unsigned Op0, unsigned Op1, unsigned CarryIn); /// Build and insert \p Res = G_ANYEXTEND \p Ty \p Op0 /// @@ -227,17 +227,39 @@ public: MachineInstrBuilder buildExtract(LLT Ty, ArrayRef Results, unsigned Src, ArrayRef Indexes); - /// Build and insert \p Res = G_SEQUENCE \p Ty \p Ops[0], ... + /// Build and insert \p Res = G_SEQUENCE \p Ty \p Op0, \p Idx0... /// - /// G_SEQUENCE concatenates each element in Ops into a single register, where - /// Ops[0] starts at bit 0 of \p Res. + /// G_SEQUENCE inserts each element of Ops into an IMPLICIT_DEF register, + /// where each entry starts at the bit-index specified by \p Indexes. /// /// \pre setBasicBlock or setMI must have been called. - /// \pre The sum of the input sizes must equal the result's size. 
+ /// \pre The final element of the sequence must not extend past the end of the + /// destination register. + /// \pre The bits defined by each Op (derived from index and scalar size) must + /// not overlap. /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildSequence(LLT Ty, unsigned Res, - ArrayRef Ops); + ArrayRef Ops, + ArrayRef Indexes); + + void addUsesWithIndexes(MachineInstrBuilder MIB) {} + + template + void addUsesWithIndexes(MachineInstrBuilder MIB, unsigned Reg, + unsigned BitIndex, ArgTys... Args) { + MIB.addUse(Reg).addImm(BitIndex); + addUsesWithIndexes(MIB, Args...); + } + + template + MachineInstrBuilder buildSequence(LLT Ty, unsigned Res, unsigned Op, + unsigned Index, ArgTys... Args) { + MachineInstrBuilder MIB = + buildInstr(TargetOpcode::G_SEQUENCE, Ty).addDef(Res); + addUsesWithIndexes(MIB, Op, Index, Args...); + return MIB; + } /// Build and insert either a G_INTRINSIC (if \p HasSideEffects is false) or /// G_INTRINSIC_W_SIDE_EFFECTS instruction. Its first operand will be the diff --git a/include/llvm/Target/GenericOpcodes.td b/include/llvm/Target/GenericOpcodes.td index de679eaf6a4..68ef82c32d5 100644 --- a/include/llvm/Target/GenericOpcodes.td +++ b/include/llvm/Target/GenericOpcodes.td @@ -139,14 +139,6 @@ def G_UREM : Instruction { let isCommutable = 0; } -// Generic addition consuming and producing a carry flag. -def G_ADDE : Instruction { - let OutOperandList = (outs unknown:$dst, unknown:$carry_out); - let InOperandList = (ins unknown:$src1, unknown:$src2, unknown:$carry_in); - let hasSideEffects = 0; - let isCommutable = 1; -} - // Generic bitwise and. 
def G_AND : Instruction { let OutOperandList = (outs unknown:$dst); @@ -199,6 +191,55 @@ def G_ICMP : Instruction { let hasSideEffects = 0; } +//------------------------------------------------------------------------------ +// Overflow ops +//------------------------------------------------------------------------------ + +// Generic unsigned addition consuming and producing a carry flag. +def G_UADDE : Instruction { + let OutOperandList = (outs unknown:$dst, unknown:$carry_out); + let InOperandList = (ins unknown:$src1, unknown:$src2, unknown:$carry_in); + let hasSideEffects = 0; +} + +// Generic signed addition producing a carry flag. +def G_SADDO : Instruction { + let OutOperandList = (outs unknown:$dst, unknown:$carry_out); + let InOperandList = (ins unknown:$src1, unknown:$src2); + let hasSideEffects = 0; + let isCommutable = 1; +} + +// Generic unsigned subtraction consuming and producing a carry flag. +def G_USUBE : Instruction { + let OutOperandList = (outs unknown:$dst, unknown:$carry_out); + let InOperandList = (ins unknown:$src1, unknown:$src2, unknown:$carry_in); + let hasSideEffects = 0; +} + +// Generic signed subtraction producing a carry flag. +def G_SSUBO : Instruction { + let OutOperandList = (outs unknown:$dst, unknown:$carry_out); + let InOperandList = (ins unknown:$src1, unknown:$src2); + let hasSideEffects = 0; +} + +// Generic unsigned multiplication producing a carry flag. +def G_UMULO : Instruction { + let OutOperandList = (outs unknown:$dst, unknown:$carry_out); + let InOperandList = (ins unknown:$src1, unknown:$src2); + let hasSideEffects = 0; + let isCommutable = 1; +} + +// Generic signed multiplication producing a carry flag. 
+def G_SMULO : Instruction { + let OutOperandList = (outs unknown:$dst, unknown:$carry_out); + let InOperandList = (ins unknown:$src1, unknown:$src2); + let hasSideEffects = 0; + let isCommutable = 1; +} + //------------------------------------------------------------------------------ // Floating Point Binary ops. //------------------------------------------------------------------------------ diff --git a/include/llvm/Target/TargetOpcodes.def b/include/llvm/Target/TargetOpcodes.def index 90347aa03e9..d547ec5c504 100644 --- a/include/llvm/Target/TargetOpcodes.def +++ b/include/llvm/Target/TargetOpcodes.def @@ -163,10 +163,6 @@ HANDLE_TARGET_OPCODE(PATCHABLE_RET) HANDLE_TARGET_OPCODE(G_ADD) HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_START, G_ADD) -/// Generic ADD instruction, consuming the normal operands plus a carry flag, -/// and similarly producing the result and a carry flag. -HANDLE_TARGET_OPCODE(G_ADDE) - /// Generic SUB instruction. This is an integer sub. HANDLE_TARGET_OPCODE(G_SUB) @@ -262,6 +258,30 @@ HANDLE_TARGET_OPCODE(G_ASHR) /// Generic integer-base comparison, also applicable to vectors of integers. HANDLE_TARGET_OPCODE(G_ICMP) +/// Generic unsigned add instruction, consuming the normal operands plus a carry +/// flag, and similarly producing the result and a carry flag. +HANDLE_TARGET_OPCODE(G_UADDE) + +/// Generic unsigned subtract instruction, consuming the normal operands plus a +/// carry flag, and similarly producing the result and a carry flag. +HANDLE_TARGET_OPCODE(G_USUBE) + +/// Generic signed add instruction, producing the result and a signed overflow +/// flag. +HANDLE_TARGET_OPCODE(G_SADDO) + +/// Generic signed subtract instruction, producing the result and a signed +/// overflow flag. +HANDLE_TARGET_OPCODE(G_SSUBO) + +/// Generic unsigned multiply instruction, producing the result and an unsigned +/// overflow flag. 
+HANDLE_TARGET_OPCODE(G_UMULO) + +/// Generic signed multiply instruction, producing the result and a signed +/// overflow flag. +HANDLE_TARGET_OPCODE(G_SMULO) + /// Generic FP addition. HANDLE_TARGET_OPCODE(G_FADD) @@ -277,6 +297,7 @@ HANDLE_TARGET_OPCODE(G_FDIV) /// Generic FP remainder. HANDLE_TARGET_OPCODE(G_FREM) + /// Generic BRANCH instruction. This is an unconditional branch. HANDLE_TARGET_OPCODE(G_BR) diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index a7b6d20c212..ac95b8262d9 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -204,6 +204,41 @@ bool IRTranslator::translateCast(unsigned Opcode, const User &U) { return true; } +bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, + Intrinsic::ID ID) { + unsigned Op = 0; + switch (ID) { + default: return false; + case Intrinsic::uadd_with_overflow: Op = TargetOpcode::G_UADDE; break; + case Intrinsic::sadd_with_overflow: Op = TargetOpcode::G_SADDO; break; + case Intrinsic::usub_with_overflow: Op = TargetOpcode::G_USUBE; break; + case Intrinsic::ssub_with_overflow: Op = TargetOpcode::G_SSUBO; break; + case Intrinsic::umul_with_overflow: Op = TargetOpcode::G_UMULO; break; + case Intrinsic::smul_with_overflow: Op = TargetOpcode::G_SMULO; break; + } + + LLT Ty{*CI.getOperand(0)->getType()}; + LLT s1 = LLT::scalar(1); + unsigned Width = Ty.getSizeInBits(); + unsigned Res = MRI->createGenericVirtualRegister(Width); + unsigned Overflow = MRI->createGenericVirtualRegister(1); + auto MIB = MIRBuilder.buildInstr(Op, {Ty, s1}) + .addDef(Res) + .addDef(Overflow) + .addUse(getOrCreateVReg(*CI.getOperand(0))) + .addUse(getOrCreateVReg(*CI.getOperand(1))); + + if (Op == TargetOpcode::G_UADDE || Op == TargetOpcode::G_USUBE) { + unsigned Zero = MRI->createGenericVirtualRegister(1); + EntryBuilder.buildConstant(s1, Zero, 0); + MIB.addUse(Zero); + } + + MIRBuilder.buildSequence(LLT{*CI.getType(), DL}, 
getOrCreateVReg(CI), Res, 0, + Overflow, Width); + return true; +} + bool IRTranslator::translateCall(const User &U) { const CallInst &CI = cast(U); auto TII = MIRBuilder.getMF().getTarget().getIntrinsicInfo(); @@ -227,6 +262,9 @@ bool IRTranslator::translateCall(const User &U) { assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic"); + if (translateKnownIntrinsic(CI, ID)) + return true; + // Need types (starting with return) & args. SmallVector Tys; Tys.emplace_back(*CI.getType()); diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 70a24415f1d..92dbab05e03 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -124,11 +124,11 @@ MachineInstrBuilder MachineIRBuilder::buildStore(LLT VTy, LLT PTy, .addMemOperand(&MMO); } -MachineInstrBuilder MachineIRBuilder::buildAdde(LLT Ty, unsigned Res, - unsigned CarryOut, unsigned Op0, - unsigned Op1, - unsigned CarryIn) { - return buildInstr(TargetOpcode::G_ADDE, Ty) +MachineInstrBuilder MachineIRBuilder::buildUAdde(LLT Ty, unsigned Res, + unsigned CarryOut, + unsigned Op0, unsigned Op1, + unsigned CarryIn) { + return buildInstr(TargetOpcode::G_UADDE, Ty) .addDef(Res) .addDef(CarryOut) .addUse(Op0) @@ -157,12 +157,18 @@ MachineIRBuilder::buildExtract(LLT Ty, ArrayRef Results, unsigned Src, return MIB; } -MachineInstrBuilder MachineIRBuilder::buildSequence(LLT Ty, unsigned Res, - ArrayRef Ops) { +MachineInstrBuilder +MachineIRBuilder::buildSequence(LLT Ty, unsigned Res, + ArrayRef Ops, + ArrayRef Indexes) { + assert(Ops.size() == Indexes.size() && "incompatible args"); + MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_SEQUENCE, Ty); MIB.addDef(Res); - for (auto Op : Ops) - MIB.addUse(Op); + for (unsigned i = 0; i < Ops.size(); ++i) { + MIB.addUse(Ops[i]); + MIB.addImm(Indexes[i]); + } return MIB; } diff --git a/lib/CodeGen/GlobalISel/MachineLegalizeHelper.cpp 
b/lib/CodeGen/GlobalISel/MachineLegalizeHelper.cpp index 01d87d1e70d..d999fbe8892 100644 --- a/lib/CodeGen/GlobalISel/MachineLegalizeHelper.cpp +++ b/lib/CodeGen/GlobalISel/MachineLegalizeHelper.cpp @@ -71,7 +71,7 @@ MachineLegalizeHelper::narrowScalar(MachineInstr &MI, LLT NarrowTy) { MIRBuilder.setInstr(MI); - SmallVector Src1Regs, Src2Regs, DstRegs; + SmallVector Src1Regs, Src2Regs, DstRegs, Indexes; extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); @@ -82,13 +82,15 @@ MachineLegalizeHelper::narrowScalar(MachineInstr &MI, LLT NarrowTy) { unsigned DstReg = MRI.createGenericVirtualRegister(NarrowSize); unsigned CarryOut = MRI.createGenericVirtualRegister(1); - MIRBuilder.buildAdde(NarrowTy, DstReg, CarryOut, Src1Regs[i], Src2Regs[i], - CarryIn); + MIRBuilder.buildUAdde(NarrowTy, DstReg, CarryOut, Src1Regs[i], + Src2Regs[i], CarryIn); DstRegs.push_back(DstReg); + Indexes.push_back(i * NarrowSize); CarryIn = CarryOut; } - MIRBuilder.buildSequence(MI.getType(), MI.getOperand(0).getReg(), DstRegs); + MIRBuilder.buildSequence(MI.getType(), MI.getOperand(0).getReg(), DstRegs, + Indexes); MI.eraseFromParent(); return Legalized; } @@ -140,7 +142,7 @@ MachineLegalizeHelper::fewerElementsVector(MachineInstr &MI, LLT NarrowTy) { MIRBuilder.setInstr(MI); - SmallVector Src1Regs, Src2Regs, DstRegs; + SmallVector Src1Regs, Src2Regs, DstRegs, Indexes; extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); @@ -148,9 +150,11 @@ MachineLegalizeHelper::fewerElementsVector(MachineInstr &MI, LLT NarrowTy) { unsigned DstReg = MRI.createGenericVirtualRegister(NarrowSize); MIRBuilder.buildAdd(NarrowTy, DstReg, Src1Regs[i], Src2Regs[i]); DstRegs.push_back(DstReg); + Indexes.push_back(i * NarrowSize); } - MIRBuilder.buildSequence(MI.getType(), MI.getOperand(0).getReg(), DstRegs); + 
MIRBuilder.buildSequence(MI.getType(), MI.getOperand(0).getReg(), DstRegs, + Indexes); MI.eraseFromParent(); return Legalized; } diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index 789ca976e7b..060bae20e45 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -596,3 +596,89 @@ define float @test_frem(float %arg1, float %arg2) { %res = frem float %arg1, %arg2 ret float %res } + +; CHECK-LABEL: name: test_sadd_overflow +; CHECK: [[LHS:%[0-9]+]](32) = COPY %w0 +; CHECK: [[RHS:%[0-9]+]](32) = COPY %w1 +; CHECK: [[ADDR:%[0-9]+]](64) = COPY %x2 +; CHECK: [[VAL:%[0-9]+]](32), [[OVERFLOW:%[0-9]+]](1) = G_SADDO { s32, s1 } [[LHS]], [[RHS]] +; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE s64 [[VAL]], 0, [[OVERFLOW]], 32 +; CHECK: G_STORE { s64, p0 } [[RES]], [[ADDR]] +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) +define void @test_sadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { + %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhs, i32 %rhs) + store { i32, i1 } %res, { i32, i1 }* %addr + ret void +} + +; CHECK-LABEL: name: test_uadd_overflow +; CHECK: [[LHS:%[0-9]+]](32) = COPY %w0 +; CHECK: [[RHS:%[0-9]+]](32) = COPY %w1 +; CHECK: [[ADDR:%[0-9]+]](64) = COPY %x2 +; CHECK: [[ZERO:%[0-9]+]](1) = G_CONSTANT s1 0 +; CHECK: [[VAL:%[0-9]+]](32), [[OVERFLOW:%[0-9]+]](1) = G_UADDE { s32, s1 } [[LHS]], [[RHS]], [[ZERO]] +; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE s64 [[VAL]], 0, [[OVERFLOW]], 32 +; CHECK: G_STORE { s64, p0 } [[RES]], [[ADDR]] +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) +define void @test_uadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { + %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %lhs, i32 %rhs) + store { i32, i1 } %res, { i32, i1 }* %addr + ret void +} + +; CHECK-LABEL: name: test_ssub_overflow +; CHECK: [[LHS:%[0-9]+]](32) = COPY %w0 +; CHECK: 
[[RHS:%[0-9]+]](32) = COPY %w1 +; CHECK: [[SUBR:%[0-9]+]](64) = COPY %x2 +; CHECK: [[VAL:%[0-9]+]](32), [[OVERFLOW:%[0-9]+]](1) = G_SSUBO { s32, s1 } [[LHS]], [[RHS]] +; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE s64 [[VAL]], 0, [[OVERFLOW]], 32 +; CHECK: G_STORE { s64, p0 } [[RES]], [[SUBR]] +declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) +define void @test_ssub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { + %res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %lhs, i32 %rhs) + store { i32, i1 } %res, { i32, i1 }* %subr + ret void +} + +; CHECK-LABEL: name: test_usub_overflow +; CHECK: [[LHS:%[0-9]+]](32) = COPY %w0 +; CHECK: [[RHS:%[0-9]+]](32) = COPY %w1 +; CHECK: [[SUBR:%[0-9]+]](64) = COPY %x2 +; CHECK: [[ZERO:%[0-9]+]](1) = G_CONSTANT s1 0 +; CHECK: [[VAL:%[0-9]+]](32), [[OVERFLOW:%[0-9]+]](1) = G_USUBE { s32, s1 } [[LHS]], [[RHS]], [[ZERO]] +; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE s64 [[VAL]], 0, [[OVERFLOW]], 32 +; CHECK: G_STORE { s64, p0 } [[RES]], [[SUBR]] +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) +define void @test_usub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { + %res = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %lhs, i32 %rhs) + store { i32, i1 } %res, { i32, i1 }* %subr + ret void +} + +; CHECK-LABEL: name: test_smul_overflow +; CHECK: [[LHS:%[0-9]+]](32) = COPY %w0 +; CHECK: [[RHS:%[0-9]+]](32) = COPY %w1 +; CHECK: [[ADDR:%[0-9]+]](64) = COPY %x2 +; CHECK: [[VAL:%[0-9]+]](32), [[OVERFLOW:%[0-9]+]](1) = G_SMULO { s32, s1 } [[LHS]], [[RHS]] +; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE s64 [[VAL]], 0, [[OVERFLOW]], 32 +; CHECK: G_STORE { s64, p0 } [[RES]], [[ADDR]] +declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) +define void @test_smul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { + %res = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %lhs, i32 %rhs) + store { i32, i1 } %res, { i32, i1 }* %addr + ret void +} + +; CHECK-LABEL: name: test_umul_overflow +; CHECK: [[LHS:%[0-9]+]](32) 
= COPY %w0 +; CHECK: [[RHS:%[0-9]+]](32) = COPY %w1 +; CHECK: [[ADDR:%[0-9]+]](64) = COPY %x2 +; CHECK: [[VAL:%[0-9]+]](32), [[OVERFLOW:%[0-9]+]](1) = G_UMULO { s32, s1 } [[LHS]], [[RHS]] +; CHECK: [[RES:%[0-9]+]](64) = G_SEQUENCE s64 [[VAL]], 0, [[OVERFLOW]], 32 +; CHECK: G_STORE { s64, p0 } [[RES]], [[ADDR]] +declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) +define void @test_umul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { + %res = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %lhs, i32 %rhs) + store { i32, i1 } %res, { i32, i1 }* %addr + ret void +} diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-add.mir b/test/CodeGen/AArch64/GlobalISel/legalize-add.mir index 77eb4794024..afc5ffbce67 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-add.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-add.mir @@ -31,12 +31,12 @@ body: | ; CHECK-DAG: [[LHS_LO:%.*]](64), [[LHS_HI:%.*]](64) = G_EXTRACT s64 %0, 0, 64 ; CHECK-DAG: [[RHS_LO:%.*]](64), [[RHS_HI:%.*]](64) = G_EXTRACT s64 %1, 0, 64 ; CHECK-DAG: [[CARRY0:%.*]](1) = G_CONSTANT s1 0 - ; CHECK: [[RES_LO:%.*]](64), [[CARRY:%.*]](1) = G_ADDE s64 [[LHS_LO]], [[RHS_LO]], [[CARRY0]] - ; CHECK: [[RES_HI:%.*]](64), {{%.*}}(1) = G_ADDE s64 [[LHS_HI]], [[RHS_HI]], [[CARRY]] - ; CHECK: %2(128) = G_SEQUENCE s128 [[RES_LO]], [[RES_HI]] + ; CHECK: [[RES_LO:%.*]](64), [[CARRY:%.*]](1) = G_UADDE s64 [[LHS_LO]], [[RHS_LO]], [[CARRY0]] + ; CHECK: [[RES_HI:%.*]](64), {{%.*}}(1) = G_UADDE s64 [[LHS_HI]], [[RHS_HI]], [[CARRY]] + ; CHECK: %2(128) = G_SEQUENCE s128 [[RES_LO]], 0, [[RES_HI]], 64 - %0(128) = G_SEQUENCE s128 %x0, %x1 - %1(128) = G_SEQUENCE s128 %x2, %x3 + %0(128) = G_SEQUENCE s128 %x0, 0, %x1, 64 + %1(128) = G_SEQUENCE s128 %x2, 0, %x3, 64 %2(128) = G_ADD s128 %0, %1 %x0, %x1 = G_EXTRACT s64 %2, 0, 64 ... 
@@ -78,10 +78,10 @@ body: | ; CHECK-DAG: [[RHS_LO:%.*]](128), [[RHS_HI:%.*]](128) = G_EXTRACT <2 x s64> %1, 0, 128 ; CHECK: [[RES_LO:%.*]](128) = G_ADD <2 x s64> [[LHS_LO]], [[RHS_LO]] ; CHECK: [[RES_HI:%.*]](128) = G_ADD <2 x s64> [[LHS_HI]], [[RHS_HI]] - ; CHECK: %2(256) = G_SEQUENCE <4 x s64> [[RES_LO]], [[RES_HI]] + ; CHECK: %2(256) = G_SEQUENCE <4 x s64> [[RES_LO]], 0, [[RES_HI]], 128 - %0(256) = G_SEQUENCE <4 x s64> %q0, %q1 - %1(256) = G_SEQUENCE <4 x s64> %q2, %q3 + %0(256) = G_SEQUENCE <4 x s64> %q0, 0, %q1, 128 + %1(256) = G_SEQUENCE <4 x s64> %q2, 0, %q3, 128 %2(256) = G_ADD <4 x s64> %0, %1 %q0, %q1 = G_EXTRACT <2 x s64> %2, 0, 128 ...