[SelectionDAG] Improve the legalisation lowering of UMULO.

There is no way in the universe that doing a full-width division in
software will be faster than doing the overflowing multiplication in
software directly, especially given that the division-based check has
to perform the same full-width multiplication anyway.

This patch replaces the previous implementation with a direct lowering
into an overflowing multiplication algorithm based on half-width
operations.
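
To make the algorithm easier to follow, here is a scalar C++ sketch of
what the new lowering computes (illustrative only, not part of the
patch; the variable names mirror the %0..%5 values commented in the new
legalizer code, with u128/u64 standing in for the full- and half-width
types):

#include <cstdint>

struct MulResult { unsigned __int128 Value; bool Overflow; };

// Overflowing 128-bit unsigned multiply built from 64-bit halves.
static MulResult umulo128(unsigned __int128 L, unsigned __int128 R) {
  uint64_t LHSLow = (uint64_t)L, LHSHigh = (uint64_t)(L >> 64);
  uint64_t RHSLow = (uint64_t)R, RHSHigh = (uint64_t)(R >> 64);

  // %0: if both high halves are non-zero, the product cannot fit.
  bool Overflow = LHSHigh != 0 && RHSHigh != 0;

  // %1, %2: each cross product overflows iff it does not fit in a half word.
  unsigned __int128 One = (unsigned __int128)LHSHigh * RHSLow;
  Overflow |= (One >> 64) != 0;
  unsigned __int128 Two = (unsigned __int128)RHSHigh * LHSLow;
  Overflow |= (Two >> 64) != 0;

  // %3..%5: low*low product plus the cross products shifted into the high
  // half; the final addition is checked for carry-out.
  unsigned __int128 Three = (unsigned __int128)LHSLow * RHSLow;
  unsigned __int128 Four = ((unsigned __int128)(uint64_t)One << 64) +
                           ((unsigned __int128)(uint64_t)Two << 64);
  unsigned __int128 Five = Three + Four;
  Overflow |= Five < Three; // carry out of the final addition

  return {Five, Overflow};
}

Note that the carry out of %4 itself can be ignored: it can only occur
when both cross products are non-zero, which requires both high halves
to be non-zero, and %0 already flags that case.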

Correctness of the algorithm was verified by exhaustively checking its
output for overflowing multiplication of 16-bit integers against an
obviously correct widening multiplication. Barring any oversights
introduced by porting the algorithm to the DAG, confidence in its
correctness is extremely high.
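
A sketch of such an exhaustive check (the author's actual harness is
not part of this commit; this reconstruction uses an assumed 16-bit
analogue of the algorithm, built from 8-bit halves):

#include <cassert>
#include <cstdint>

// Assumed 16-bit analogue of the half-width algorithm sketched above.
static bool umulo16(uint16_t L, uint16_t R, uint16_t &Prod) {
  uint8_t LL = L & 0xFF, LH = L >> 8, RL = R & 0xFF, RH = R >> 8;
  bool Ovf = LH != 0 && RH != 0;
  uint16_t One = (uint16_t)(LH * RL);
  Ovf |= One > 0xFF;
  uint16_t Two = (uint16_t)(RH * LL);
  Ovf |= Two > 0xFF;
  uint16_t Three = (uint16_t)(LL * RL);
  uint16_t Four = (uint16_t)(((One & 0xFF) << 8) + ((Two & 0xFF) << 8));
  Prod = (uint16_t)(Three + Four);
  Ovf |= Prod < Three;
  return Ovf;
}

int main() {
  for (uint32_t L = 0; L <= 0xFFFF; ++L)
    for (uint32_t R = 0; R <= 0xFFFF; ++R) {
      uint32_t Wide = L * R;            // obviously correct widening multiply
      uint16_t Prod;
      bool Ovf = umulo16((uint16_t)L, (uint16_t)R, Prod);
      assert(Ovf == (Wide > 0xFFFF));   // overflow bit must agree
      assert(Prod == (uint16_t)Wide);   // low half must agree
    }
}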

The following table shows the change in both runtime (t) and code size
(s). The change is expressed as a multiplier of the original, so
anything under 1 is better and anything above 1 is worse.

+-------+-----------+-----------+-------------+-------------+
| Arch  | u64*u64 t | u64*u64 s | u128*u128 t | u128*u128 s |
+-------+-----------+-----------+-------------+-------------+
|   X64 |     -     |     -     |    ~0.5     |    ~0.64    |
|  i686 |   ~0.5    |   ~0.6666 |    ~0.05    |    ~0.9     |
| armv7 |     -     |   ~0.75   |      -      |    ~1.4     |
+-------+-----------+-----------+-------------+-------------+

Performance numbers were collected by running the overflowing
multiplication in a loop under `perf` on two x86_64 machines (one Intel
Haswell, the other AMD Ryzen). Size numbers were collected by looking
at the size of a function containing an overflowing multiply in a loop.
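
The exact benchmark is not included in this commit; a minimal sketch of
the kind of loop described above (assumed, not the author's code) would
be:

#include <cstdint>

// Keep an overflowing multiply live in a loop so `perf` measures the
// expansion and the containing function's size can be compared.
__attribute__((noinline))
uint64_t mul_loop(unsigned __int128 x, unsigned __int128 y, uint64_t iters) {
  uint64_t overflows = 0;
  for (uint64_t i = 0; i < iters; ++i) {
    unsigned __int128 out;
    // With Clang, unsigned 128-bit operands here lower to
    // llvm.umul.with.overflow.i128.
    overflows += __builtin_mul_overflow(x + i, y, &out);
    asm volatile("" : : "r"(&out) : "memory"); // keep the result observable
  }
  return overflows;
}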

All in all, both performance and code size have improved, except on
armv7 where code size has regressed for the 128-bit multiply. The
u128*u128 overflowing multiply on 32-bit platforms benefits the most
from this change, taking only about 5% of the time the original
algorithm needed to compute the same thing.

The final benefit of this change is that LLVM is now capable of lowering
the overflowing unsigned multiply for integers of any bit-width as long
as the target is capable of lowering regular multiplication for the same
bit-width. Previously, 128-bit overflowing multiply was the widest
possible.

Patch by Simonas Kazlauskas!

Differential Revision: https://reviews.llvm.org/D50310

llvm-svn: 339922
Eli Friedman 2018-08-16 18:39:39 +00:00
parent 8c581db10d
commit ca89c6b055
15 changed files with 1730 additions and 158 deletions


@@ -2705,25 +2705,56 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
EVT VT = N->getValueType(0);
SDLoc dl(N);
-// A divide for UMULO should be faster than a function call.
if (N->getOpcode() == ISD::UMULO) {
+// This section expands the operation into the following sequence of
+// instructions. `iNh` here refers to a type which has half the bit width of
+// the type the original operation operated on.
+//
+// %0 = %LHS.HI != 0 && %RHS.HI != 0
+// %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO)
+// %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO)
+// %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN)
+// %4 = add iN (%1.0 as iN) << Nh, (%2.0 as iN) << Nh
+// %5 = { iN, i1 } @uadd.with.overflow.iN( %4, %3 )
+//
+// %res = { %5.0, %0 || %1.1 || %2.1 || %5.1 }
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+SDValue LHSHigh, LHSLow, RHSHigh, RHSLow;
+SplitInteger(LHS, LHSLow, LHSHigh);
+SplitInteger(RHS, RHSLow, RHSHigh);
+EVT HalfVT = LHSLow.getValueType()
+, BitVT = N->getValueType(1);
+SDVTList VTHalfMulO = DAG.getVTList(HalfVT, BitVT);
+SDVTList VTFullAddO = DAG.getVTList(VT, BitVT);
-SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
-SplitInteger(MUL, Lo, Hi);
+SDValue HalfZero = DAG.getConstant(0, dl, HalfVT);
+SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT,
+DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE),
+DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE));
-// A divide for UMULO will be faster than a function call. Select to
-// make sure we aren't using 0.
-SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT),
-RHS, DAG.getConstant(0, dl, VT), ISD::SETEQ);
-SDValue NotZero = DAG.getSelect(dl, VT, isZero,
-DAG.getConstant(1, dl, VT), RHS);
-SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero);
-SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS,
-ISD::SETNE);
-Overflow = DAG.getSelect(dl, N->getValueType(1), isZero,
-DAG.getConstant(0, dl, N->getValueType(1)),
-Overflow);
+SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, LHSHigh, RHSLow);
+Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1));
+SDValue OneInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
+One.getValue(0));
+SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, RHSHigh, LHSLow);
+Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1));
+SDValue TwoInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero,
+Two.getValue(0));
+// Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not
+// know how to expand `i64,i64 = umul_lohi a, b` and abort (why isnt this
+// operation recursively legalized?).
+//
+// Many backends understand this pattern and will convert into LOHI
+// themselves, if applicable.
+SDValue Three = DAG.getNode(ISD::MUL, dl, VT,
+DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow),
+DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow));
+SDValue Four = DAG.getNode(ISD::ADD, dl, VT, OneInHigh, TwoInHigh);
+SDValue Five = DAG.getNode(ISD::UADDO, dl, VTFullAddO, Three, Four);
+Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Five.getValue(1));
+SplitInteger(Five, Lo, Hi);
ReplaceValueWith(SDValue(N, 1), Overflow);
return;
}


@@ -0,0 +1,48 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=AARCH
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; AARCH-LABEL: muloti_test:
; AARCH: // %bb.0: // %start
; AARCH-NEXT: mul x8, x3, x0
; AARCH-NEXT: umulh x9, x0, x2
; AARCH-NEXT: madd x11, x1, x2, x8
; AARCH-NEXT: add x8, x9, x11
; AARCH-NEXT: cmp x8, x9
; AARCH-NEXT: cset w9, lo
; AARCH-NEXT: cmp x11, #0 // =0
; AARCH-NEXT: csel w9, wzr, w9, eq
; AARCH-NEXT: cmp x3, #0 // =0
; AARCH-NEXT: umulh x10, x1, x2
; AARCH-NEXT: cset w12, ne
; AARCH-NEXT: cmp x1, #0 // =0
; AARCH-NEXT: umulh x11, x3, x0
; AARCH-NEXT: cset w13, ne
; AARCH-NEXT: cmp xzr, x10
; AARCH-NEXT: and w10, w13, w12
; AARCH-NEXT: cset w12, ne
; AARCH-NEXT: cmp xzr, x11
; AARCH-NEXT: orr w10, w10, w12
; AARCH-NEXT: cset w11, ne
; AARCH-NEXT: orr w10, w10, w11
; AARCH-NEXT: orr w9, w10, w9
; AARCH-NEXT: mul x0, x0, x2
; AARCH-NEXT: mov x1, x8
; AARCH-NEXT: mov w2, w9
; AARCH-NEXT: ret
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
%1 = extractvalue { i128, i1 } %0, 0
%2 = extractvalue { i128, i1 } %0, 1
%3 = zext i1 %2 to i8
%4 = insertvalue { i128, i8 } undef, i128 %1, 0
%5 = insertvalue { i128, i8 } %4, i8 %3, 1
ret { i128, i8 } %5
}
; Function Attrs: nounwind readnone speculatable
declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1
attributes #0 = { nounwind readnone uwtable }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }


@@ -0,0 +1,210 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=armv6-unknown-linux-gnu | FileCheck %s --check-prefixes=ARMV6
; RUN: llc < %s -mtriple=armv7-unknown-linux-gnu | FileCheck %s --check-prefixes=ARMV7
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; ARMV6-LABEL: muloti_test:
; ARMV6: @ %bb.0: @ %start
; ARMV6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; ARMV6-NEXT: sub sp, sp, #28
; ARMV6-NEXT: mov r9, #0
; ARMV6-NEXT: mov r11, r0
; ARMV6-NEXT: ldr r7, [sp, #76]
; ARMV6-NEXT: mov r5, r3
; ARMV6-NEXT: ldr r10, [sp, #72]
; ARMV6-NEXT: mov r1, r3
; ARMV6-NEXT: mov r6, r2
; ARMV6-NEXT: mov r0, r2
; ARMV6-NEXT: mov r2, #0
; ARMV6-NEXT: mov r3, #0
; ARMV6-NEXT: str r9, [sp, #12]
; ARMV6-NEXT: str r9, [sp, #8]
; ARMV6-NEXT: str r7, [sp, #4]
; ARMV6-NEXT: str r10, [sp]
; ARMV6-NEXT: bl __multi3
; ARMV6-NEXT: str r3, [sp, #20] @ 4-byte Spill
; ARMV6-NEXT: str r2, [sp, #16] @ 4-byte Spill
; ARMV6-NEXT: stm r11, {r0, r1}
; ARMV6-NEXT: ldr r0, [sp, #84]
; ARMV6-NEXT: ldr r3, [sp, #80]
; ARMV6-NEXT: ldr r8, [sp, #64]
; ARMV6-NEXT: umull r4, r0, r0, r6
; ARMV6-NEXT: umull r2, r1, r5, r3
; ARMV6-NEXT: add r2, r4, r2
; ARMV6-NEXT: umull lr, r4, r3, r6
; ARMV6-NEXT: umull r3, r6, r7, r8
; ARMV6-NEXT: adds r12, r4, r2
; ARMV6-NEXT: adc r2, r9, #0
; ARMV6-NEXT: str r2, [sp, #24] @ 4-byte Spill
; ARMV6-NEXT: ldr r2, [sp, #68]
; ARMV6-NEXT: umull r4, r2, r2, r10
; ARMV6-NEXT: add r3, r4, r3
; ARMV6-NEXT: umull r4, r10, r8, r10
; ARMV6-NEXT: adds r3, r10, r3
; ARMV6-NEXT: adc r10, r9, #0
; ARMV6-NEXT: adds r4, r4, lr
; ARMV6-NEXT: adc r12, r3, r12
; ARMV6-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
; ARMV6-NEXT: adds r4, r3, r4
; ARMV6-NEXT: str r4, [r11, #8]
; ARMV6-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
; ARMV6-NEXT: adcs r3, r4, r12
; ARMV6-NEXT: str r3, [r11, #12]
; ARMV6-NEXT: ldr r3, [sp, #84]
; ARMV6-NEXT: adc r12, r9, #0
; ARMV6-NEXT: cmp r5, #0
; ARMV6-NEXT: movne r5, #1
; ARMV6-NEXT: cmp r3, #0
; ARMV6-NEXT: mov r4, r3
; ARMV6-NEXT: movne r4, #1
; ARMV6-NEXT: cmp r0, #0
; ARMV6-NEXT: movne r0, #1
; ARMV6-NEXT: cmp r1, #0
; ARMV6-NEXT: and r5, r4, r5
; ARMV6-NEXT: movne r1, #1
; ARMV6-NEXT: orr r0, r5, r0
; ARMV6-NEXT: ldr r5, [sp, #68]
; ARMV6-NEXT: orr r0, r0, r1
; ARMV6-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; ARMV6-NEXT: cmp r7, #0
; ARMV6-NEXT: orr r0, r0, r1
; ARMV6-NEXT: movne r7, #1
; ARMV6-NEXT: cmp r5, #0
; ARMV6-NEXT: mov r1, r5
; ARMV6-NEXT: movne r1, #1
; ARMV6-NEXT: cmp r2, #0
; ARMV6-NEXT: movne r2, #1
; ARMV6-NEXT: and r1, r1, r7
; ARMV6-NEXT: orr r1, r1, r2
; ARMV6-NEXT: ldr r2, [sp, #80]
; ARMV6-NEXT: cmp r6, #0
; ARMV6-NEXT: movne r6, #1
; ARMV6-NEXT: orrs r2, r2, r3
; ARMV6-NEXT: orr r1, r1, r6
; ARMV6-NEXT: movne r2, #1
; ARMV6-NEXT: orrs r7, r8, r5
; ARMV6-NEXT: orr r1, r1, r10
; ARMV6-NEXT: movne r7, #1
; ARMV6-NEXT: and r2, r7, r2
; ARMV6-NEXT: orr r1, r2, r1
; ARMV6-NEXT: orr r0, r1, r0
; ARMV6-NEXT: orr r0, r0, r12
; ARMV6-NEXT: and r0, r0, #1
; ARMV6-NEXT: strb r0, [r11, #16]
; ARMV6-NEXT: add sp, sp, #28
; ARMV6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; ARMV7-LABEL: muloti_test:
; ARMV7: @ %bb.0: @ %start
; ARMV7-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; ARMV7-NEXT: sub sp, sp, #44
; ARMV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
; ARMV7-NEXT: mov r0, #0
; ARMV7-NEXT: ldr r8, [sp, #88]
; ARMV7-NEXT: mov r5, r3
; ARMV7-NEXT: ldr r7, [sp, #92]
; ARMV7-NEXT: mov r1, r3
; ARMV7-NEXT: mov r6, r2
; ARMV7-NEXT: str r0, [sp, #8]
; ARMV7-NEXT: str r0, [sp, #12]
; ARMV7-NEXT: mov r0, r2
; ARMV7-NEXT: mov r2, #0
; ARMV7-NEXT: mov r3, #0
; ARMV7-NEXT: str r8, [sp]
; ARMV7-NEXT: str r7, [sp, #4]
; ARMV7-NEXT: bl __multi3
; ARMV7-NEXT: str r1, [sp, #28] @ 4-byte Spill
; ARMV7-NEXT: ldr r1, [sp, #80]
; ARMV7-NEXT: str r2, [sp, #24] @ 4-byte Spill
; ARMV7-NEXT: str r3, [sp, #20] @ 4-byte Spill
; ARMV7-NEXT: umull r2, r9, r7, r1
; ARMV7-NEXT: str r0, [sp, #32] @ 4-byte Spill
; ARMV7-NEXT: ldr r4, [sp, #84]
; ARMV7-NEXT: ldr r0, [sp, #96]
; ARMV7-NEXT: umull r1, r3, r1, r8
; ARMV7-NEXT: umull r12, r10, r4, r8
; ARMV7-NEXT: str r1, [sp, #16] @ 4-byte Spill
; ARMV7-NEXT: umull lr, r1, r5, r0
; ARMV7-NEXT: add r2, r12, r2
; ARMV7-NEXT: umull r11, r8, r0, r6
; ARMV7-NEXT: ldr r0, [sp, #100]
; ARMV7-NEXT: adds r2, r3, r2
; ARMV7-NEXT: mov r12, #0
; ARMV7-NEXT: umull r6, r0, r0, r6
; ARMV7-NEXT: adc r3, r12, #0
; ARMV7-NEXT: str r3, [sp, #36] @ 4-byte Spill
; ARMV7-NEXT: add r3, r6, lr
; ARMV7-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
; ARMV7-NEXT: adds r3, r8, r3
; ARMV7-NEXT: adc lr, r12, #0
; ARMV7-NEXT: adds r6, r6, r11
; ARMV7-NEXT: adc r2, r2, r3
; ARMV7-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
; ARMV7-NEXT: mov r12, #0
; ARMV7-NEXT: adds r3, r3, r6
; ARMV7-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
; ARMV7-NEXT: adcs r8, r6, r2
; ARMV7-NEXT: ldr r6, [sp, #40] @ 4-byte Reload
; ARMV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
; ARMV7-NEXT: str r2, [r6]
; ARMV7-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
; ARMV7-NEXT: stmib r6, {r2, r3, r8}
; ARMV7-NEXT: adc r8, r12, #0
; ARMV7-NEXT: cmp r5, #0
; ARMV7-NEXT: ldr r2, [sp, #100]
; ARMV7-NEXT: movwne r5, #1
; ARMV7-NEXT: cmp r2, #0
; ARMV7-NEXT: mov r3, r2
; ARMV7-NEXT: movwne r3, #1
; ARMV7-NEXT: cmp r0, #0
; ARMV7-NEXT: movwne r0, #1
; ARMV7-NEXT: cmp r1, #0
; ARMV7-NEXT: and r3, r3, r5
; ARMV7-NEXT: movwne r1, #1
; ARMV7-NEXT: orr r0, r3, r0
; ARMV7-NEXT: cmp r7, #0
; ARMV7-NEXT: orr r0, r0, r1
; ARMV7-NEXT: ldr r1, [sp, #80]
; ARMV7-NEXT: movwne r7, #1
; ARMV7-NEXT: cmp r4, #0
; ARMV7-NEXT: orr r1, r1, r4
; ARMV7-NEXT: movwne r4, #1
; ARMV7-NEXT: cmp r10, #0
; ARMV7-NEXT: and r3, r4, r7
; ARMV7-NEXT: movwne r10, #1
; ARMV7-NEXT: cmp r9, #0
; ARMV7-NEXT: orr r3, r3, r10
; ARMV7-NEXT: ldr r7, [sp, #36] @ 4-byte Reload
; ARMV7-NEXT: movwne r9, #1
; ARMV7-NEXT: orr r3, r3, r9
; ARMV7-NEXT: orr r3, r3, r7
; ARMV7-NEXT: ldr r7, [sp, #96]
; ARMV7-NEXT: orr r0, r0, lr
; ARMV7-NEXT: orrs r7, r7, r2
; ARMV7-NEXT: movwne r7, #1
; ARMV7-NEXT: cmp r1, #0
; ARMV7-NEXT: movwne r1, #1
; ARMV7-NEXT: and r1, r1, r7
; ARMV7-NEXT: orr r1, r1, r3
; ARMV7-NEXT: orr r0, r1, r0
; ARMV7-NEXT: orr r0, r0, r8
; ARMV7-NEXT: and r0, r0, #1
; ARMV7-NEXT: strb r0, [r6, #16]
; ARMV7-NEXT: add sp, sp, #44
; ARMV7-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
%1 = extractvalue { i128, i1 } %0, 0
%2 = extractvalue { i128, i1 } %0, 1
%3 = zext i1 %2 to i8
%4 = insertvalue { i128, i8 } undef, i128 %1, 0
%5 = insertvalue { i128, i8 } %4, i8 %3, 1
ret { i128, i8 } %5
}
; Function Attrs: nounwind readnone speculatable
declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1
attributes #0 = { nounwind readnone uwtable }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }


@@ -0,0 +1,69 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=armv6-unknown-linux-gnu | FileCheck %s --check-prefixes=ARMV6
; RUN: llc < %s -mtriple=armv7-unknown-linux-gnu | FileCheck %s --check-prefixes=ARMV7
define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; ARMV6-LABEL: mulodi_test:
; ARMV6: @ %bb.0: @ %start
; ARMV6-NEXT: push {r4, r5, r6, lr}
; ARMV6-NEXT: umull r12, lr, r3, r0
; ARMV6-NEXT: mov r6, #0
; ARMV6-NEXT: umull r4, r5, r1, r2
; ARMV6-NEXT: umull r0, r2, r0, r2
; ARMV6-NEXT: add r4, r4, r12
; ARMV6-NEXT: adds r12, r2, r4
; ARMV6-NEXT: adc r2, r6, #0
; ARMV6-NEXT: cmp r3, #0
; ARMV6-NEXT: movne r3, #1
; ARMV6-NEXT: cmp r1, #0
; ARMV6-NEXT: movne r1, #1
; ARMV6-NEXT: cmp r5, #0
; ARMV6-NEXT: and r1, r1, r3
; ARMV6-NEXT: movne r5, #1
; ARMV6-NEXT: cmp lr, #0
; ARMV6-NEXT: orr r1, r1, r5
; ARMV6-NEXT: movne lr, #1
; ARMV6-NEXT: orr r1, r1, lr
; ARMV6-NEXT: orr r2, r1, r2
; ARMV6-NEXT: mov r1, r12
; ARMV6-NEXT: pop {r4, r5, r6, pc}
;
; ARMV7-LABEL: mulodi_test:
; ARMV7: @ %bb.0: @ %start
; ARMV7-NEXT: push {r4, r5, r11, lr}
; ARMV7-NEXT: umull r12, lr, r1, r2
; ARMV7-NEXT: cmp r3, #0
; ARMV7-NEXT: umull r4, r5, r3, r0
; ARMV7-NEXT: movwne r3, #1
; ARMV7-NEXT: cmp r1, #0
; ARMV7-NEXT: movwne r1, #1
; ARMV7-NEXT: umull r0, r2, r0, r2
; ARMV7-NEXT: cmp lr, #0
; ARMV7-NEXT: and r1, r1, r3
; ARMV7-NEXT: movwne lr, #1
; ARMV7-NEXT: cmp r5, #0
; ARMV7-NEXT: orr r1, r1, lr
; ARMV7-NEXT: movwne r5, #1
; ARMV7-NEXT: orr r3, r1, r5
; ARMV7-NEXT: add r1, r12, r4
; ARMV7-NEXT: mov r5, #0
; ARMV7-NEXT: adds r1, r2, r1
; ARMV7-NEXT: adc r2, r5, #0
; ARMV7-NEXT: orr r2, r3, r2
; ARMV7-NEXT: pop {r4, r5, r11, pc}
start:
%0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2
%1 = extractvalue { i64, i1 } %0, 0
%2 = extractvalue { i64, i1 } %0, 1
%3 = zext i1 %2 to i8
%4 = insertvalue { i64, i8 } undef, i64 %1, 0
%5 = insertvalue { i64, i8 } %4, i8 %3, 1
ret { i64, i8 } %5
}
; Function Attrs: nounwind readnone speculatable
declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #1
attributes #0 = { nounwind readnone uwtable }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }


@@ -0,0 +1,177 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=PPC64
; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefixes=PPC32
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; PPC64-LABEL: muloti_test:
; PPC64: mulld 8, 5, 4
; PPC64-NEXT: cmpdi 5, 3, 0
; PPC64-NEXT: mulhdu. 9, 3, 6
; PPC64-NEXT: mulld 3, 3, 6
; PPC64-NEXT: mcrf 1, 0
; PPC64-NEXT: add 3, 3, 8
; PPC64-NEXT: cmpdi 5, 0
; PPC64-NEXT: crnor 20, 2, 22
; PPC64-NEXT: cmpldi 3, 0
; PPC64-NEXT: mulhdu 8, 4, 6
; PPC64-NEXT: add 3, 8, 3
; PPC64-NEXT: cmpld 6, 3, 8
; PPC64-NEXT: crandc 21, 24, 2
; PPC64-NEXT: crorc 20, 20, 6
; PPC64-NEXT: li 7, 1
; PPC64-NEXT: mulhdu. 5, 5, 4
; PPC64-NEXT: crorc 20, 20, 2
; PPC64-NEXT: crnor 20, 20, 21
; PPC64-NEXT: mulld 4, 4, 6
; PPC64-NEXT: bc 12, 20, .LBB0_2
; PPC64: ori 5, 7, 0
; PPC64-NEXT: blr
; PPC64-NEXT: .LBB0_2:
; PPC64-NEXT: addi 5, 0, 0
; PPC64-NEXT: blr
;
; PPC32-LABEL: muloti_test:
; PPC32: mflr 0
; PPC32-NEXT: stw 0, 4(1)
; PPC32-NEXT: stwu 1, -80(1)
; PPC32-NEXT: .cfi_def_cfa_offset 80
; PPC32-NEXT: .cfi_offset lr, 4
; PPC32-NEXT: .cfi_offset r20, -48
; PPC32-NEXT: .cfi_offset r21, -44
; PPC32-NEXT: .cfi_offset r22, -40
; PPC32-NEXT: .cfi_offset r23, -36
; PPC32-NEXT: .cfi_offset r24, -32
; PPC32-NEXT: .cfi_offset r25, -28
; PPC32-NEXT: .cfi_offset r26, -24
; PPC32-NEXT: .cfi_offset r27, -20
; PPC32-NEXT: .cfi_offset r28, -16
; PPC32-NEXT: .cfi_offset r29, -12
; PPC32-NEXT: .cfi_offset r30, -8
; PPC32-NEXT: stw 26, 56(1)
; PPC32-NEXT: stw 27, 60(1)
; PPC32-NEXT: stw 29, 68(1)
; PPC32-NEXT: stw 30, 72(1)
; PPC32-NEXT: mfcr 12
; PPC32-NEXT: mr 30, 8
; PPC32-NEXT: mr 29, 7
; PPC32-NEXT: mr 27, 4
; PPC32-NEXT: mr 26, 3
; PPC32-NEXT: li 3, 0
; PPC32-NEXT: li 4, 0
; PPC32-NEXT: li 7, 0
; PPC32-NEXT: li 8, 0
; PPC32-NEXT: stw 20, 32(1)
; PPC32-NEXT: stw 21, 36(1)
; PPC32-NEXT: stw 22, 40(1)
; PPC32-NEXT: stw 23, 44(1)
; PPC32-NEXT: stw 24, 48(1)
; PPC32-NEXT: stw 25, 52(1)
; PPC32-NEXT: stw 28, 64(1)
; PPC32-NEXT: mr 25, 10
; PPC32-NEXT: stw 12, 28(1)
; PPC32-NEXT: mr 28, 9
; PPC32-NEXT: mr 23, 6
; PPC32-NEXT: mr 24, 5
; PPC32-NEXT: bl __multi3@PLT
; PPC32-NEXT: mr 7, 4
; PPC32-NEXT: mullw 4, 24, 30
; PPC32-NEXT: mullw 8, 29, 23
; PPC32-NEXT: mullw 10, 28, 27
; PPC32-NEXT: mullw 11, 26, 25
; PPC32-NEXT: mulhwu 9, 30, 23
; PPC32-NEXT: mulhwu 12, 27, 25
; PPC32-NEXT: mullw 0, 30, 23
; PPC32-NEXT: mullw 22, 27, 25
; PPC32-NEXT: add 21, 8, 4
; PPC32-NEXT: add 10, 11, 10
; PPC32-NEXT: addc 4, 22, 0
; PPC32-NEXT: add 11, 9, 21
; PPC32-NEXT: add 0, 12, 10
; PPC32-NEXT: adde 8, 0, 11
; PPC32-NEXT: addc 4, 7, 4
; PPC32-NEXT: adde 8, 3, 8
; PPC32-NEXT: xor 22, 4, 7
; PPC32-NEXT: xor 20, 8, 3
; PPC32-NEXT: or. 22, 22, 20
; PPC32-NEXT: mcrf 1, 0
; PPC32-NEXT: cmpwi 29, 0
; PPC32-NEXT: cmpwi 5, 24, 0
; PPC32-NEXT: cmpwi 6, 26, 0
; PPC32-NEXT: cmpwi 7, 28, 0
; PPC32-NEXT: crnor 8, 22, 2
; PPC32-NEXT: mulhwu. 23, 29, 23
; PPC32-NEXT: crnor 9, 30, 26
; PPC32-NEXT: mcrf 5, 0
; PPC32-NEXT: cmplwi 21, 0
; PPC32-NEXT: cmplw 6, 11, 9
; PPC32-NEXT: cmplwi 7, 10, 0
; PPC32-NEXT: crandc 10, 24, 2
; PPC32-NEXT: cmplw 3, 0, 12
; PPC32-NEXT: mulhwu. 9, 24, 30
; PPC32-NEXT: mcrf 6, 0
; PPC32-NEXT: crandc 11, 12, 30
; PPC32-NEXT: cmplw 4, 7
; PPC32-NEXT: cmplw 7, 8, 3
; PPC32-NEXT: crand 12, 30, 0
; PPC32-NEXT: crandc 13, 28, 30
; PPC32-NEXT: mulhwu. 3, 26, 25
; PPC32-NEXT: mcrf 7, 0
; PPC32-NEXT: cror 0, 12, 13
; PPC32-NEXT: crandc 12, 0, 6
; PPC32-NEXT: crorc 20, 8, 22
; PPC32-NEXT: crorc 20, 20, 26
; PPC32-NEXT: mulhwu. 3, 28, 27
; PPC32-NEXT: mcrf 1, 0
; PPC32-NEXT: crorc 25, 9, 30
; PPC32-NEXT: or. 3, 27, 26
; PPC32-NEXT: cror 24, 20, 10
; PPC32-NEXT: mcrf 5, 0
; PPC32-NEXT: crorc 25, 25, 6
; PPC32-NEXT: or. 3, 30, 29
; PPC32-NEXT: cror 25, 25, 11
; PPC32-NEXT: crnor 20, 2, 22
; PPC32-NEXT: lwz 12, 28(1)
; PPC32-NEXT: cror 20, 20, 25
; PPC32-NEXT: cror 20, 20, 24
; PPC32-NEXT: crnor 20, 20, 12
; PPC32-NEXT: li 3, 1
; PPC32-NEXT: bc 12, 20, .LBB0_2
; PPC32: ori 7, 3, 0
; PPC32-NEXT: b .LBB0_3
; PPC32-NEXT:.LBB0_2:
; PPC32-NEXT: addi 7, 0, 0
; PPC32-NEXT:.LBB0_3:
; PPC32-NEXT: mr 3, 8
; PPC32-NEXT: mtcrf 32, 12
; PPC32-NEXT: mtcrf 16, 12
; PPC32-NEXT: lwz 30, 72(1)
; PPC32-NEXT: lwz 29, 68(1)
; PPC32-NEXT: lwz 28, 64(1)
; PPC32-NEXT: lwz 27, 60(1)
; PPC32-NEXT: lwz 26, 56(1)
; PPC32-NEXT: lwz 25, 52(1)
; PPC32-NEXT: lwz 24, 48(1)
; PPC32-NEXT: lwz 23, 44(1)
; PPC32-NEXT: lwz 22, 40(1)
; PPC32-NEXT: lwz 21, 36(1)
; PPC32-NEXT: lwz 20, 32(1)
; PPC32-NEXT: lwz 0, 84(1)
; PPC32-NEXT: addi 1, 1, 80
; PPC32-NEXT: mtlr 0
; PPC32-NEXT: blr
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
%1 = extractvalue { i128, i1 } %0, 0
%2 = extractvalue { i128, i1 } %0, 1
%3 = zext i1 %2 to i8
%4 = insertvalue { i128, i8 } undef, i128 %1, 0
%5 = insertvalue { i128, i8 } %4, i8 %3, 1
ret { i128, i8 } %5
}
; Function Attrs: nounwind readnone speculatable
declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1
attributes #0 = { nounwind readnone uwtable }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }


@@ -0,0 +1,175 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefixes=RISCV32
; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefixes=RISCV64
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; RISCV32-LABEL: muloti_test:
; RISCV32: # %bb.0: # %start
; RISCV32-NEXT: addi sp, sp, -80
; RISCV32-NEXT: sw ra, 76(sp)
; RISCV32-NEXT: sw s1, 72(sp)
; RISCV32-NEXT: sw s2, 68(sp)
; RISCV32-NEXT: sw s3, 64(sp)
; RISCV32-NEXT: sw s4, 60(sp)
; RISCV32-NEXT: sw s5, 56(sp)
; RISCV32-NEXT: sw s6, 52(sp)
; RISCV32-NEXT: sw s7, 48(sp)
; RISCV32-NEXT: mv s3, a2
; RISCV32-NEXT: mv s1, a1
; RISCV32-NEXT: mv s2, a0
; RISCV32-NEXT: sw zero, 12(sp)
; RISCV32-NEXT: sw zero, 8(sp)
; RISCV32-NEXT: sw zero, 28(sp)
; RISCV32-NEXT: sw zero, 24(sp)
; RISCV32-NEXT: lw s5, 4(a2)
; RISCV32-NEXT: sw s5, 4(sp)
; RISCV32-NEXT: lw s6, 0(a2)
; RISCV32-NEXT: sw s6, 0(sp)
; RISCV32-NEXT: lw s4, 4(a1)
; RISCV32-NEXT: sw s4, 20(sp)
; RISCV32-NEXT: lw s7, 0(a1)
; RISCV32-NEXT: sw s7, 16(sp)
; RISCV32-NEXT: addi a0, sp, 32
; RISCV32-NEXT: addi a1, sp, 16
; RISCV32-NEXT: mv a2, sp
; RISCV32-NEXT: call __multi3
; RISCV32-NEXT: lw t1, 12(s1)
; RISCV32-NEXT: lw a1, 8(s1)
; RISCV32-NEXT: mul a0, s5, a1
; RISCV32-NEXT: mul a2, t1, s6
; RISCV32-NEXT: add a0, a2, a0
; RISCV32-NEXT: lw t5, 12(s3)
; RISCV32-NEXT: lw a3, 8(s3)
; RISCV32-NEXT: mul a2, s4, a3
; RISCV32-NEXT: mul a4, t5, s7
; RISCV32-NEXT: add a2, a4, a2
; RISCV32-NEXT: mul a4, a3, s7
; RISCV32-NEXT: mul a5, a1, s6
; RISCV32-NEXT: add s1, a5, a4
; RISCV32-NEXT: sltu a4, s1, a5
; RISCV32-NEXT: mulhu a6, a3, s7
; RISCV32-NEXT: add a7, a6, a2
; RISCV32-NEXT: mulhu t2, a1, s6
; RISCV32-NEXT: add t4, t2, a0
; RISCV32-NEXT: add a0, t4, a7
; RISCV32-NEXT: add a0, a0, a4
; RISCV32-NEXT: xor a2, s5, zero
; RISCV32-NEXT: snez a2, a2
; RISCV32-NEXT: xor a4, t1, zero
; RISCV32-NEXT: snez a4, a4
; RISCV32-NEXT: and a2, a4, a2
; RISCV32-NEXT: xor a4, s4, zero
; RISCV32-NEXT: snez a4, a4
; RISCV32-NEXT: xor a5, t5, zero
; RISCV32-NEXT: snez a5, a5
; RISCV32-NEXT: and a4, a5, a4
; RISCV32-NEXT: mulhu a5, t5, s7
; RISCV32-NEXT: xor a5, a5, zero
; RISCV32-NEXT: snez a5, a5
; RISCV32-NEXT: or t0, a4, a5
; RISCV32-NEXT: mulhu a4, t1, s6
; RISCV32-NEXT: xor a4, a4, zero
; RISCV32-NEXT: snez a4, a4
; RISCV32-NEXT: or t3, a2, a4
; RISCV32-NEXT: lw a4, 44(sp)
; RISCV32-NEXT: add a5, a4, a0
; RISCV32-NEXT: lw a2, 40(sp)
; RISCV32-NEXT: add a0, a2, s1
; RISCV32-NEXT: sltu t6, a0, a2
; RISCV32-NEXT: add s1, a5, t6
; RISCV32-NEXT: beq s1, a4, .LBB0_2
; RISCV32-NEXT: # %bb.1: # %start
; RISCV32-NEXT: sltu t6, s1, a4
; RISCV32-NEXT: .LBB0_2: # %start
; RISCV32-NEXT: xor a4, s1, a4
; RISCV32-NEXT: xor a2, a0, a2
; RISCV32-NEXT: or a2, a2, a4
; RISCV32-NEXT: sltu t2, t4, t2
; RISCV32-NEXT: mulhu a4, s5, a1
; RISCV32-NEXT: xor a4, a4, zero
; RISCV32-NEXT: snez a4, a4
; RISCV32-NEXT: or t3, t3, a4
; RISCV32-NEXT: sltu a6, a7, a6
; RISCV32-NEXT: mulhu a4, s4, a3
; RISCV32-NEXT: xor a4, a4, zero
; RISCV32-NEXT: snez a4, a4
; RISCV32-NEXT: or a4, t0, a4
; RISCV32-NEXT: lw a5, 36(sp)
; RISCV32-NEXT: sw a5, 4(s2)
; RISCV32-NEXT: lw a5, 32(sp)
; RISCV32-NEXT: sw a5, 0(s2)
; RISCV32-NEXT: sw a0, 8(s2)
; RISCV32-NEXT: sw s1, 12(s2)
; RISCV32-NEXT: mv a0, zero
; RISCV32-NEXT: beqz a2, .LBB0_4
; RISCV32-NEXT: # %bb.3: # %start
; RISCV32-NEXT: mv a0, t6
; RISCV32-NEXT: .LBB0_4: # %start
; RISCV32-NEXT: or a2, a4, a6
; RISCV32-NEXT: or a4, t3, t2
; RISCV32-NEXT: or a3, a3, t5
; RISCV32-NEXT: or a1, a1, t1
; RISCV32-NEXT: xor a1, a1, zero
; RISCV32-NEXT: xor a3, a3, zero
; RISCV32-NEXT: snez a3, a3
; RISCV32-NEXT: snez a1, a1
; RISCV32-NEXT: and a1, a1, a3
; RISCV32-NEXT: or a1, a1, a4
; RISCV32-NEXT: or a1, a1, a2
; RISCV32-NEXT: or a0, a1, a0
; RISCV32-NEXT: andi a0, a0, 1
; RISCV32-NEXT: sb a0, 16(s2)
; RISCV32-NEXT: lw s7, 48(sp)
; RISCV32-NEXT: lw s6, 52(sp)
; RISCV32-NEXT: lw s5, 56(sp)
; RISCV32-NEXT: lw s4, 60(sp)
; RISCV32-NEXT: lw s3, 64(sp)
; RISCV32-NEXT: lw s2, 68(sp)
; RISCV32-NEXT: lw s1, 72(sp)
; RISCV32-NEXT: lw ra, 76(sp)
; RISCV32-NEXT: addi sp, sp, 80
; RISCV32-NEXT: ret
;
; RISCV64-LABEL: muloti_test:
; RISCV64: # %bb.0: # %start
; RISCV64-NEXT: mul a6, a4, a1
; RISCV64-NEXT: mul a5, a2, a3
; RISCV64-NEXT: add a6, a5, a6
; RISCV64-NEXT: mul a5, a1, a3
; RISCV64-NEXT: sw a5, 0(a0)
; RISCV64-NEXT: mulhu a7, a1, a3
; RISCV64-NEXT: add a5, a7, a6
; RISCV64-NEXT: sw a5, 8(a0)
; RISCV64-NEXT: sltu a6, a5, a7
; RISCV64-NEXT: xor a5, a4, zero
; RISCV64-NEXT: snez a7, a5
; RISCV64-NEXT: xor a5, a2, zero
; RISCV64-NEXT: snez a5, a5
; RISCV64-NEXT: and a5, a5, a7
; RISCV64-NEXT: mulhu a2, a2, a3
; RISCV64-NEXT: xor a2, a2, zero
; RISCV64-NEXT: snez a2, a2
; RISCV64-NEXT: or a2, a5, a2
; RISCV64-NEXT: mulhu a1, a4, a1
; RISCV64-NEXT: xor a1, a1, zero
; RISCV64-NEXT: snez a1, a1
; RISCV64-NEXT: or a1, a2, a1
; RISCV64-NEXT: or a1, a1, a6
; RISCV64-NEXT: sb a1, 16(a0)
; RISCV64-NEXT: ret
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
%1 = extractvalue { i128, i1 } %0, 0
%2 = extractvalue { i128, i1 } %0, 1
%3 = zext i1 %2 to i8
%4 = insertvalue { i128, i8 } undef, i128 %1, 0
%5 = insertvalue { i128, i8 } %4, i8 %3, 1
ret { i128, i8 } %5
}
; Function Attrs: nounwind readnone speculatable
declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1
attributes #0 = { nounwind readnone uwtable }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }


@@ -0,0 +1,259 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=sparc-unknown-linux-gnu | FileCheck %s --check-prefixes=SPARC
; RUN: llc < %s -mtriple=sparc64-unknown-linux-gnu | FileCheck %s --check-prefixes=SPARC64
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; SPARC-LABEL: muloti_test:
; SPARC: .cfi_startproc
; SPARC-NEXT: ! %bb.0: ! %start
; SPARC-NEXT: save %sp, -128, %sp
; SPARC-NEXT: .cfi_def_cfa_register %fp
; SPARC-NEXT: .cfi_window_save
; SPARC-NEXT: .cfi_register 15, 31
; SPARC-NEXT: ld [%fp+92], %l3
; SPARC-NEXT: ld [%fp+96], %g2
; SPARC-NEXT: umul %i2, %i5, %g3
; SPARC-NEXT: rd %y, %g4
; SPARC-NEXT: st %g4, [%fp+-20] ! 4-byte Folded Spill
; SPARC-NEXT: umul %i4, %i3, %g4
; SPARC-NEXT: rd %y, %l0
; SPARC-NEXT: st %l0, [%fp+-24] ! 4-byte Folded Spill
; SPARC-NEXT: st %g2, [%sp+96]
; SPARC-NEXT: st %i5, [%fp+-8] ! 4-byte Folded Spill
; SPARC-NEXT: umul %i5, %i3, %l0
; SPARC-NEXT: rd %y, %l5
; SPARC-NEXT: st %l3, [%sp+92]
; SPARC-NEXT: umul %l3, %i1, %l4
; SPARC-NEXT: rd %y, %i5
; SPARC-NEXT: st %i5, [%fp+-12] ! 4-byte Folded Spill
; SPARC-NEXT: add %g4, %g3, %l2
; SPARC-NEXT: mov %i0, %i5
; SPARC-NEXT: umul %i0, %g2, %g3
; SPARC-NEXT: rd %y, %i0
; SPARC-NEXT: st %i0, [%fp+-16] ! 4-byte Folded Spill
; SPARC-NEXT: add %l5, %l2, %l1
; SPARC-NEXT: st %i1, [%fp+-4] ! 4-byte Folded Spill
; SPARC-NEXT: umul %i1, %g2, %g2
; SPARC-NEXT: rd %y, %l6
; SPARC-NEXT: add %g3, %l4, %i0
; SPARC-NEXT: add %l6, %i0, %l7
; SPARC-NEXT: addcc %g2, %l0, %l4
; SPARC-NEXT: mov %g0, %l0
; SPARC-NEXT: addxcc %l7, %l1, %i1
; SPARC-NEXT: mov %l0, %o0
; SPARC-NEXT: mov %l0, %o1
; SPARC-NEXT: mov %i2, %o2
; SPARC-NEXT: mov %i3, %o3
; SPARC-NEXT: mov %l0, %o4
; SPARC-NEXT: call __multi3
; SPARC-NEXT: mov %l0, %o5
; SPARC-NEXT: addcc %o1, %l4, %i3
; SPARC-NEXT: addxcc %o0, %i1, %g2
; SPARC-NEXT: mov 1, %g3
; SPARC-NEXT: cmp %g2, %o0
; SPARC-NEXT: bcs .LBB0_2
; SPARC-NEXT: mov %g3, %g4
; SPARC-NEXT: ! %bb.1: ! %start
; SPARC-NEXT: mov %l0, %g4
; SPARC-NEXT: .LBB0_2: ! %start
; SPARC-NEXT: cmp %i3, %o1
; SPARC-NEXT: bcs .LBB0_4
; SPARC-NEXT: mov %g3, %o4
; SPARC-NEXT: ! %bb.3: ! %start
; SPARC-NEXT: mov %l0, %o4
; SPARC-NEXT: .LBB0_4: ! %start
; SPARC-NEXT: cmp %g2, %o0
; SPARC-NEXT: be .LBB0_6
; SPARC-NEXT: nop
; SPARC-NEXT: ! %bb.5: ! %start
; SPARC-NEXT: mov %g4, %o4
; SPARC-NEXT: .LBB0_6: ! %start
; SPARC-NEXT: xor %g2, %o0, %i1
; SPARC-NEXT: xor %i3, %o1, %g4
; SPARC-NEXT: or %g4, %i1, %i1
; SPARC-NEXT: cmp %i1, 0
; SPARC-NEXT: be .LBB0_8
; SPARC-NEXT: mov %l0, %g4
; SPARC-NEXT: ! %bb.7: ! %start
; SPARC-NEXT: mov %o4, %g4
; SPARC-NEXT: .LBB0_8: ! %start
; SPARC-NEXT: cmp %l1, %l5
; SPARC-NEXT: mov %g3, %l1
; SPARC-NEXT: bcs .LBB0_10
; SPARC-NEXT: mov %i5, %i1
; SPARC-NEXT: ! %bb.9: ! %start
; SPARC-NEXT: mov %l0, %l1
; SPARC-NEXT: .LBB0_10: ! %start
; SPARC-NEXT: cmp %l2, 0
; SPARC-NEXT: be .LBB0_12
; SPARC-NEXT: mov %l0, %o0
; SPARC-NEXT: ! %bb.11: ! %start
; SPARC-NEXT: mov %l1, %o0
; SPARC-NEXT: .LBB0_12: ! %start
; SPARC-NEXT: cmp %i2, 0
; SPARC-NEXT: bne .LBB0_14
; SPARC-NEXT: mov %g3, %i2
; SPARC-NEXT: ! %bb.13: ! %start
; SPARC-NEXT: mov %l0, %i2
; SPARC-NEXT: .LBB0_14: ! %start
; SPARC-NEXT: cmp %i4, 0
; SPARC-NEXT: bne .LBB0_16
; SPARC-NEXT: mov %g3, %o1
; SPARC-NEXT: ! %bb.15: ! %start
; SPARC-NEXT: mov %l0, %o1
; SPARC-NEXT: .LBB0_16: ! %start
; SPARC-NEXT: ld [%fp+-24], %i5 ! 4-byte Folded Reload
; SPARC-NEXT: cmp %i5, 0
; SPARC-NEXT: bne .LBB0_18
; SPARC-NEXT: mov %g3, %l5
; SPARC-NEXT: ! %bb.17: ! %start
; SPARC-NEXT: mov %l0, %l5
; SPARC-NEXT: .LBB0_18: ! %start
; SPARC-NEXT: ld [%fp+-20], %i5 ! 4-byte Folded Reload
; SPARC-NEXT: cmp %i5, 0
; SPARC-NEXT: bne .LBB0_20
; SPARC-NEXT: mov %g3, %l1
; SPARC-NEXT: ! %bb.19: ! %start
; SPARC-NEXT: mov %l0, %l1
; SPARC-NEXT: .LBB0_20: ! %start
; SPARC-NEXT: cmp %l7, %l6
; SPARC-NEXT: bcs .LBB0_22
; SPARC-NEXT: mov %g3, %l6
; SPARC-NEXT: ! %bb.21: ! %start
; SPARC-NEXT: mov %l0, %l6
; SPARC-NEXT: .LBB0_22: ! %start
; SPARC-NEXT: cmp %i0, 0
; SPARC-NEXT: be .LBB0_24
; SPARC-NEXT: mov %l0, %l2
; SPARC-NEXT: ! %bb.23: ! %start
; SPARC-NEXT: mov %l6, %l2
; SPARC-NEXT: .LBB0_24: ! %start
; SPARC-NEXT: cmp %l3, 0
; SPARC-NEXT: bne .LBB0_26
; SPARC-NEXT: mov %g3, %l3
; SPARC-NEXT: ! %bb.25: ! %start
; SPARC-NEXT: mov %l0, %l3
; SPARC-NEXT: .LBB0_26: ! %start
; SPARC-NEXT: cmp %i1, 0
; SPARC-NEXT: bne .LBB0_28
; SPARC-NEXT: mov %g3, %l4
; SPARC-NEXT: ! %bb.27: ! %start
; SPARC-NEXT: mov %l0, %l4
; SPARC-NEXT: .LBB0_28: ! %start
; SPARC-NEXT: and %o1, %i2, %i2
; SPARC-NEXT: ld [%fp+-16], %i0 ! 4-byte Folded Reload
; SPARC-NEXT: cmp %i0, 0
; SPARC-NEXT: and %l4, %l3, %l4
; SPARC-NEXT: bne .LBB0_30
; SPARC-NEXT: mov %g3, %l6
; SPARC-NEXT: ! %bb.29: ! %start
; SPARC-NEXT: mov %l0, %l6
; SPARC-NEXT: .LBB0_30: ! %start
; SPARC-NEXT: or %i2, %l5, %l3
; SPARC-NEXT: ld [%fp+-12], %i0 ! 4-byte Folded Reload
; SPARC-NEXT: cmp %i0, 0
; SPARC-NEXT: or %l4, %l6, %i2
; SPARC-NEXT: bne .LBB0_32
; SPARC-NEXT: mov %g3, %l4
; SPARC-NEXT: ! %bb.31: ! %start
; SPARC-NEXT: mov %l0, %l4
; SPARC-NEXT: .LBB0_32: ! %start
; SPARC-NEXT: or %l3, %l1, %l1
; SPARC-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload
; SPARC-NEXT: or %i0, %i4, %i0
; SPARC-NEXT: cmp %i0, 0
; SPARC-NEXT: or %i2, %l4, %i5
; SPARC-NEXT: bne .LBB0_34
; SPARC-NEXT: mov %g3, %i2
; SPARC-NEXT: ! %bb.33: ! %start
; SPARC-NEXT: mov %l0, %i2
; SPARC-NEXT: .LBB0_34: ! %start
; SPARC-NEXT: or %l1, %o0, %i4
; SPARC-NEXT: ld [%fp+-4], %i0 ! 4-byte Folded Reload
; SPARC-NEXT: or %i0, %i1, %i0
; SPARC-NEXT: cmp %i0, 0
; SPARC-NEXT: bne .LBB0_36
; SPARC-NEXT: or %i5, %l2, %i0
; SPARC-NEXT: ! %bb.35: ! %start
; SPARC-NEXT: mov %l0, %g3
; SPARC-NEXT: .LBB0_36: ! %start
; SPARC-NEXT: and %g3, %i2, %i1
; SPARC-NEXT: or %i1, %i0, %i0
; SPARC-NEXT: or %i0, %i4, %i0
; SPARC-NEXT: or %i0, %g4, %i0
; SPARC-NEXT: and %i0, 1, %i4
; SPARC-NEXT: mov %g2, %i0
; SPARC-NEXT: mov %i3, %i1
; SPARC-NEXT: mov %o2, %i2
; SPARC-NEXT: ret
; SPARC-NEXT: restore %g0, %o3, %o3
;
; SPARC64-LABEL: muloti_test:
; SPARC64: .cfi_startproc
; SPARC64-NEXT: .register %g2, #scratch
; SPARC64-NEXT: ! %bb.0: ! %start
; SPARC64-NEXT: save %sp, -176, %sp
; SPARC64-NEXT: .cfi_def_cfa_register %fp
; SPARC64-NEXT: .cfi_window_save
; SPARC64-NEXT: .cfi_register 15, 31
; SPARC64-NEXT: srax %i2, 63, %o0
; SPARC64-NEXT: srax %i1, 63, %o2
; SPARC64-NEXT: mov %i2, %o1
; SPARC64-NEXT: call __multi3
; SPARC64-NEXT: mov %i1, %o3
; SPARC64-NEXT: mov %o0, %i4
; SPARC64-NEXT: mov %o1, %i5
; SPARC64-NEXT: srax %i0, 63, %o0
; SPARC64-NEXT: srax %i3, 63, %o2
; SPARC64-NEXT: mov %i0, %o1
; SPARC64-NEXT: call __multi3
; SPARC64-NEXT: mov %i3, %o3
; SPARC64-NEXT: mov %o0, %l0
; SPARC64-NEXT: add %o1, %i5, %i5
; SPARC64-NEXT: mov 0, %o0
; SPARC64-NEXT: mov %i1, %o1
; SPARC64-NEXT: mov %o0, %o2
; SPARC64-NEXT: call __multi3
; SPARC64-NEXT: mov %i3, %o3
; SPARC64-NEXT: add %o0, %i5, %i1
; SPARC64-NEXT: mov %g0, %i3
; SPARC64-NEXT: cmp %i1, %o0
; SPARC64-NEXT: mov %i3, %g2
; SPARC64-NEXT: movcs %xcc, 1, %g2
; SPARC64-NEXT: cmp %i5, 0
; SPARC64-NEXT: move %xcc, 0, %g2
; SPARC64-NEXT: cmp %i4, 0
; SPARC64-NEXT: mov %i3, %i4
; SPARC64-NEXT: movne %xcc, 1, %i4
; SPARC64-NEXT: cmp %l0, 0
; SPARC64-NEXT: mov %i3, %i5
; SPARC64-NEXT: movne %xcc, 1, %i5
; SPARC64-NEXT: cmp %i2, 0
; SPARC64-NEXT: mov %i3, %i2
; SPARC64-NEXT: movne %xcc, 1, %i2
; SPARC64-NEXT: cmp %i0, 0
; SPARC64-NEXT: movne %xcc, 1, %i3
; SPARC64-NEXT: and %i3, %i2, %i0
; SPARC64-NEXT: or %i0, %i5, %i0
; SPARC64-NEXT: or %i0, %i4, %i0
; SPARC64-NEXT: or %i0, %g2, %i0
; SPARC64-NEXT: srl %i0, 0, %i2
; SPARC64-NEXT: mov %i1, %i0
; SPARC64-NEXT: ret
; SPARC64-NEXT: restore %g0, %o1, %o1
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
%1 = extractvalue { i128, i1 } %0, 0
%2 = extractvalue { i128, i1 } %0, 1
%3 = zext i1 %2 to i8
%4 = insertvalue { i128, i8 } undef, i128 %1, 0
%5 = insertvalue { i128, i8 } %4, i8 %3, 1
ret { i128, i8 } %5
}
; Function Attrs: nounwind readnone speculatable
declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1
attributes #0 = { nounwind readnone uwtable }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }


@@ -0,0 +1,183 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumb-eabi -mattr=+v6 | FileCheck %s --check-prefixes=THUMBV6
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-LABEL: muloti_test:
; THUMBV6: push {r4, r5, r6, r7, lr}
; THUMBV6: sub sp, #84
; THUMBV6-NEXT: mov r6, r3
; THUMBV6-NEXT: mov r7, r2
; THUMBV6-NEXT: mov r4, r0
; THUMBV6-NEXT: movs r5, #0
; THUMBV6-NEXT: mov r0, sp
; THUMBV6-NEXT: str r5, [r0, #12]
; THUMBV6-NEXT: str r5, [r0, #8]
; THUMBV6-NEXT: ldr r1, [sp, #116]
; THUMBV6-NEXT: str r1, [sp, #68] @ 4-byte Spill
; THUMBV6-NEXT: str r1, [r0, #4]
; THUMBV6-NEXT: ldr r1, [sp, #112]
; THUMBV6-NEXT: str r1, [sp, #32] @ 4-byte Spill
; THUMBV6-NEXT: str r1, [r0]
; THUMBV6-NEXT: mov r0, r2
; THUMBV6-NEXT: mov r1, r3
; THUMBV6-NEXT: mov r2, r5
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __multi3
; THUMBV6-NEXT: str r2, [sp, #40] @ 4-byte Spill
; THUMBV6-NEXT: str r3, [sp, #44] @ 4-byte Spill
; THUMBV6-NEXT: str r4, [sp, #72] @ 4-byte Spill
; THUMBV6-NEXT: stm r4!, {r0, r1}
; THUMBV6-NEXT: ldr r4, [sp, #120]
; THUMBV6-NEXT: str r6, [sp, #60] @ 4-byte Spill
; THUMBV6-NEXT: mov r0, r6
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: mov r2, r4
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: mov r6, r0
; THUMBV6-NEXT: str r1, [sp, #52] @ 4-byte Spill
; THUMBV6-NEXT: ldr r0, [sp, #124]
; THUMBV6-NEXT: str r0, [sp, #80] @ 4-byte Spill
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: mov r2, r7
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: str r1, [sp, #28] @ 4-byte Spill
; THUMBV6-NEXT: adds r6, r0, r6
; THUMBV6-NEXT: str r4, [sp, #64] @ 4-byte Spill
; THUMBV6-NEXT: mov r0, r4
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: mov r2, r7
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: str r0, [sp, #24] @ 4-byte Spill
; THUMBV6-NEXT: adds r0, r1, r6
; THUMBV6-NEXT: str r0, [sp, #20] @ 4-byte Spill
; THUMBV6-NEXT: mov r0, r5
; THUMBV6-NEXT: adcs r0, r5
; THUMBV6-NEXT: str r0, [sp, #48] @ 4-byte Spill
; THUMBV6-NEXT: ldr r7, [sp, #104]
; THUMBV6-NEXT: ldr r0, [sp, #68] @ 4-byte Reload
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: mov r2, r7
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: mov r6, r0
; THUMBV6-NEXT: str r1, [sp, #56] @ 4-byte Spill
; THUMBV6-NEXT: ldr r0, [sp, #108]
; THUMBV6-NEXT: str r0, [sp, #76] @ 4-byte Spill
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
; THUMBV6-NEXT: mov r2, r4
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: str r1, [sp, #36] @ 4-byte Spill
; THUMBV6-NEXT: adds r6, r0, r6
; THUMBV6-NEXT: mov r0, r7
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: mov r2, r4
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: adds r2, r1, r6
; THUMBV6-NEXT: mov r1, r5
; THUMBV6-NEXT: adcs r1, r5
; THUMBV6-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
; THUMBV6-NEXT: adds r0, r0, r3
; THUMBV6-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
; THUMBV6-NEXT: adcs r2, r3
; THUMBV6-NEXT: ldr r3, [sp, #40] @ 4-byte Reload
; THUMBV6-NEXT: adds r0, r3, r0
; THUMBV6-NEXT: ldr r3, [sp, #72] @ 4-byte Reload
; THUMBV6-NEXT: str r0, [r3, #8]
; THUMBV6-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
; THUMBV6-NEXT: adcs r2, r0
; THUMBV6-NEXT: str r2, [r3, #12]
; THUMBV6-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
; THUMBV6-NEXT: adcs r5, r5
; THUMBV6-NEXT: movs r0, #1
; THUMBV6-NEXT: cmp r2, #0
; THUMBV6-NEXT: mov r3, r0
; THUMBV6-NEXT: bne .LBB0_2
; THUMBV6: mov r3, r2
; THUMBV6: ldr r2, [sp, #60] @ 4-byte Reload
; THUMBV6-NEXT: cmp r2, #0
; THUMBV6-NEXT: mov r4, r0
; THUMBV6-NEXT: bne .LBB0_4
; THUMBV6: mov r4, r2
; THUMBV6: ldr r2, [sp, #80] @ 4-byte Reload
; THUMBV6-NEXT: cmp r2, #0
; THUMBV6-NEXT: mov r2, r0
; THUMBV6-NEXT: bne .LBB0_6
; THUMBV6: ldr r2, [sp, #80] @ 4-byte Reload
; THUMBV6: ands r2, r4
; THUMBV6-NEXT: orrs r2, r3
; THUMBV6-NEXT: ldr r4, [sp, #52] @ 4-byte Reload
; THUMBV6-NEXT: cmp r4, #0
; THUMBV6-NEXT: mov r3, r0
; THUMBV6-NEXT: bne .LBB0_8
; THUMBV6: mov r3, r4
; THUMBV6: orrs r2, r3
; THUMBV6-NEXT: ldr r3, [sp, #48] @ 4-byte Reload
; THUMBV6-NEXT: orrs r2, r3
; THUMBV6-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
; THUMBV6-NEXT: cmp r3, #0
; THUMBV6-NEXT: mov r4, r0
; THUMBV6-NEXT: bne .LBB0_10
; THUMBV6: mov r4, r3
; THUMBV6: ldr r3, [sp, #68] @ 4-byte Reload
; THUMBV6-NEXT: cmp r3, #0
; THUMBV6-NEXT: mov r6, r0
; THUMBV6-NEXT: bne .LBB0_12
; THUMBV6: mov r6, r3
; THUMBV6: ldr r3, [sp, #76] @ 4-byte Reload
; THUMBV6-NEXT: cmp r3, #0
; THUMBV6-NEXT: mov r3, r0
; THUMBV6-NEXT: bne .LBB0_14
; THUMBV6: ldr r3, [sp, #76] @ 4-byte Reload
; THUMBV6: ands r3, r6
; THUMBV6-NEXT: orrs r3, r4
; THUMBV6-NEXT: ldr r6, [sp, #56] @ 4-byte Reload
; THUMBV6-NEXT: cmp r6, #0
; THUMBV6-NEXT: mov r4, r0
; THUMBV6-NEXT: bne .LBB0_16
; THUMBV6: mov r4, r6
; THUMBV6: orrs r3, r4
; THUMBV6-NEXT: orrs r3, r1
; THUMBV6-NEXT: ldr r4, [sp, #64] @ 4-byte Reload
; THUMBV6-NEXT: ldr r1, [sp, #80] @ 4-byte Reload
; THUMBV6-NEXT: orrs r4, r1
; THUMBV6-NEXT: cmp r4, #0
; THUMBV6-NEXT: mov r1, r0
; THUMBV6-NEXT: bne .LBB0_18
; THUMBV6: mov r1, r4
; THUMBV6: ldr r4, [sp, #76] @ 4-byte Reload
; THUMBV6-NEXT: orrs r7, r4
; THUMBV6-NEXT: cmp r7, #0
; THUMBV6-NEXT: mov r4, r0
; THUMBV6-NEXT: bne .LBB0_20
; THUMBV6: mov r4, r7
; THUMBV6: ands r4, r1
; THUMBV6-NEXT: orrs r4, r3
; THUMBV6-NEXT: orrs r4, r2
; THUMBV6-NEXT: orrs r4, r5
; THUMBV6-NEXT: ands r4, r0
; THUMBV6-NEXT: ldr r0, [sp, #72] @ 4-byte Reload
; THUMBV6-NEXT: strb r4, [r0, #16]
; THUMBV6-NEXT: add sp, #84
; THUMBV6-NEXT: pop {r4, r5, r6, r7, pc}
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
%1 = extractvalue { i128, i1 } %0, 0
%2 = extractvalue { i128, i1 } %0, 1
%3 = zext i1 %2 to i8
%4 = insertvalue { i128, i8 } undef, i128 %1, 0
%5 = insertvalue { i128, i8 } %4, i8 %3, 1
ret { i128, i8 } %5
}
; Function Attrs: nounwind readnone speculatable
declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1
attributes #0 = { nounwind readnone uwtable }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }


@@ -0,0 +1,122 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv7-unknown-none-gnueabi | FileCheck %s --check-prefixes=THUMBV7
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV7-LABEL: muloti_test:
; THUMBV7: @ %bb.0: @ %start
; THUMBV7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; THUMBV7-NEXT: .pad #44
; THUMBV7-NEXT: sub sp, #44
; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
; THUMBV7-NEXT: movs r0, #0
; THUMBV7-NEXT: ldrd r4, r7, [sp, #88]
; THUMBV7-NEXT: mov r5, r3
; THUMBV7-NEXT: strd r4, r7, [sp]
; THUMBV7-NEXT: mov r1, r3
; THUMBV7-NEXT: strd r0, r0, [sp, #8]
; THUMBV7-NEXT: mov r6, r2
; THUMBV7-NEXT: mov r0, r2
; THUMBV7-NEXT: movs r2, #0
; THUMBV7-NEXT: movs r3, #0
; THUMBV7-NEXT: bl __multi3
; THUMBV7-NEXT: strd r1, r0, [sp, #32]
; THUMBV7-NEXT: strd r3, r2, [sp, #24]
; THUMBV7-NEXT: ldrd r2, r0, [sp, #96]
; THUMBV7-NEXT: ldr.w r9, [sp, #80]
; THUMBV7-NEXT: umull lr, r0, r0, r6
; THUMBV7-NEXT: ldr.w r11, [sp, #84]
; THUMBV7-NEXT: umull r3, r1, r5, r2
; THUMBV7-NEXT: umull r2, r12, r2, r6
; THUMBV7-NEXT: add r3, lr
; THUMBV7-NEXT: umull r8, r10, r7, r9
; THUMBV7-NEXT: str r2, [sp, #20] @ 4-byte Spill
; THUMBV7-NEXT: adds.w lr, r12, r3
; THUMBV7-NEXT: umull r6, r9, r9, r4
; THUMBV7-NEXT: mov.w r3, #0
; THUMBV7-NEXT: adc r12, r3, #0
; THUMBV7-NEXT: umull r2, r4, r11, r4
; THUMBV7-NEXT: add r2, r8
; THUMBV7-NEXT: mov.w r8, #0
; THUMBV7-NEXT: adds.w r2, r2, r9
; THUMBV7-NEXT: adc r9, r3, #0
; THUMBV7-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
; THUMBV7-NEXT: adds r3, r3, r6
; THUMBV7-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
; THUMBV7-NEXT: adc.w r2, r2, lr
; THUMBV7-NEXT: adds r3, r3, r6
; THUMBV7-NEXT: ldr r6, [sp, #24] @ 4-byte Reload
; THUMBV7-NEXT: adcs r2, r6
; THUMBV7-NEXT: ldrd r6, lr, [sp, #36]
; THUMBV7-NEXT: str.w r6, [lr]
; THUMBV7-NEXT: adc r8, r8, #0
; THUMBV7-NEXT: ldr r6, [sp, #32] @ 4-byte Reload
; THUMBV7-NEXT: cmp r5, #0
; THUMBV7-NEXT: strd r6, r3, [lr, #4]
; THUMBV7-NEXT: str.w r2, [lr, #12]
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r5, #1
; THUMBV7-NEXT: ldr r2, [sp, #100]
; THUMBV7-NEXT: cmp r2, #0
; THUMBV7-NEXT: mov r3, r2
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r3, #1
; THUMBV7-NEXT: cmp r0, #0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r0, #1
; THUMBV7-NEXT: cmp r1, #0
; THUMBV7-NEXT: and.w r3, r3, r5
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r1, #1
; THUMBV7-NEXT: orrs r0, r3
; THUMBV7-NEXT: cmp r7, #0
; THUMBV7-NEXT: orr.w r0, r0, r1
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r7, #1
; THUMBV7-NEXT: cmp.w r11, #0
; THUMBV7-NEXT: mov r1, r11
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r1, #1
; THUMBV7-NEXT: cmp r4, #0
; THUMBV7-NEXT: ldr r3, [sp, #96]
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r4, #1
; THUMBV7-NEXT: cmp.w r10, #0
; THUMBV7-NEXT: and.w r1, r1, r7
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne.w r10, #1
; THUMBV7-NEXT: orrs r3, r2
; THUMBV7-NEXT: ldr r2, [sp, #80]
; THUMBV7-NEXT: orr.w r1, r1, r4
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r3, #1
; THUMBV7-NEXT: orr.w r1, r1, r10
; THUMBV7-NEXT: orrs.w r7, r2, r11
; THUMBV7-NEXT: orr.w r1, r1, r9
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r7, #1
; THUMBV7-NEXT: orr.w r0, r0, r12
; THUMBV7-NEXT: ands r3, r7
; THUMBV7-NEXT: orrs r1, r3
; THUMBV7-NEXT: orrs r0, r1
; THUMBV7-NEXT: orr.w r0, r0, r8
; THUMBV7-NEXT: and r0, r0, #1
; THUMBV7-NEXT: strb.w r0, [lr, #16]
; THUMBV7-NEXT: add sp, #44
; THUMBV7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
%1 = extractvalue { i128, i1 } %0, 0
%2 = extractvalue { i128, i1 } %0, 1
%3 = zext i1 %2 to i8
%4 = insertvalue { i128, i8 } undef, i128 %1, 0
%5 = insertvalue { i128, i8 } %4, i8 %3, 1
ret { i128, i8 } %5
}
; Function Attrs: nounwind readnone speculatable
declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1
attributes #0 = { nounwind readnone uwtable }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }


@@ -0,0 +1,49 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv7-unknown-none-gnueabi | FileCheck %s --check-prefixes=THUMBV7
define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; THUMBV7-LABEL: mulodi_test:
; THUMBV7: @ %bb.0: @ %start
; THUMBV7-NEXT: .save {r4, r5, r6, lr}
; THUMBV7-NEXT: push {r4, r5, r6, lr}
; THUMBV7-NEXT: umull r12, lr, r3, r0
; THUMBV7-NEXT: movs r6, #0
; THUMBV7-NEXT: umull r4, r5, r1, r2
; THUMBV7-NEXT: umull r0, r2, r0, r2
; THUMBV7-NEXT: add r4, r12
; THUMBV7-NEXT: adds.w r12, r2, r4
; THUMBV7-NEXT: adc r2, r6, #0
; THUMBV7-NEXT: cmp r3, #0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r3, #1
; THUMBV7-NEXT: cmp r1, #0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r1, #1
; THUMBV7-NEXT: cmp r5, #0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r5, #1
; THUMBV7-NEXT: ands r1, r3
; THUMBV7-NEXT: cmp.w lr, #0
; THUMBV7-NEXT: orr.w r1, r1, r5
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne.w lr, #1
; THUMBV7-NEXT: orr.w r1, r1, lr
; THUMBV7-NEXT: orrs r2, r1
; THUMBV7-NEXT: mov r1, r12
; THUMBV7-NEXT: pop {r4, r5, r6, pc}
start:
%0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2
%1 = extractvalue { i64, i1 } %0, 0
%2 = extractvalue { i64, i1 } %0, 1
%3 = zext i1 %2 to i8
%4 = insertvalue { i64, i8 } undef, i64 %1, 0
%5 = insertvalue { i64, i8 } %4, i8 %3, 1
ret { i64, i8 } %5
}
; Function Attrs: nounwind readnone speculatable
declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #1
attributes #0 = { nounwind readnone uwtable }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }


@@ -0,0 +1,94 @@
; RUN: llc < %s -mtriple=wasm32 | FileCheck %s --check-prefixes=WASM32
; NOTE: did not compile on wasm64 at the time the test was created!
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; WASM32-LABEL: muloti_test
; WASM32: get_global $push18=, __stack_pointer@GLOBAL
; WASM32: i32.const $push19=, 48
; WASM32: i32.sub $push40=, $pop18, $pop19
; WASM32: tee_local $push39=, 5, $pop40
; WASM32: set_global __stack_pointer@GLOBAL, $pop39
; WASM32: get_local $push41=, 5
; WASM32: i32.const $push22=, 32
; WASM32: i32.add $push23=, $pop41, $pop22
; WASM32: get_local $push43=, 1
; WASM32: i64.const $push0=, 0
; WASM32: get_local $push42=, 3
; WASM32: i64.const $push38=, 0
; WASM32: call __multi3@FUNCTION, $pop23, $pop43, $pop0, $pop42, $pop38
; WASM32: get_local $push44=, 5
; WASM32: i32.const $push24=, 16
; WASM32: i32.add $push25=, $pop44, $pop24
; WASM32: get_local $push46=, 4
; WASM32: i64.const $push37=, 0
; WASM32: get_local $push45=, 1
; WASM32: i64.const $push36=, 0
; WASM32: call __multi3@FUNCTION, $pop25, $pop46, $pop37, $pop45, $pop36
; WASM32: get_local $push49=, 5
; WASM32: get_local $push48=, 2
; WASM32: i64.const $push35=, 0
; WASM32: get_local $push47=, 3
; WASM32: i64.const $push34=, 0
; WASM32: call __multi3@FUNCTION, $pop49, $pop48, $pop35, $pop47, $pop34
; WASM32: get_local $push51=, 0
; WASM32: get_local $push50=, 5
; WASM32: i64.load $push1=, 32($pop50)
; WASM32: i64.store 0($pop51), $pop1
; WASM32: get_local $push55=, 0
; WASM32: get_local $push52=, 5
; WASM32: i32.const $push5=, 40
; WASM32: i32.add $push6=, $pop52, $pop5
; WASM32: i64.load $push33=, 0($pop6)
; WASM32: tee_local $push32=, 1, $pop33
; WASM32: get_local $push53=, 5
; WASM32: i64.load $push3=, 0($pop53)
; WASM32: get_local $push54=, 5
; WASM32: i64.load $push2=, 16($pop54)
; WASM32: i64.add $push4=, $pop3, $pop2
; WASM32: i64.add $push31=, $pop32, $pop4
; WASM32: tee_local $push30=, 3, $pop31
; WASM32: i64.store 8($pop55), $pop30
; WASM32: get_local $push62=, 0
; WASM32: get_local $push56=, 2
; WASM32: i64.const $push29=, 0
; WASM32: i64.ne $push8=, $pop56, $pop29
; WASM32: get_local $push57=, 4
; WASM32: i64.const $push28=, 0
; WASM32: i64.ne $push7=, $pop57, $pop28
; WASM32: i32.and $push9=, $pop8, $pop7
; WASM32: get_local $push58=, 5
; WASM32: i64.load $push10=, 8($pop58)
; WASM32: i64.const $push27=, 0
; WASM32: i64.ne $push11=, $pop10, $pop27
; WASM32: i32.or $push12=, $pop9, $pop11
; WASM32: get_local $push59=, 5
; WASM32: i64.load $push13=, 24($pop59)
; WASM32: i64.const $push26=, 0
; WASM32: i64.ne $push14=, $pop13, $pop26
; WASM32: i32.or $push15=, $pop12, $pop14
; WASM32: get_local $push61=, 3
; WASM32: get_local $push60=, 1
; WASM32: i64.lt_u $push16=, $pop61, $pop60
; WASM32: i32.or $push17=, $pop15, $pop16
; WASM32: i32.store8 16($pop62), $pop17
; WASM32: get_local $push63=, 5
; WASM32: i32.const $push20=, 48
; WASM32: i32.add $push21=, $pop63, $pop20
; WASM32: set_global __stack_pointer@GLOBAL, $pop21
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
%1 = extractvalue { i128, i1 } %0, 0
%2 = extractvalue { i128, i1 } %0, 1
%3 = zext i1 %2 to i8
%4 = insertvalue { i128, i8 } undef, i128 %1, 0
%5 = insertvalue { i128, i8 } %4, i8 %3, 1
ret { i128, i8 } %5
}
; Function Attrs: nounwind readnone speculatable
declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1
attributes #0 = { nounwind readnone uwtable }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }


@@ -32,50 +32,6 @@ nooverflow: ; preds = %entry
ret %0 %tmp24
}
define %0 @foo(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp {
entry:
; CHECK: foo
%retval = alloca i128, align 16
%coerce = alloca i128, align 16
%a.addr = alloca i128, align 16
%coerce1 = alloca i128, align 16
%b.addr = alloca i128, align 16
%0 = bitcast i128* %coerce to %0*
%1 = getelementptr %0, %0* %0, i32 0, i32 0
store i64 %a.coerce0, i64* %1
%2 = getelementptr %0, %0* %0, i32 0, i32 1
store i64 %a.coerce1, i64* %2
%a = load i128, i128* %coerce, align 16
store i128 %a, i128* %a.addr, align 16
%3 = bitcast i128* %coerce1 to %0*
%4 = getelementptr %0, %0* %3, i32 0, i32 0
store i64 %b.coerce0, i64* %4
%5 = getelementptr %0, %0* %3, i32 0, i32 1
store i64 %b.coerce1, i64* %5
%b = load i128, i128* %coerce1, align 16
store i128 %b, i128* %b.addr, align 16
%tmp = load i128, i128* %a.addr, align 16
%tmp2 = load i128, i128* %b.addr, align 16
%6 = call %1 @llvm.umul.with.overflow.i128(i128 %tmp, i128 %tmp2)
; CHECK: cmov
; CHECK: divti3
%7 = extractvalue %1 %6, 0
%8 = extractvalue %1 %6, 1
br i1 %8, label %overflow, label %nooverflow
overflow: ; preds = %entry
call void @llvm.trap()
unreachable
nooverflow: ; preds = %entry
store i128 %7, i128* %retval
%9 = bitcast i128* %retval to %0*
%10 = load %0, %0* %9, align 1
ret %0 %10
}
declare %1 @llvm.umul.with.overflow.i128(i128, i128) nounwind readnone
declare %1 @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone
declare void @llvm.trap() nounwind


@@ -53,6 +53,7 @@ define i32 @test2() nounwind {
; GENERIC-NEXT: popq %rcx
; GENERIC-NEXT: retq
; GENERIC-NEXT: LBB1_1: ## %bb90
; GENERIC-NEXT: ud2
;
; ATOM-LABEL: test2:
; ATOM: ## %bb.0: ## %entry
@@ -70,6 +71,7 @@ define i32 @test2() nounwind {
; ATOM-NEXT: popq %rcx
; ATOM-NEXT: retq
; ATOM-NEXT: LBB1_1: ## %bb90
; ATOM-NEXT: ud2
;
; MCU-LABEL: test2:
; MCU: # %bb.0: # %entry
@@ -636,71 +638,6 @@ define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
ret i64 %cond
}
declare noalias i8* @_Znam(i64) noredzone
define noalias i8* @test12(i64 %count) nounwind ssp noredzone {
; GENERIC-LABEL: test12:
; GENERIC: ## %bb.0: ## %entry
; GENERIC-NEXT: movl $4, %ecx
; GENERIC-NEXT: movq %rdi, %rax
; GENERIC-NEXT: mulq %rcx
; GENERIC-NEXT: movq $-1, %rdi
; GENERIC-NEXT: cmovnoq %rax, %rdi
; GENERIC-NEXT: jmp __Znam ## TAILCALL
;
; ATOM-LABEL: test12:
; ATOM: ## %bb.0: ## %entry
; ATOM-NEXT: movq %rdi, %rax
; ATOM-NEXT: movl $4, %ecx
; ATOM-NEXT: movq $-1, %rdi
; ATOM-NEXT: mulq %rcx
; ATOM-NEXT: cmovnoq %rax, %rdi
; ATOM-NEXT: jmp __Znam ## TAILCALL
;
; MCU-LABEL: test12:
; MCU: # %bb.0: # %entry
; MCU-NEXT: pushl %ebp
; MCU-NEXT: pushl %ebx
; MCU-NEXT: pushl %edi
; MCU-NEXT: pushl %esi
; MCU-NEXT: movl %edx, %ebx
; MCU-NEXT: movl %eax, %ebp
; MCU-NEXT: movl $4, %ecx
; MCU-NEXT: mull %ecx
; MCU-NEXT: movl %eax, %esi
; MCU-NEXT: leal (%edx,%ebx,4), %edi
; MCU-NEXT: movl %edi, %edx
; MCU-NEXT: pushl $0
; MCU-NEXT: pushl $4
; MCU-NEXT: calll __udivdi3
; MCU-NEXT: addl $8, %esp
; MCU-NEXT: xorl %ebx, %edx
; MCU-NEXT: xorl %ebp, %eax
; MCU-NEXT: orl %edx, %eax
; MCU-NEXT: movl $-1, %eax
; MCU-NEXT: movl $-1, %edx
; MCU-NEXT: jne .LBB14_2
; MCU-NEXT: # %bb.1: # %entry
; MCU-NEXT: movl %esi, %eax
; MCU-NEXT: movl %edi, %edx
; MCU-NEXT: .LBB14_2: # %entry
; MCU-NEXT: popl %esi
; MCU-NEXT: popl %edi
; MCU-NEXT: popl %ebx
; MCU-NEXT: popl %ebp
; MCU-NEXT: jmp _Znam # TAILCALL
entry:
%A = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %count, i64 4)
%B = extractvalue { i64, i1 } %A, 1
%C = extractvalue { i64, i1 } %A, 0
%D = select i1 %B, i64 -1, i64 %C
%call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone
ret i8* %call
}
declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
define i32 @test13(i32 %a, i32 %b) nounwind {
; GENERIC-LABEL: test13:
; GENERIC: ## %bb.0:
@@ -862,10 +799,10 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind {
; MCU-LABEL: test18:
; MCU: # %bb.0:
; MCU-NEXT: cmpl $15, %eax
; MCU-NEXT: jl .LBB20_2
; MCU-NEXT: jl .LBB19_2
; MCU-NEXT: # %bb.1:
; MCU-NEXT: movl %ecx, %edx
; MCU-NEXT: .LBB20_2:
; MCU-NEXT: .LBB19_2:
; MCU-NEXT: movl %edx, %eax
; MCU-NEXT: retl
%cmp = icmp slt i32 %x, 15
@@ -902,10 +839,10 @@ define void @clamp_i8(i32 %src, i8* %dst) {
; GENERIC-NEXT: cmovlel %edi, %eax
; GENERIC-NEXT: cmpl $-128, %eax
; GENERIC-NEXT: movb $-128, %cl
; GENERIC-NEXT: jl LBB22_2
; GENERIC-NEXT: jl LBB21_2
; GENERIC-NEXT: ## %bb.1:
; GENERIC-NEXT: movl %eax, %ecx
; GENERIC-NEXT: LBB22_2:
; GENERIC-NEXT: LBB21_2:
; GENERIC-NEXT: movb %cl, (%rsi)
; GENERIC-NEXT: retq
;
@@ -916,10 +853,10 @@ define void @clamp_i8(i32 %src, i8* %dst) {
; ATOM-NEXT: movb $-128, %cl
; ATOM-NEXT: cmovlel %edi, %eax
; ATOM-NEXT: cmpl $-128, %eax
; ATOM-NEXT: jl LBB22_2
; ATOM-NEXT: jl LBB21_2
; ATOM-NEXT: ## %bb.1:
; ATOM-NEXT: movl %eax, %ecx
; ATOM-NEXT: LBB22_2:
; ATOM-NEXT: LBB21_2:
; ATOM-NEXT: movb %cl, (%rsi)
; ATOM-NEXT: retq
;
@@ -927,16 +864,16 @@ define void @clamp_i8(i32 %src, i8* %dst) {
; MCU: # %bb.0:
; MCU-NEXT: cmpl $127, %eax
; MCU-NEXT: movl $127, %ecx
; MCU-NEXT: jg .LBB22_2
; MCU-NEXT: jg .LBB21_2
; MCU-NEXT: # %bb.1:
; MCU-NEXT: movl %eax, %ecx
; MCU-NEXT: .LBB22_2:
; MCU-NEXT: .LBB21_2:
; MCU-NEXT: cmpl $-128, %ecx
; MCU-NEXT: movb $-128, %al
; MCU-NEXT: jl .LBB22_4
; MCU-NEXT: jl .LBB21_4
; MCU-NEXT: # %bb.3:
; MCU-NEXT: movl %ecx, %eax
; MCU-NEXT: .LBB22_4:
; MCU-NEXT: .LBB21_4:
; MCU-NEXT: movb %al, (%edx)
; MCU-NEXT: retl
%cmp = icmp sgt i32 %src, 127
@@ -976,16 +913,16 @@ define void @clamp(i32 %src, i16* %dst) {
; MCU: # %bb.0:
; MCU-NEXT: cmpl $32767, %eax # imm = 0x7FFF
; MCU-NEXT: movl $32767, %ecx # imm = 0x7FFF
; MCU-NEXT: jg .LBB23_2
; MCU-NEXT: jg .LBB22_2
; MCU-NEXT: # %bb.1:
; MCU-NEXT: movl %eax, %ecx
; MCU-NEXT: .LBB23_2:
; MCU-NEXT: .LBB22_2:
; MCU-NEXT: cmpl $-32768, %ecx # imm = 0x8000
; MCU-NEXT: movl $32768, %eax # imm = 0x8000
; MCU-NEXT: jl .LBB23_4
; MCU-NEXT: jl .LBB22_4
; MCU-NEXT: # %bb.3:
; MCU-NEXT: movl %ecx, %eax
; MCU-NEXT: .LBB23_4:
; MCU-NEXT: .LBB22_4:
; MCU-NEXT: movw %ax, (%edx)
; MCU-NEXT: retl
%cmp = icmp sgt i32 %src, 32767
@ -1009,19 +946,19 @@ define void @test19() {
; CHECK-NEXT: movl $-1, %eax
; CHECK-NEXT: movb $1, %cl
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB24_1: ## %CF
; CHECK-NEXT: LBB23_1: ## %CF
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: jne LBB24_1
; CHECK-NEXT: jne LBB23_1
; CHECK-NEXT: ## %bb.2: ## %CF250
; CHECK-NEXT: ## in Loop: Header=BB24_1 Depth=1
; CHECK-NEXT: jne LBB24_1
; CHECK-NEXT: ## in Loop: Header=BB23_1 Depth=1
; CHECK-NEXT: jne LBB23_1
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB24_3: ## %CF242
; CHECK-NEXT: LBB23_3: ## %CF242
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpl %eax, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: jp LBB24_3
; CHECK-NEXT: jp LBB23_3
; CHECK-NEXT: ## %bb.4: ## %CF244
; CHECK-NEXT: retq
;
@ -1030,24 +967,24 @@ define void @test19() {
; MCU-NEXT: movl $-1, %ecx
; MCU-NEXT: movb $1, %al
; MCU-NEXT: .p2align 4, 0x90
; MCU-NEXT: .LBB24_1: # %CF
; MCU-NEXT: .LBB23_1: # %CF
; MCU-NEXT: # =>This Inner Loop Header: Depth=1
; MCU-NEXT: testb %al, %al
; MCU-NEXT: jne .LBB24_1
; MCU-NEXT: jne .LBB23_1
; MCU-NEXT: # %bb.2: # %CF250
; MCU-NEXT: # in Loop: Header=BB24_1 Depth=1
; MCU-NEXT: jne .LBB24_1
; MCU-NEXT: # in Loop: Header=BB23_1 Depth=1
; MCU-NEXT: jne .LBB23_1
; MCU-NEXT: # %bb.3: # %CF242.preheader
; MCU-NEXT: fldz
; MCU-NEXT: .p2align 4, 0x90
; MCU-NEXT: .LBB24_4: # %CF242
; MCU-NEXT: .LBB23_4: # %CF242
; MCU-NEXT: # =>This Inner Loop Header: Depth=1
; MCU-NEXT: cmpl %eax, %ecx
; MCU-NEXT: fucom %st(0)
; MCU-NEXT: fnstsw %ax
; MCU-NEXT: # kill: def $ah killed $ah killed $ax
; MCU-NEXT: sahf
; MCU-NEXT: jp .LBB24_4
; MCU-NEXT: jp .LBB23_4
; MCU-NEXT: # %bb.5: # %CF244
; MCU-NEXT: fstp %st(0)
; MCU-NEXT: retl
@ -1116,10 +1053,10 @@ define i16 @select_xor_1b(i16 %A, i8 %cond) {
; MCU-LABEL: select_xor_1b:
; MCU: # %bb.0: # %entry
; MCU-NEXT: testb $1, %dl
; MCU-NEXT: je .LBB26_2
; MCU-NEXT: je .LBB25_2
; MCU-NEXT: # %bb.1:
; MCU-NEXT: xorl $43, %eax
; MCU-NEXT: .LBB26_2: # %entry
; MCU-NEXT: .LBB25_2: # %entry
; MCU-NEXT: # kill: def $ax killed $ax killed $eax
; MCU-NEXT: retl
entry:
@ -1168,10 +1105,10 @@ define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) {
; MCU-LABEL: select_xor_2b:
; MCU: # %bb.0: # %entry
; MCU-NEXT: testb $1, %cl
; MCU-NEXT: je .LBB28_2
; MCU-NEXT: je .LBB27_2
; MCU-NEXT: # %bb.1:
; MCU-NEXT: xorl %edx, %eax
; MCU-NEXT: .LBB28_2: # %entry
; MCU-NEXT: .LBB27_2: # %entry
; MCU-NEXT: retl
entry:
%and = and i8 %cond, 1
@ -1219,10 +1156,10 @@ define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) {
; MCU-LABEL: select_or_b:
; MCU: # %bb.0: # %entry
; MCU-NEXT: testb $1, %cl
; MCU-NEXT: je .LBB30_2
; MCU-NEXT: je .LBB29_2
; MCU-NEXT: # %bb.1:
; MCU-NEXT: orl %edx, %eax
; MCU-NEXT: .LBB30_2: # %entry
; MCU-NEXT: .LBB29_2: # %entry
; MCU-NEXT: retl
entry:
%and = and i8 %cond, 1
@ -1270,10 +1207,10 @@ define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) {
; MCU-LABEL: select_or_1b:
; MCU: # %bb.0: # %entry
; MCU-NEXT: testb $1, %cl
; MCU-NEXT: je .LBB32_2
; MCU-NEXT: je .LBB31_2
; MCU-NEXT: # %bb.1:
; MCU-NEXT: orl %edx, %eax
; MCU-NEXT: .LBB32_2: # %entry
; MCU-NEXT: .LBB31_2: # %entry
; MCU-NEXT: retl
entry:
%and = and i32 %cond, 1

@ -0,0 +1,196 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=X86
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X64-LABEL: muloti_test:
; X64: # %bb.0: # %start
; X64-NEXT: movq %rdx, %r8
; X64-NEXT: testq %rcx, %rcx
; X64-NEXT: setne %al
; X64-NEXT: testq %rsi, %rsi
; X64-NEXT: setne %r9b
; X64-NEXT: andb %al, %r9b
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: mulq %rdx
; X64-NEXT: movq %rax, %rsi
; X64-NEXT: seto %r10b
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: mulq %rdi
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: seto %r11b
; X64-NEXT: orb %r10b, %r11b
; X64-NEXT: addq %rsi, %rcx
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: mulq %r8
; X64-NEXT: addq %rcx, %rdx
; X64-NEXT: setb %cl
; X64-NEXT: orb %r11b, %cl
; X64-NEXT: orb %r9b, %cl
; X64-NEXT: retq
;
; X86-LABEL: muloti_test:
; X86: # %bb.0: # %start
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: subl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 48
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: mull %ebx
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull %edi
; X86-NEXT: movl %eax, %esi
; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: addl %ecx, %esi
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %ebx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: addl %esi, %ecx
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ebp
; X86-NEXT: movl %eax, %edi
; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ebx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: addl %edi, %esi
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull %ebp
; X86-NEXT: movl %ebp, %ebx
; X86-NEXT: movl %edx, %edi
; X86-NEXT: addl %esi, %edi
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %ecx, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: movl %ebx, %ecx
; X86-NEXT: mull %ebx
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: addl %ebx, %ecx
; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %esi, %ebx
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl %ebx, %esi
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: adcl %eax, %ecx
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %edx
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: mull %edx
; X86-NEXT: addl %ebx, %eax
; X86-NEXT: adcl %ebp, %edx
; X86-NEXT: addl %esi, %eax
; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: adcl %edi, %edx
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT: setne %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: testl %esi, %esi
; X86-NEXT: setne %ch
; X86-NEXT: andb %cl, %ch
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
; X86-NEXT: orb %ch, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT: setne %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: testl %edi, %edi
; X86-NEXT: setne %bh
; X86-NEXT: andb %cl, %bh
; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: orl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: setne %bl
; X86-NEXT: orl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, (%ecx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, 4(%ecx)
; X86-NEXT: movl %eax, 8(%ecx)
; X86-NEXT: movl %edx, 12(%ecx)
; X86-NEXT: setne %al
; X86-NEXT: andb %bl, %al
; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
; X86-NEXT: orb %bh, %al
; X86-NEXT: andb $1, %al
; X86-NEXT: movb %al, 16(%ecx)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $28, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl $4
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
%1 = extractvalue { i128, i1 } %0, 0
%2 = extractvalue { i128, i1 } %0, 1
%3 = zext i1 %2 to i8
%4 = insertvalue { i128, i8 } undef, i128 %1, 0
%5 = insertvalue { i128, i8 } %4, i8 %3, 1
ret { i128, i8 } %5
}
; Function Attrs: nounwind readnone speculatable
declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1
attributes #0 = { nounwind readnone uwtable }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }
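muloti_test above is a new end-to-end check that @llvm.umul.with.overflow.i128 is legalised on both x86_64 and i686 into plain multiplies plus flag bookkeeping, as the X64 and X86 check blocks show. A hedged C++ usage sketch that is assumed to produce an equivalent intrinsic call when built with clang for a 64-bit target, via __builtin_mul_overflow on matching unsigned __int128 operands; unsigned __int128 is a compiler extension that does not exist on i686, so the 32-bit RUN line exercises IR handed to the backend directly:

// A hedged sketch, not part of the test: with clang on x86_64 this is assumed
// to lower to @llvm.umul.with.overflow.i128, the intrinsic muloti_test checks.
// The struct mirrors the { i128, i8 } aggregate returned by the test function.
struct MulOti {
  unsigned __int128 value;   // wrapped 128-bit product
  unsigned char overflowed;  // 1 if the full product did not fit in 128 bits
};

MulOti overflowing_mul_u128(unsigned __int128 l, unsigned __int128 r) {
  MulOti out;
  out.overflowed = __builtin_mul_overflow(l, r, &out.value) ? 1 : 0;
  return out;
}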

@ -0,0 +1,66 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=X86
define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; X86-LABEL: mulodi_test:
; X86: # %bb.0: # %start
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: testl %esi, %esi
; X86-NEXT: setne %dl
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %bl
; X86-NEXT: andb %dl, %bl
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, %edi
; X86-NEXT: seto %cl
; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull %ebp
; X86-NEXT: movl %eax, %esi
; X86-NEXT: seto %ch
; X86-NEXT: orb %cl, %ch
; X86-NEXT: addl %edi, %esi
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %esi, %edx
; X86-NEXT: setb %cl
; X86-NEXT: orb %ch, %cl
; X86-NEXT: orb %bl, %cl
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
start:
%0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2
%1 = extractvalue { i64, i1 } %0, 0
%2 = extractvalue { i64, i1 } %0, 1
%3 = zext i1 %2 to i8
%4 = insertvalue { i64, i8 } undef, i64 %1, 0
%5 = insertvalue { i64, i8 } %4, i8 %3, 1
ret { i64, i8 } %5
}
; Function Attrs: nounwind readnone speculatable
declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #1
attributes #0 = { nounwind readnone uwtable }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }
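mulodi_test is the 64-bit counterpart for i686: the checks above verify that @llvm.umul.with.overflow.i64 is expanded into three 32-bit mull operations with the overflow bit accumulated through seto/setb/orb. A small, hedged sketch of the semantics the test relies on (assuming clang maps __builtin_mul_overflow with matching unsigned 64-bit operands onto this intrinsic); the two products are chosen so that one just fits in 64 bits and the other just overflows:

// Semantics sketch for 64-bit overflowing multiplication; the assert values
// are worked out by hand and are not taken from the test file.
#include <cassert>
#include <cstdint>

int main() {
  std::uint64_t product = 0;
  // 0xFFFFFFFF * 0xFFFFFFFF = 0xFFFFFFFE00000001 still fits in 64 bits.
  assert(!__builtin_mul_overflow(0xFFFFFFFFull, 0xFFFFFFFFull, &product));
  assert(product == 0xFFFFFFFE00000001ull);
  // 0x100000000 * 0x100000000 = 2^64 overflows; the stored result wraps to 0.
  assert(__builtin_mul_overflow(0x100000000ull, 0x100000000ull, &product));
  assert(product == 0);
  return 0;
}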