[AArch64] Swap comparison operands if that enables some folding.

Summary:
AArch64 can fold some shift+extend operations on the RHS operand of
comparisons, so swap the operands if that makes sense.

This provides a fix for https://bugs.llvm.org/show_bug.cgi?id=38751

Reviewers: efriedma, t.p.northover, javed.absar

Subscribers: mcrosier, kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D53067

llvm-svn: 344439
This commit is contained in:
Arnaud A. de Grandmaison 2018-10-13 07:43:56 +00:00
parent aeb76c8513
commit 66eb737214
6 changed files with 752 additions and 84 deletions

View File

@ -1460,6 +1460,21 @@ static bool isLegalArithImmed(uint64_t C) {
return IsLegal;
}
// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
// the grounds that "op1 - (-op2) == op1 + op2"? Not always, because the C and
// V flags can be set differently by this operation. It comes down to whether
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
// everything is fine. If not then the optimization is wrong. Thus general
// comparisons are only valid if op2 != 0.
//
// So, finally, the only LLVM-native comparisons that don't mention C and V
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
// the absence of information about op2.
//
// Returns true iff Op has the (sub 0, op2) shape and CC is one of the two
// condition codes for which the CMP -> CMN rewrite is unconditionally safe.
static bool isCMN(SDValue Op, ISD::CondCode CC) {
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE);
}
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
EVT VT = LHS.getValueType();
@ -1482,18 +1497,8 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// register to WZR/XZR if it ends up being unused.
unsigned Opcode = AArch64ISD::SUBS;
if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on
// the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
// can be set differently by this operation. It comes down to whether
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
// everything is fine. If not then the optimization is wrong. Thus general
// comparisons are only valid if op2 != 0.
// So, finally, the only LLVM-native comparisons that don't mention C and V
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
// the absence of information about op2.
if (isCMN(RHS, CC)) {
// Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
RHS = RHS.getOperand(1);
} else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
@ -1765,6 +1770,42 @@ static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
/// @}
/// Returns how profitable it is to fold a comparison's operand's shift and/or
/// extension operations.
///
/// The score is 0 (nothing foldable), 1 (a foldable extend, or a foldable
/// constant shift), or 2 (an extend shifted left by at most 4 — the AArch64
/// extended-register operand form can absorb both in a single instruction).
/// getAArch64Cmp compares the scores of the two operands to decide whether
/// swapping them is worthwhile.
static unsigned getCmpOperandFoldingProfit(SDValue Op) {
// An extend is foldable iff it is an explicit sign-extend-in-register, or an
// AND whose mask corresponds to a zero-extend from i8/i16/i32 (uxtb/uxth/
// uxtw).
auto isSupportedExtend = [&](SDValue V) {
if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
return true;
if (V.getOpcode() == ISD::AND)
if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
uint64_t Mask = MaskCst->getZExtValue();
return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
}
return false;
};
// Folding only pays off if the comparison is the sole user: otherwise the
// shift/extend must be materialized anyway.
if (!Op.hasOneUse())
return 0;
if (isSupportedExtend(Op))
return 1;
unsigned Opc = Op.getOpcode();
if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
uint64_t Shift = ShiftCst->getZExtValue();
// An extend shifted by up to 4 fits the extended-register form (best
// case, score 2); with a larger shift only the shift itself can fold.
if (isSupportedExtend(Op.getOperand(0)))
return (Shift <= 4) ? 2 : 1;
EVT VT = Op.getValueType();
// A plain constant shift folds via the shifted-register form as long as
// the amount is within the register width.
if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
return 1;
}
return 0;
}
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG,
const SDLoc &dl) {
@ -1822,6 +1863,27 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
}
// Comparisons are canonicalized so that the RHS operand is simpler than the
// LHS one, the extreme case being when RHS is an immediate. However, AArch64
// can fold some shift+extend operations on the RHS operand, so swap the
// operands if that can be done.
//
// For example:
// lsl w13, w11, #1
// cmp w13, w12
// can be turned into:
// cmp w12, w11, lsl #1
if (!isa<ConstantSDNode>(RHS) ||
!isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
std::swap(LHS, RHS);
CC = ISD::getSetCCSwappedOperands(CC);
}
}
SDValue Cmp;
AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {

View File

@ -179,7 +179,9 @@ ret_false:
ret_true:
ret i1 true
; CHECK-LABEL: test16_2
; CHECK: and
; CHECK: mov [[CST:w[0-9]+]], #16882
; CHECK: add [[ADD:w[0-9]+]], w0, [[CST]]
; CHECK: cmp {{.*}}, [[ADD]], uxth
; CHECK: ret
}
@ -207,7 +209,9 @@ ret_false:
ret_true:
ret i1 true
; CHECK-LABEL: test16_4
; CHECK: and
; CHECK: mov [[CST:w[0-9]+]], #29985
; CHECK: add [[ADD:w[0-9]+]], w0, [[CST]]
; CHECK: cmp {{.*}}, [[ADD]], uxth
; CHECK: ret
}
@ -249,7 +253,9 @@ ret_false:
ret_true:
ret i1 true
; CHECK-LABEL: test16_7
; CHECK: and
; CHECK: mov [[CST:w[0-9]+]], #9272
; CHECK: add [[ADD:w[0-9]+]], w0, [[CST]]
; CHECK: cmp {{.*}}, [[ADD]], uxth
; CHECK: ret
}

View File

@ -35,8 +35,7 @@ define i1 @shifts_necmp_i16_i8(i16 %x) nounwind {
define i1 @shifts_necmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cmp w0, w0, sxth
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = shl i32 %x, 16 ; 32-16
@ -48,8 +47,7 @@ define i1 @shifts_necmp_i32_i16(i32 %x) nounwind {
define i1 @shifts_necmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i32_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cmp w0, w0, sxtb
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = shl i32 %x, 24 ; 32-8
@ -61,8 +59,7 @@ define i1 @shifts_necmp_i32_i8(i32 %x) nounwind {
define i1 @shifts_necmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i64_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxtw
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = shl i64 %x, 32 ; 64-32
@ -74,8 +71,7 @@ define i1 @shifts_necmp_i64_i32(i64 %x) nounwind {
define i1 @shifts_necmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i64_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxth
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = shl i64 %x, 48 ; 64-16
@ -87,8 +83,7 @@ define i1 @shifts_necmp_i64_i16(i64 %x) nounwind {
define i1 @shifts_necmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i64_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxtb
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = shl i64 %x, 56 ; 64-8
@ -117,8 +112,7 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cmp w0, w0, sxth
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i32 %x, -32768 ; ~0U << (16-1)
@ -129,8 +123,7 @@ define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cmp w0, w0, sxtb
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i32 %x, -128 ; ~0U << (8-1)
@ -141,8 +134,7 @@ define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
define i1 @add_ultcmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxtw
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, -2147483648 ; ~0U << (32-1)
@ -153,8 +145,7 @@ define i1 @add_ultcmp_i64_i32(i64 %x) nounwind {
define i1 @add_ultcmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxth
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, -32768 ; ~0U << (16-1)
@ -165,8 +156,7 @@ define i1 @add_ultcmp_i64_i16(i64 %x) nounwind {
define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxtb
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, -128 ; ~0U << (8-1)
@ -208,8 +198,7 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cmp w0, w0, sxth
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
@ -220,8 +209,7 @@ define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cmp w0, w0, sxtb
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
@ -232,8 +220,7 @@ define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
define i1 @add_ugecmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxtw
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
@ -244,8 +231,7 @@ define i1 @add_ugecmp_i64_i32(i64 %x) nounwind {
define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxth
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
@ -256,8 +242,7 @@ define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxtb
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 128 ; 1U << (8-1)

View File

@ -52,11 +52,10 @@ define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_min:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: mov w9, #65493
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: mov w8, #65493
; CHECK-NEXT: cmp w8, w0, uxth
; CHECK-NEXT: mov w8, #-43
; CHECK-NEXT: csel w8, w0, w8, lo
; CHECK-NEXT: csel w8, w0, w8, hi
; CHECK-NEXT: add w0, w8, #42 // =42
; CHECK-NEXT: ret
%c = icmp ult i16 %x, -43
@ -82,11 +81,10 @@ define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
; CHECK-NEXT: mov w10, #65493
; CHECK-NEXT: add w9, w0, #42 // =42
; CHECK-NEXT: cmp w8, w10
; CHECK-NEXT: csinv w0, w9, wzr, ls
; CHECK-NEXT: mov w9, #65493
; CHECK-NEXT: add w8, w0, #42 // =42
; CHECK-NEXT: cmp w9, w0, uxth
; CHECK-NEXT: csinv w0, w8, wzr, hs
; CHECK-NEXT: ret
%a = add i16 %x, 42
%c = icmp ugt i16 %x, -43

View File

@ -35,8 +35,7 @@ define i1 @shifts_eqcmp_i16_i8(i16 %x) nounwind {
define i1 @shifts_eqcmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: shifts_eqcmp_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cmp w0, w0, sxth
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = shl i32 %x, 16 ; 32-16
@ -48,8 +47,7 @@ define i1 @shifts_eqcmp_i32_i16(i32 %x) nounwind {
define i1 @shifts_eqcmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: shifts_eqcmp_i32_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cmp w0, w0, sxtb
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = shl i32 %x, 24 ; 32-8
@ -61,8 +59,7 @@ define i1 @shifts_eqcmp_i32_i8(i32 %x) nounwind {
define i1 @shifts_eqcmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: shifts_eqcmp_i64_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxtw
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = shl i64 %x, 32 ; 64-32
@ -74,8 +71,7 @@ define i1 @shifts_eqcmp_i64_i32(i64 %x) nounwind {
define i1 @shifts_eqcmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: shifts_eqcmp_i64_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxth
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = shl i64 %x, 48 ; 64-16
@ -87,8 +83,7 @@ define i1 @shifts_eqcmp_i64_i16(i64 %x) nounwind {
define i1 @shifts_eqcmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: shifts_eqcmp_i64_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxtb
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = shl i64 %x, 56 ; 64-8
@ -117,8 +112,7 @@ define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cmp w0, w0, sxth
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i32 %x, -32768 ; ~0U << (16-1)
@ -129,8 +123,7 @@ define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cmp w0, w0, sxtb
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i32 %x, -128 ; ~0U << (8-1)
@ -141,8 +134,7 @@ define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
define i1 @add_ugecmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxtw
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, -2147483648 ; ~0U << (32-1)
@ -153,8 +145,7 @@ define i1 @add_ugecmp_i64_i32(i64 %x) nounwind {
define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxth
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, -32768 ; ~0U << (16-1)
@ -165,8 +156,7 @@ define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxtb
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, -128 ; ~0U << (8-1)
@ -208,8 +198,7 @@ define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth w8, w0
; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cmp w0, w0, sxth
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 32768 ; 1U << (16-1)
@ -220,8 +209,7 @@ define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb w8, w0
; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: cmp w0, w0, sxtb
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i32 %x, 128 ; 1U << (8-1)
@ -232,8 +220,7 @@ define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
define i1 @add_ultcmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxtw
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
@ -244,8 +231,7 @@ define i1 @add_ultcmp_i64_i32(i64 %x) nounwind {
define i1 @add_ultcmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sxth x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxth
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 32768 ; 1U << (16-1)
@ -256,8 +242,7 @@ define i1 @add_ultcmp_i64_i16(i64 %x) nounwind {
define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sxtb x8, w0
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: cmp x0, w0, sxtb
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i64 %x, 128 ; 1U << (8-1)

View File

@ -0,0 +1,632 @@
; RUN: llc < %s -mtriple=arm64 | FileCheck %s
define i1 @testSwapCmpWithLSL64_1(i64 %a, i64 %b) {
; Missing ':' after CHECK-LABEL made the directive inert (FileCheck ignores
; a prefix without the colon), so the checks were not anchored per function.
; CHECK-LABEL: testSwapCmpWithLSL64_1:
; CHECK: cmp x1, x0, lsl #1
; CHECK-NEXT: cset w0, gt
entry:
%shl = shl i64 %a, 1
%cmp = icmp slt i64 %shl, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithLSL64_63(i64 %a, i64 %b) {
; CHECK-LABEL testSwapCmpWithLSL64_63:
; CHECK: cmp x1, x0, lsl #63
; CHECK-NEXT: cset w0, gt
entry:
%shl = shl i64 %a, 63
%cmp = icmp slt i64 %shl, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithLSL32_1(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmpWithLSL32_1:
; CHECK: cmp w1, w0, lsl #1
; CHECK-NEXT: cset w0, gt
entry:
%shl = shl i32 %a, 1
%cmp = icmp slt i32 %shl, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithLSL32_31(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmpWithLSL32_31:
; CHECK: cmp w1, w0, lsl #31
; CHECK-NEXT: cset w0, gt
entry:
%shl = shl i32 %a, 31
%cmp = icmp slt i32 %shl, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithLSR64_1(i64 %a, i64 %b) {
; CHECK-LABEL testSwapCmpWithLSR64_1:
; CHECK: cmp x1, x0, lsr #1
; CHECK-NEXT: cset w0, gt
entry:
%lshr = lshr i64 %a, 1
%cmp = icmp slt i64 %lshr, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithLSR64_63(i64 %a, i64 %b) {
; CHECK-LABEL testSwapCmpWithLSR64_63:
; CHECK: cmp x1, x0, lsr #63
; CHECK-NEXT: cset w0, gt
entry:
%lshr = lshr i64 %a, 63
%cmp = icmp slt i64 %lshr, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithLSR32_1(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmpWithLSR32_1:
; CHECK: cmp w1, w0, lsr #1
; CHECK-NEXT: cset w0, gt
entry:
%lshr = lshr i32 %a, 1
%cmp = icmp slt i32 %lshr, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithLSR32_31(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmpWithLSR32_31:
; CHECK: cmp w1, w0, lsr #31
; CHECK-NEXT: cset w0, gt
entry:
%lshr = lshr i32 %a, 31
%cmp = icmp slt i32 %lshr, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithASR64_1(i64 %a, i64 %b) {
; CHECK-LABEL testSwapCmpWithASR64_1:
; CHECK: cmp x1, x0, asr #1
; CHECK-NEXT: cset w0, gt
entry:
%ashr = ashr i64 %a, 1
%cmp = icmp slt i64 %ashr, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithASR64_63(i64 %a, i64 %b) {
; CHECK-LABEL testSwapCmpWithASR64_63:
; CHECK: cmp x1, x0, asr #63
; CHECK-NEXT: cset w0, gt
entry:
%ashr = ashr i64 %a, 63
%cmp = icmp slt i64 %ashr, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithASR32_1(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmpWithASR32_1:
; CHECK: cmp w1, w0, asr #1
; CHECK-NEXT: cset w0, gt
entry:
%ashr = ashr i32 %a, 1
%cmp = icmp slt i32 %ashr, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithASR32_31(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmpWithASR32_31:
; CHECK: cmp w1, w0, asr #31
; CHECK-NEXT: cset w0, gt
entry:
%ashr = ashr i32 %a, 31
%cmp = icmp slt i32 %ashr, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithShiftedZeroExtend32_64(i32 %a, i64 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend32_64:
; CHECK: cmp x1, w0, uxtw #2
; CHECK-NEXT: cset w0, lo
entry:
%a64 = zext i32 %a to i64
%shl.0 = shl i64 %a64, 2
%cmp = icmp ugt i64 %shl.0, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithShiftedZeroExtend16_64(i16 %a, i64 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend16_64:
; CHECK: cmp x1, w0, uxth #2
; CHECK-NEXT: cset w0, lo
entry:
%a64 = zext i16 %a to i64
%shl.0 = shl i64 %a64, 2
%cmp = icmp ugt i64 %shl.0, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithShiftedZeroExtend8_64(i8 %a, i64 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend8_64:
; CHECK: cmp x1, w0, uxtb #4
; CHECK-NEXT: cset w0, lo
entry:
%a64 = zext i8 %a to i64
%shl.2 = shl i64 %a64, 4
%cmp = icmp ugt i64 %shl.2, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithShiftedZeroExtend16_32(i16 %a, i32 %b) {
; Label was a copy-paste of the ...8_64 test; fixed to match this function,
; and the missing ':' after CHECK-LABEL added so FileCheck honors it.
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend16_32:
; CHECK: cmp w1, w0, uxth #3
; CHECK-NEXT: cset w0, lo
entry:
%a32 = zext i16 %a to i32
%shl = shl i32 %a32, 3
%cmp = icmp ugt i32 %shl, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithShiftedZeroExtend8_32(i8 %a, i32 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedZeroExtend8_32:
; CHECK: cmp w1, w0, uxtb #4
; CHECK-NEXT: cset w0, lo
entry:
%a32 = zext i8 %a to i32
%shl = shl i32 %a32, 4
%cmp = icmp ugt i32 %shl, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithTooLargeShiftedZeroExtend8_32(i8 %a, i32 %b) {
; A shift of 5 exceeds the extended-register limit of 4, so the zero-extend
; must be materialized separately.
; CHECK-LABEL: testSwapCmpWithTooLargeShiftedZeroExtend8_32:
; CHECK: and [[REG:w[0-9]+]], w0, #0xff
; CHECK: cmp w1, [[REG]], lsl #5
; CHECK-NEXT: cset w0, lo
entry:
%a32 = zext i8 %a to i32
%shl = shl i32 %a32, 5
%cmp = icmp ugt i32 %shl, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithZeroExtend8_32(i8 %a, i32 %b) {
; CHECK-LABEL: testSwapCmpWithZeroExtend8_32:
; CHECK: cmp w1, w0, uxtb
; CHECK-NEXT: cset w0, lo
entry:
%a32 = zext i8 %a to i32
%cmp = icmp ugt i32 %a32, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithShiftedSignExtend32_64(i32 %a, i64 %b) {
; Label said "ZeroExtend"; corrected to match the function name, and the
; missing ':' after CHECK-LABEL added so FileCheck honors it.
; CHECK-LABEL: testSwapCmpWithShiftedSignExtend32_64:
; CHECK: cmp x1, w0, sxtw #2
; CHECK-NEXT: cset w0, lo
entry:
%a64 = sext i32 %a to i64
%shl.0 = shl i64 %a64, 2
%cmp = icmp ugt i64 %shl.0, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithShiftedSignExtend16_64(i16 %a, i64 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedSignExtend16_64:
; CHECK: cmp x1, w0, sxth #2
; CHECK-NEXT: cset w0, lo
entry:
%a64 = sext i16 %a to i64
%shl.0 = shl i64 %a64, 2
%cmp = icmp ugt i64 %shl.0, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithShiftedSignExtend8_64(i8 %a, i64 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedSignExtend8_64:
; CHECK: cmp x1, w0, sxtb #4
; CHECK-NEXT: cset w0, lo
entry:
%a64 = sext i8 %a to i64
%shl.2 = shl i64 %a64, 4
%cmp = icmp ugt i64 %shl.2, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithShiftedSignExtend16_32(i16 %a, i32 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedSignExtend16_32:
; CHECK: cmp w1, w0, sxth #3
; CHECK-NEXT: cset w0, lo
entry:
%a32 = sext i16 %a to i32
%shl = shl i32 %a32, 3
%cmp = icmp ugt i32 %shl, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithShiftedSignExtend8_32(i8 %a, i32 %b) {
; CHECK-LABEL: testSwapCmpWithShiftedSignExtend8_32:
; CHECK: cmp w1, w0, sxtb #4
; CHECK-NEXT: cset w0, lo
entry:
%a32 = sext i8 %a to i32
%shl = shl i32 %a32, 4
%cmp = icmp ugt i32 %shl, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithTooLargeShiftedSignExtend8_32(i8 %a, i32 %b) {
; A shift of 5 exceeds the extended-register limit of 4, so the sign-extend
; must be materialized separately.
; CHECK-LABEL: testSwapCmpWithTooLargeShiftedSignExtend8_32:
; CHECK: sxtb [[REG:w[0-9]+]], w0
; CHECK-NEXT: cmp w1, [[REG]], lsl #5
; CHECK-NEXT: cset w0, lo
entry:
%a32 = sext i8 %a to i32
%shl = shl i32 %a32, 5
%cmp = icmp ugt i32 %shl, %b
ret i1 %cmp
}
define i1 @testSwapCmpWithSignExtend8_32(i8 %a, i32 %b) {
; CHECK-LABEL: testSwapCmpWithSignExtend8_32:
; CHECK: cmp w1, w0, sxtb
; CHECK-NEXT: cset w0, lo
entry:
%a32 = sext i8 %a to i32
%cmp = icmp ugt i32 %a32, %b
ret i1 %cmp
}
define i1 @testSwapCmnWithLSL64_1(i64 %a, i64 %b) {
; CHECK-LABEL testSwapCmnWithLSL64_1:
; CHECK: cmn x1, x0, lsl #1
; CHECK-NEXT: cset w0, ne
entry:
%shl = shl i64 %a, 1
%na = sub i64 0, %shl
%cmp = icmp ne i64 %na, %b
ret i1 %cmp
}
; Note: testing with a 62 bits shift as 63 has another optimization kicking in.
define i1 @testSwapCmnWithLSL64_62(i64 %a, i64 %b) {
; CHECK-LABEL testSwapCmnWithLSL64_62:
; CHECK: cmn x1, x0, lsl #62
; CHECK-NEXT: cset w0, ne
entry:
%shl = shl i64 %a, 62
%na = sub i64 0, %shl
%cmp = icmp ne i64 %na, %b
ret i1 %cmp
}
; Note: the 63 bits shift triggers a different optimization path, which leads
; to a similar result in terms of performances. We try to catch here any change
; so that this test can be adapted should the optimization be done with the
; operand swap.
define i1 @testSwapCmnWithLSL64_63(i64 %a, i64 %b) {
; CHECK-LABEL testSwapCmnWithLSL64_63:
; CHECK: cmp x1, x0, lsl #63
; CHECK-NEXT: cset w0, ne
entry:
%shl = shl i64 %a, 63
%na = sub i64 0, %shl
%cmp = icmp ne i64 %na, %b
ret i1 %cmp
}
define i1 @testSwapCmnWithLSL32_1(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmnWithLSL32_1:
; CHECK: cmn w1, w0, lsl #1
; CHECK-NEXT: cset w0, ne
entry:
%shl = shl i32 %a, 1
%na = sub i32 0, %shl
%cmp = icmp ne i32 %na, %b
ret i1 %cmp
}
; Note: testing with a 30 bits shift as 31 has another optimization kicking in.
define i1 @testSwapCmnWithLSL32_30(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmnWithLSL32_30:
; CHECK: cmn w1, w0, lsl #30
; CHECK-NEXT: cset w0, ne
entry:
%shl = shl i32 %a, 30
%na = sub i32 0, %shl
%cmp = icmp ne i32 %na, %b
ret i1 %cmp
}
; Note: the 31 bits shift triggers a different optimization path, which leads
; to a similar result in terms of performances. We try to catch here any change
; so that this test can be adapted should the optimization be done with the
; operand swap.
define i1 @testSwapCmnWithLSL32_31(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmnWithLSL32_31:
; CHECK: cmp w1, w0, lsl #31
; CHECK-NEXT: cset w0, ne
entry:
%shl = shl i32 %a, 31
%na = sub i32 0, %shl
%cmp = icmp ne i32 %na, %b
ret i1 %cmp
}
define i1 @testSwapCmnWithLSR64_1(i64 %a, i64 %b) {
; CHECK-LABEL testSwapCmnWithLSR64_1:
; CHECK: cmn x1, x0, lsr #1
; CHECK-NEXT: cset w0, ne
entry:
%lshr = lshr i64 %a, 1
%na = sub i64 0, %lshr
%cmp = icmp ne i64 %na, %b
ret i1 %cmp
}
; Note: testing with a 62 bits shift as 63 has another optimization kicking in.
define i1 @testSwapCmnWithLSR64_62(i64 %a, i64 %b) {
; CHECK-LABEL testSwapCmnWithLSR64_62:
; CHECK: cmn x1, x0, lsr #62
; CHECK-NEXT: cset w0, ne
entry:
%lshr = lshr i64 %a, 62
%na = sub i64 0, %lshr
%cmp = icmp ne i64 %na, %b
ret i1 %cmp
}
; Note: the 63 bits shift triggers a different optimization path, which leads
; to a similar result in terms of performances. We try to catch here any change
; so that this test can be adapted should the optimization be done with the
; operand swap.
define i1 @testSwapCmnWithLSR64_63(i64 %a, i64 %b) {
; CHECK-LABEL testSwapCmnWithLSR64_63:
; CHECK: cmp x1, x0, asr #63
; CHECK-NEXT: cset w0, ne
entry:
%lshr = lshr i64 %a, 63
%na = sub i64 0, %lshr
%cmp = icmp ne i64 %na, %b
ret i1 %cmp
}
define i1 @testSwapCmnWithLSR32_1(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmnWithLSR32_1:
; CHECK: cmn w1, w0, lsr #1
; CHECK-NEXT: cset w0, ne
entry:
%lshr = lshr i32 %a, 1
%na = sub i32 0, %lshr
%cmp = icmp ne i32 %na, %b
ret i1 %cmp
}
; Note: testing with a 30 bits shift as 31 has another optimization kicking in.
define i1 @testSwapCmnWithLSR32_30(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmnWithLSR32_30:
; CHECK: cmn w1, w0, lsr #30
; CHECK-NEXT: cset w0, ne
entry:
%lshr = lshr i32 %a, 30
%na = sub i32 0, %lshr
%cmp = icmp ne i32 %na, %b
ret i1 %cmp
}
; Note: the 31 bits shift triggers a different optimization path, which leads
; to a similar result in terms of performances. We try to catch here any change
; so that this test can be adapted should the optimization be done with the
; operand swap.
define i1 @testSwapCmnWithLSR32_31(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmnWithLSR32_31:
; CHECK: cmp w1, w0, asr #31
; CHECK-NEXT: cset w0, ne
entry:
%lshr = lshr i32 %a, 31
%na = sub i32 0, %lshr
%cmp = icmp ne i32 %na, %b
ret i1 %cmp
}
define i1 @testSwapCmnWithASR64_1(i64 %a, i64 %b) {
; CHECK-LABEL testSwapCmnWithASR64_1:
; CHECK: cmn x1, x0, asr #3
; CHECK-NEXT: cset w0, ne
entry:
%lshr = ashr i64 %a, 3
%na = sub i64 0, %lshr
%cmp = icmp ne i64 %na, %b
ret i1 %cmp
}
; Note: testing with a 62 bits shift as 63 has another optimization kicking in.
define i1 @testSwapCmnWithASR64_62(i64 %a, i64 %b) {
; CHECK-LABEL testSwapCmnWithASR64_62:
; CHECK: cmn x1, x0, asr #62
; CHECK-NEXT: cset w0, ne
entry:
%lshr = ashr i64 %a, 62
%na = sub i64 0, %lshr
%cmp = icmp ne i64 %na, %b
ret i1 %cmp
}
; Note: the 63 bits shift triggers a different optimization path, which leads
; to a similar result in terms of performances. We try to catch here any change
; so that this test can be adapted should the optimization be done with the
; operand swap.
define i1 @testSwapCmnWithASR64_63(i64 %a, i64 %b) {
; CHECK-LABEL testSwapCmnWithASR64_63:
; CHECK: cmp x1, x0, lsr #63
; CHECK-NEXT: cset w0, ne
entry:
%lshr = ashr i64 %a, 63
%na = sub i64 0, %lshr
%cmp = icmp ne i64 %na, %b
ret i1 %cmp
}
define i1 @testSwapCmnWithASR32_1(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmnWithASR32_1:
; CHECK: cmn w1, w0, asr #1
; CHECK-NEXT: cset w0, eq
entry:
%lshr = ashr i32 %a, 1
%na = sub i32 0, %lshr
%cmp = icmp eq i32 %na, %b
ret i1 %cmp
}
; Note: testing with a 30 bits shift as 31 has another optimization kicking in.
define i1 @testSwapCmnWithASR32_30(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmnWithASR32_30:
; CHECK: cmn w1, w0, asr #30
; CHECK-NEXT: cset w0, ne
entry:
%lshr = ashr i32 %a, 30
%na = sub i32 0, %lshr
%cmp = icmp ne i32 %na, %b
ret i1 %cmp
}
; Note: the 31 bits shift triggers a different optimization path, which leads
; to a similar result in terms of performances. We try to catch here any change
; so that this test can be adapted should the optimization be done with the
; operand swap.
define i1 @testSwapCmnWithASR32_31(i32 %a, i32 %b) {
; CHECK-LABEL testSwapCmnWithASR32_31:
; CHECK: cmp w1, w0, lsr #31
; CHECK-NEXT: cset w0, ne
entry:
%lshr = ashr i32 %a, 31
%na = sub i32 0, %lshr
%cmp = icmp ne i32 %na, %b
ret i1 %cmp
}
define i64 @testSwapCmpToCmnWithZeroExtend(i32 %a32, i16 %a16, i8 %a8, i64 %b64, i32 %b32) {
; CHECK-LABEL testSwapCmpToCmnWithZeroExtend:
t0:
%conv0 = zext i32 %a32 to i64
%shl0 = shl i64 %conv0, 1
%na0 = sub i64 0, %shl0
%cmp0 = icmp ne i64 %na0, %b64
; CHECK: cmn x3, w0, uxtw #1
br i1 %cmp0, label %t1, label %end
t1:
%conv1 = zext i16 %a16 to i64
%shl1 = shl i64 %conv1, 4
%na1 = sub i64 0, %shl1
%cmp1 = icmp ne i64 %na1, %b64
; CHECK: cmn x3, w1, uxth #4
br i1 %cmp1, label %t2, label %end
t2:
%conv2 = zext i8 %a8 to i64
%shl2 = shl i64 %conv2, 3
%na2 = sub i64 0, %shl2
%cmp2 = icmp ne i64 %na2, %b64
; CHECK: cmn x3, w2, uxtb #3
br i1 %cmp2, label %t3, label %end
t3:
%conv3 = zext i16 %a16 to i32
%shl3 = shl i32 %conv3, 2
%na3 = sub i32 0, %shl3
%cmp3 = icmp ne i32 %na3, %b32
; CHECK: cmn w4, w1, uxth #2
br i1 %cmp3, label %t4, label %end
t4:
%conv4 = zext i8 %a8 to i32
%shl4 = shl i32 %conv4, 1
%na4 = sub i32 0, %shl4
%cmp4 = icmp ne i32 %na4, %b32
; CHECK: cmn w4, w2, uxtb #1
br i1 %cmp4, label %t5, label %end
t5:
%conv5 = zext i8 %a8 to i32
%shl5 = shl i32 %conv5, 5
%na5 = sub i32 0, %shl5
%cmp5 = icmp ne i32 %na5, %b32
; CHECK: and [[REG:w[0-9]+]], w2, #0xff
; CHECK: cmn w4, [[REG]], lsl #5
br i1 %cmp5, label %t6, label %end
t6:
%conv6 = zext i8 %a8 to i32
%na6 = sub i32 0, %conv6
%cmp6 = icmp ne i32 %na6, %b32
; CHECK: cmn w4, w2, uxtb
br i1 %cmp6, label %t7, label %end
t7:
ret i64 0
end:
ret i64 1
}
define i64 @testSwapCmpToCmnWithSignExtend(i32 %a32, i16 %a16, i8 %a8, i64 %b64, i32 %b32) {
; CHECK-LABEL testSwapCmpToCmnWithSignExtend:
t0:
%conv0 = sext i32 %a32 to i64
%shl0 = shl i64 %conv0, 1
%na0 = sub i64 0, %shl0
%cmp0 = icmp ne i64 %na0, %b64
; CHECK: cmn x3, w0, sxtw #1
br i1 %cmp0, label %t1, label %end
t1:
%conv1 = sext i16 %a16 to i64
%shl1 = shl i64 %conv1, 4
%na1 = sub i64 0, %shl1
%cmp1 = icmp ne i64 %na1, %b64
; CHECK: cmn x3, w1, sxth #4
br i1 %cmp1, label %t2, label %end
t2:
%conv2 = sext i8 %a8 to i64
%shl2 = shl i64 %conv2, 3
%na2 = sub i64 0, %shl2
%cmp2 = icmp ne i64 %na2, %b64
; CHECK: cmn x3, w2, sxtb #3
br i1 %cmp2, label %t3, label %end
t3:
%conv3 = sext i16 %a16 to i32
%shl3 = shl i32 %conv3, 2
%na3 = sub i32 0, %shl3
%cmp3 = icmp ne i32 %na3, %b32
; CHECK: cmn w4, w1, sxth #2
br i1 %cmp3, label %t4, label %end
t4:
%conv4 = sext i8 %a8 to i32
%shl4 = shl i32 %conv4, 1
%na4 = sub i32 0, %shl4
%cmp4 = icmp ne i32 %na4, %b32
; CHECK: cmn w4, w2, sxtb #1
br i1 %cmp4, label %t5, label %end
t5:
%conv5 = sext i8 %a8 to i32
%shl5 = shl i32 %conv5, 5
%na5 = sub i32 0, %shl5
%cmp5 = icmp ne i32 %na5, %b32
; CHECK: sxtb [[REG:w[0-9]+]], w2
; CHECK: cmn w4, [[REG]], lsl #5
br i1 %cmp5, label %t6, label %end
t6:
%conv6 = sext i8 %a8 to i32
%na6 = sub i32 0, %conv6
%cmp6 = icmp ne i32 %na6, %b32
; CHECK: cmn w4, w2, sxtb
br i1 %cmp6, label %t7, label %end
t7:
ret i64 0
end:
ret i64 1
}