mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-10-09 04:14:11 +00:00
[AArch64] Add foldCSELOfCSEL
DAG combine
Differential Revision: https://reviews.llvm.org/D125504
This commit is contained in:
parent
30bd90b8cd
commit
6f9423ef06
@ -18420,6 +18420,54 @@ static SDValue foldCSELofCTTZ(SDNode *N, SelectionDAG &DAG) {
|
||||
BitWidthMinusOne);
|
||||
}
|
||||
|
||||
// (CSEL l r cc1 (CMP (CSEL x y cc2 cond) x)) => (CSEL l r cc2 cond)
|
||||
// (CSEL l r cc1 (CMP (CSEL x y cc2 cond) y)) => (CSEL l r !cc2 cond)
|
||||
// Where cc1 is any reflexive relation (eg EQ)
|
||||
|
||||
// (CSEL l r cc1 (CMP (CSEL x y cc2 cond) x)) => (CSEL l r !cc2 cond)
|
||||
// (CSEL l r cc1 (CMP (CSEL x y cc2 cond) y)) => (CSEL l r cc2 cond)
|
||||
// Where cc1 is any irreflexive relation (eg NE)
|
||||
static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG) {
|
||||
SDValue L = Op->getOperand(0);
|
||||
SDValue R = Op->getOperand(1);
|
||||
AArch64CC::CondCode OpCC =
|
||||
static_cast<AArch64CC::CondCode>(Op->getConstantOperandVal(2));
|
||||
|
||||
SDValue OpCmp = Op->getOperand(3);
|
||||
if (!isCMP(OpCmp))
|
||||
return SDValue();
|
||||
|
||||
SDValue CmpLHS = OpCmp.getOperand(0);
|
||||
SDValue CmpRHS = OpCmp.getOperand(1);
|
||||
|
||||
if (CmpRHS.getOpcode() == AArch64ISD::CSEL)
|
||||
std::swap(CmpLHS, CmpRHS);
|
||||
else if (CmpLHS.getOpcode() != AArch64ISD::CSEL)
|
||||
return SDValue();
|
||||
|
||||
SDValue X = CmpLHS->getOperand(0);
|
||||
SDValue Y = CmpLHS->getOperand(1);
|
||||
AArch64CC::CondCode CC =
|
||||
static_cast<AArch64CC::CondCode>(CmpLHS->getConstantOperandVal(2));
|
||||
SDValue Cond = CmpLHS->getOperand(3);
|
||||
|
||||
if (CmpRHS == Y)
|
||||
CC = AArch64CC::getInvertedCondCode(CC);
|
||||
else if (CmpRHS != X)
|
||||
return SDValue();
|
||||
|
||||
if (AArch64CC::isIrreflexive(OpCC))
|
||||
CC = AArch64CC::getInvertedCondCode(CC);
|
||||
else if (!AArch64CC::isReflexive(OpCC))
|
||||
return SDValue();
|
||||
|
||||
SDLoc DL(Op);
|
||||
EVT VT = Op->getValueType(0);
|
||||
|
||||
SDValue CCValue = DAG.getConstant(CC, DL, MVT::i32);
|
||||
return DAG.getNode(AArch64ISD::CSEL, DL, VT, L, R, CCValue, Cond);
|
||||
}
|
||||
|
||||
// Optimize CSEL instructions
|
||||
static SDValue performCSELCombine(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
@ -18428,6 +18476,9 @@ static SDValue performCSELCombine(SDNode *N,
|
||||
if (N->getOperand(0) == N->getOperand(1))
|
||||
return N->getOperand(0);
|
||||
|
||||
if (SDValue R = foldCSELOfCSEL(N, DAG))
|
||||
return R;
|
||||
|
||||
// CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
|
||||
// CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
|
||||
if (SDValue Folded = foldCSELofCTTZ(N, DAG))
|
||||
|
@ -331,6 +331,41 @@ inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) {
|
||||
case LE: return Z; // Z == 1 || N != V
|
||||
}
|
||||
}
|
||||
|
||||
/// Return true if Code is a reflexive relationship:
|
||||
/// forall x. (CSET Code (CMP x x)) == 1
|
||||
inline static bool isReflexive(CondCode Code) {
|
||||
switch (Code) {
|
||||
case EQ:
|
||||
case HS:
|
||||
case PL:
|
||||
case LS:
|
||||
case GE:
|
||||
case LE:
|
||||
case AL:
|
||||
case NV:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Return true if Code is an irreflexive relationship:
|
||||
/// forall x. (CSET Code (CMP x x)) == 0
|
||||
inline static bool isIrreflexive(CondCode Code) {
|
||||
switch (Code) {
|
||||
case NE:
|
||||
case LO:
|
||||
case MI:
|
||||
case HI:
|
||||
case LT:
|
||||
case GT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // end namespace AArch64CC
|
||||
|
||||
struct SysAlias {
|
||||
|
@ -396,10 +396,9 @@ define i64 @ustest_f64i64(double %x) {
|
||||
; CHECK-NEXT: .cfi_offset w30, -16
|
||||
; CHECK-NEXT: bl __fixdfti
|
||||
; CHECK-NEXT: cmp x1, #1
|
||||
; CHECK-NEXT: csel x8, x0, xzr, lt
|
||||
; CHECK-NEXT: csinc x9, x1, xzr, lt
|
||||
; CHECK-NEXT: cmp x8, #0
|
||||
; CHECK-NEXT: cset w10, ne
|
||||
; CHECK-NEXT: csel x8, x0, xzr, lt
|
||||
; CHECK-NEXT: cset w10, lt
|
||||
; CHECK-NEXT: cmp x9, #0
|
||||
; CHECK-NEXT: cset w9, gt
|
||||
; CHECK-NEXT: csel w9, w10, w9, eq
|
||||
@ -459,10 +458,9 @@ define i64 @ustest_f32i64(float %x) {
|
||||
; CHECK-NEXT: .cfi_offset w30, -16
|
||||
; CHECK-NEXT: bl __fixsfti
|
||||
; CHECK-NEXT: cmp x1, #1
|
||||
; CHECK-NEXT: csel x8, x0, xzr, lt
|
||||
; CHECK-NEXT: csinc x9, x1, xzr, lt
|
||||
; CHECK-NEXT: cmp x8, #0
|
||||
; CHECK-NEXT: cset w10, ne
|
||||
; CHECK-NEXT: csel x8, x0, xzr, lt
|
||||
; CHECK-NEXT: cset w10, lt
|
||||
; CHECK-NEXT: cmp x9, #0
|
||||
; CHECK-NEXT: cset w9, gt
|
||||
; CHECK-NEXT: csel w9, w10, w9, eq
|
||||
@ -528,10 +526,9 @@ define i64 @ustest_f16i64(half %x) {
|
||||
; CHECK-NEXT: .cfi_offset w30, -16
|
||||
; CHECK-NEXT: bl __fixhfti
|
||||
; CHECK-NEXT: cmp x1, #1
|
||||
; CHECK-NEXT: csel x8, x0, xzr, lt
|
||||
; CHECK-NEXT: csinc x9, x1, xzr, lt
|
||||
; CHECK-NEXT: cmp x8, #0
|
||||
; CHECK-NEXT: cset w10, ne
|
||||
; CHECK-NEXT: csel x8, x0, xzr, lt
|
||||
; CHECK-NEXT: cset w10, lt
|
||||
; CHECK-NEXT: cmp x9, #0
|
||||
; CHECK-NEXT: cset w9, gt
|
||||
; CHECK-NEXT: csel w9, w10, w9, eq
|
||||
|
@ -397,37 +397,35 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
|
||||
; CHECK-NEXT: .cfi_offset w20, -16
|
||||
; CHECK-NEXT: .cfi_offset w30, -32
|
||||
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: mov d0, v0.d[1]
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-NEXT: bl __fixdfti
|
||||
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: mov x19, x0
|
||||
; CHECK-NEXT: mov x20, x1
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-NEXT: mov d0, v0.d[1]
|
||||
; CHECK-NEXT: bl __fixdfti
|
||||
; CHECK-NEXT: cmp x1, #1
|
||||
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
|
||||
; CHECK-NEXT: csel x8, x0, xzr, lt
|
||||
; CHECK-NEXT: csinc x9, x1, xzr, lt
|
||||
; CHECK-NEXT: csel x8, x0, xzr, lt
|
||||
; CHECK-NEXT: cset w10, lt
|
||||
; CHECK-NEXT: cmp x9, #0
|
||||
; CHECK-NEXT: cset w9, gt
|
||||
; CHECK-NEXT: csel w9, w10, w9, eq
|
||||
; CHECK-NEXT: cmp x20, #1
|
||||
; CHECK-NEXT: csel x10, x19, xzr, lt
|
||||
; CHECK-NEXT: csinc x11, x20, xzr, lt
|
||||
; CHECK-NEXT: cmp x10, #0
|
||||
; CHECK-NEXT: cset w12, ne
|
||||
; CHECK-NEXT: csel x10, x19, xzr, lt
|
||||
; CHECK-NEXT: cset w12, lt
|
||||
; CHECK-NEXT: cmp x11, #0
|
||||
; CHECK-NEXT: cset w11, gt
|
||||
; CHECK-NEXT: csel w11, w12, w11, eq
|
||||
; CHECK-NEXT: cmp x8, #0
|
||||
; CHECK-NEXT: cset w12, ne
|
||||
; CHECK-NEXT: cmp x9, #0
|
||||
; CHECK-NEXT: cset w9, gt
|
||||
; CHECK-NEXT: csel w9, w12, w9, eq
|
||||
; CHECK-NEXT: cmp w11, #0
|
||||
; CHECK-NEXT: csel x10, x10, xzr, ne
|
||||
; CHECK-NEXT: cmp w9, #0
|
||||
; CHECK-NEXT: csel x8, x8, xzr, ne
|
||||
; CHECK-NEXT: cmp w11, #0
|
||||
; CHECK-NEXT: csel x9, x10, xzr, ne
|
||||
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: fmov d1, x9
|
||||
; CHECK-NEXT: fmov d0, x10
|
||||
; CHECK-NEXT: fmov d1, x8
|
||||
; CHECK-NEXT: mov v0.d[1], v1.d[0]
|
||||
; CHECK-NEXT: add sp, sp, #48
|
||||
; CHECK-NEXT: ret
|
||||
@ -511,37 +509,35 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
|
||||
; CHECK-NEXT: .cfi_offset w30, -32
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: mov s0, v0.s[1]
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
|
||||
; CHECK-NEXT: bl __fixsfti
|
||||
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: mov x19, x0
|
||||
; CHECK-NEXT: mov x20, x1
|
||||
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
|
||||
; CHECK-NEXT: mov s0, v0.s[1]
|
||||
; CHECK-NEXT: bl __fixsfti
|
||||
; CHECK-NEXT: cmp x1, #1
|
||||
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
|
||||
; CHECK-NEXT: csinc x8, x1, xzr, lt
|
||||
; CHECK-NEXT: csel x9, x0, xzr, lt
|
||||
; CHECK-NEXT: csinc x9, x1, xzr, lt
|
||||
; CHECK-NEXT: csel x8, x0, xzr, lt
|
||||
; CHECK-NEXT: cset w10, lt
|
||||
; CHECK-NEXT: cmp x9, #0
|
||||
; CHECK-NEXT: cset w9, gt
|
||||
; CHECK-NEXT: csel w9, w10, w9, eq
|
||||
; CHECK-NEXT: cmp x20, #1
|
||||
; CHECK-NEXT: csel x10, x19, xzr, lt
|
||||
; CHECK-NEXT: csinc x11, x20, xzr, lt
|
||||
; CHECK-NEXT: cmp x10, #0
|
||||
; CHECK-NEXT: cset w12, ne
|
||||
; CHECK-NEXT: csel x10, x19, xzr, lt
|
||||
; CHECK-NEXT: cset w12, lt
|
||||
; CHECK-NEXT: cmp x11, #0
|
||||
; CHECK-NEXT: cset w11, gt
|
||||
; CHECK-NEXT: csel w11, w12, w11, eq
|
||||
; CHECK-NEXT: cmp x9, #0
|
||||
; CHECK-NEXT: cset w12, ne
|
||||
; CHECK-NEXT: cmp x8, #0
|
||||
; CHECK-NEXT: cset w8, gt
|
||||
; CHECK-NEXT: csel w8, w12, w8, eq
|
||||
; CHECK-NEXT: cmp w8, #0
|
||||
; CHECK-NEXT: csel x8, x9, xzr, ne
|
||||
; CHECK-NEXT: cmp w11, #0
|
||||
; CHECK-NEXT: csel x9, x10, xzr, ne
|
||||
; CHECK-NEXT: csel x10, x10, xzr, ne
|
||||
; CHECK-NEXT: cmp w9, #0
|
||||
; CHECK-NEXT: csel x8, x8, xzr, ne
|
||||
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: fmov d1, x9
|
||||
; CHECK-NEXT: fmov d0, x10
|
||||
; CHECK-NEXT: fmov d1, x8
|
||||
; CHECK-NEXT: mov v0.d[1], v1.d[0]
|
||||
; CHECK-NEXT: add sp, sp, #48
|
||||
; CHECK-NEXT: ret
|
||||
@ -637,37 +633,35 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
|
||||
; CHECK-NEXT: .cfi_offset w30, -32
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
|
||||
; CHECK-NEXT: mov h0, v0.h[1]
|
||||
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0
|
||||
; CHECK-NEXT: bl __fixhfti
|
||||
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: mov x19, x0
|
||||
; CHECK-NEXT: mov x20, x1
|
||||
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0
|
||||
; CHECK-NEXT: mov h0, v0.h[1]
|
||||
; CHECK-NEXT: bl __fixhfti
|
||||
; CHECK-NEXT: cmp x1, #1
|
||||
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
|
||||
; CHECK-NEXT: csinc x8, x1, xzr, lt
|
||||
; CHECK-NEXT: csel x9, x0, xzr, lt
|
||||
; CHECK-NEXT: csinc x9, x1, xzr, lt
|
||||
; CHECK-NEXT: csel x8, x0, xzr, lt
|
||||
; CHECK-NEXT: cset w10, lt
|
||||
; CHECK-NEXT: cmp x9, #0
|
||||
; CHECK-NEXT: cset w9, gt
|
||||
; CHECK-NEXT: csel w9, w10, w9, eq
|
||||
; CHECK-NEXT: cmp x20, #1
|
||||
; CHECK-NEXT: csel x10, x19, xzr, lt
|
||||
; CHECK-NEXT: csinc x11, x20, xzr, lt
|
||||
; CHECK-NEXT: cmp x10, #0
|
||||
; CHECK-NEXT: cset w12, ne
|
||||
; CHECK-NEXT: csel x10, x19, xzr, lt
|
||||
; CHECK-NEXT: cset w12, lt
|
||||
; CHECK-NEXT: cmp x11, #0
|
||||
; CHECK-NEXT: cset w11, gt
|
||||
; CHECK-NEXT: csel w11, w12, w11, eq
|
||||
; CHECK-NEXT: cmp x9, #0
|
||||
; CHECK-NEXT: cset w12, ne
|
||||
; CHECK-NEXT: cmp x8, #0
|
||||
; CHECK-NEXT: cset w8, gt
|
||||
; CHECK-NEXT: csel w8, w12, w8, eq
|
||||
; CHECK-NEXT: cmp w8, #0
|
||||
; CHECK-NEXT: csel x8, x9, xzr, ne
|
||||
; CHECK-NEXT: cmp w11, #0
|
||||
; CHECK-NEXT: csel x9, x10, xzr, ne
|
||||
; CHECK-NEXT: csel x10, x10, xzr, ne
|
||||
; CHECK-NEXT: cmp w9, #0
|
||||
; CHECK-NEXT: csel x8, x8, xzr, ne
|
||||
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: fmov d1, x9
|
||||
; CHECK-NEXT: fmov d0, x10
|
||||
; CHECK-NEXT: fmov d1, x8
|
||||
; CHECK-NEXT: mov v0.d[1], v1.d[0]
|
||||
; CHECK-NEXT: add sp, sp, #48
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -23,7 +23,6 @@ define i1 @test_signed_i1_f32(float %f) nounwind {
|
||||
; CHECK-NEXT: fcvtzs w8, s0
|
||||
; CHECK-NEXT: cmp w8, #0
|
||||
; CHECK-NEXT: csel w8, w8, wzr, lt
|
||||
; CHECK-NEXT: cmp w8, #0
|
||||
; CHECK-NEXT: csinv w8, w8, wzr, ge
|
||||
; CHECK-NEXT: and w0, w8, #0x1
|
||||
; CHECK-NEXT: ret
|
||||
@ -201,7 +200,6 @@ define i1 @test_signed_i1_f64(double %f) nounwind {
|
||||
; CHECK-NEXT: fcvtzs w8, d0
|
||||
; CHECK-NEXT: cmp w8, #0
|
||||
; CHECK-NEXT: csel w8, w8, wzr, lt
|
||||
; CHECK-NEXT: cmp w8, #0
|
||||
; CHECK-NEXT: csinv w8, w8, wzr, ge
|
||||
; CHECK-NEXT: and w0, w8, #0x1
|
||||
; CHECK-NEXT: ret
|
||||
@ -382,7 +380,6 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
|
||||
; CHECK-CVT-NEXT: fcvtzs w8, s0
|
||||
; CHECK-CVT-NEXT: cmp w8, #0
|
||||
; CHECK-CVT-NEXT: csel w8, w8, wzr, lt
|
||||
; CHECK-CVT-NEXT: cmp w8, #0
|
||||
; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge
|
||||
; CHECK-CVT-NEXT: and w0, w8, #0x1
|
||||
; CHECK-CVT-NEXT: ret
|
||||
@ -392,7 +389,6 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
|
||||
; CHECK-FP16-NEXT: fcvtzs w8, h0
|
||||
; CHECK-FP16-NEXT: cmp w8, #0
|
||||
; CHECK-FP16-NEXT: csel w8, w8, wzr, lt
|
||||
; CHECK-FP16-NEXT: cmp w8, #0
|
||||
; CHECK-FP16-NEXT: csinv w8, w8, wzr, ge
|
||||
; CHECK-FP16-NEXT: and w0, w8, #0x1
|
||||
; CHECK-FP16-NEXT: ret
|
||||
|
@ -1306,11 +1306,9 @@ define <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) {
|
||||
; CHECK-NEXT: fcvtzs w8, d1
|
||||
; CHECK-NEXT: cmp w8, #0
|
||||
; CHECK-NEXT: csel w8, w8, wzr, lt
|
||||
; CHECK-NEXT: cmp w8, #0
|
||||
; CHECK-NEXT: csinv w8, w8, wzr, ge
|
||||
; CHECK-NEXT: cmp w9, #0
|
||||
; CHECK-NEXT: csel w9, w9, wzr, lt
|
||||
; CHECK-NEXT: cmp w9, #0
|
||||
; CHECK-NEXT: csinv w9, w9, wzr, ge
|
||||
; CHECK-NEXT: fmov s0, w9
|
||||
; CHECK-NEXT: mov v0.s[1], w8
|
||||
@ -2062,57 +2060,49 @@ define <8 x i1> @test_signed_v8f16_v8i1(<8 x half> %f) {
|
||||
; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
|
||||
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
|
||||
; CHECK-CVT-NEXT: mov s2, v1.s[1]
|
||||
; CHECK-CVT-NEXT: mov s3, v1.s[2]
|
||||
; CHECK-CVT-NEXT: fcvtzs w9, s1
|
||||
; CHECK-CVT-NEXT: mov s1, v1.s[3]
|
||||
; CHECK-CVT-NEXT: fcvtzs w13, s0
|
||||
; CHECK-CVT-NEXT: fcvtzs w8, s2
|
||||
; CHECK-CVT-NEXT: mov s2, v1.s[2]
|
||||
; CHECK-CVT-NEXT: mov s1, v1.s[3]
|
||||
; CHECK-CVT-NEXT: mov s2, v0.s[1]
|
||||
; CHECK-CVT-NEXT: fcvtzs w10, s3
|
||||
; CHECK-CVT-NEXT: fcvtzs w11, s1
|
||||
; CHECK-CVT-NEXT: mov s1, v0.s[2]
|
||||
; CHECK-CVT-NEXT: mov s0, v0.s[3]
|
||||
; CHECK-CVT-NEXT: cmp w8, #0
|
||||
; CHECK-CVT-NEXT: csel w8, w8, wzr, lt
|
||||
; CHECK-CVT-NEXT: fcvtzs w10, s2
|
||||
; CHECK-CVT-NEXT: cmp w8, #0
|
||||
; CHECK-CVT-NEXT: mov s2, v0.s[1]
|
||||
; CHECK-CVT-NEXT: fcvtzs w12, s2
|
||||
; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge
|
||||
; CHECK-CVT-NEXT: cmp w9, #0
|
||||
; CHECK-CVT-NEXT: csel w9, w9, wzr, lt
|
||||
; CHECK-CVT-NEXT: fcvtzs w11, s1
|
||||
; CHECK-CVT-NEXT: cmp w9, #0
|
||||
; CHECK-CVT-NEXT: mov s1, v0.s[2]
|
||||
; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge
|
||||
; CHECK-CVT-NEXT: cmp w10, #0
|
||||
; CHECK-CVT-NEXT: csel w10, w10, wzr, lt
|
||||
; CHECK-CVT-NEXT: fcvtzs w12, s2
|
||||
; CHECK-CVT-NEXT: cmp w10, #0
|
||||
; CHECK-CVT-NEXT: mov s0, v0.s[3]
|
||||
; CHECK-CVT-NEXT: csinv w10, w10, wzr, ge
|
||||
; CHECK-CVT-NEXT: cmp w11, #0
|
||||
; CHECK-CVT-NEXT: csel w11, w11, wzr, lt
|
||||
; CHECK-CVT-NEXT: fmov s2, w9
|
||||
; CHECK-CVT-NEXT: cmp w11, #0
|
||||
; CHECK-CVT-NEXT: csinv w11, w11, wzr, ge
|
||||
; CHECK-CVT-NEXT: cmp w12, #0
|
||||
; CHECK-CVT-NEXT: csel w12, w12, wzr, lt
|
||||
; CHECK-CVT-NEXT: cmp w12, #0
|
||||
; CHECK-CVT-NEXT: fcvtzs w9, s1
|
||||
; CHECK-CVT-NEXT: csinv w12, w12, wzr, ge
|
||||
; CHECK-CVT-NEXT: cmp w13, #0
|
||||
; CHECK-CVT-NEXT: csel w13, w13, wzr, lt
|
||||
; CHECK-CVT-NEXT: cmp w13, #0
|
||||
; CHECK-CVT-NEXT: csinv w9, w13, wzr, ge
|
||||
; CHECK-CVT-NEXT: fcvtzs w13, s1
|
||||
; CHECK-CVT-NEXT: csinv w13, w13, wzr, ge
|
||||
; CHECK-CVT-NEXT: cmp w9, #0
|
||||
; CHECK-CVT-NEXT: mov v2.s[1], w8
|
||||
; CHECK-CVT-NEXT: fmov s1, w9
|
||||
; CHECK-CVT-NEXT: cmp w13, #0
|
||||
; CHECK-CVT-NEXT: csel w8, w13, wzr, lt
|
||||
; CHECK-CVT-NEXT: csel w8, w9, wzr, lt
|
||||
; CHECK-CVT-NEXT: fcvtzs w9, s0
|
||||
; CHECK-CVT-NEXT: cmp w8, #0
|
||||
; CHECK-CVT-NEXT: mov v1.s[1], w12
|
||||
; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge
|
||||
; CHECK-CVT-NEXT: fmov s1, w13
|
||||
; CHECK-CVT-NEXT: cmp w9, #0
|
||||
; CHECK-CVT-NEXT: csel w9, w9, wzr, lt
|
||||
; CHECK-CVT-NEXT: mov v1.s[1], w12
|
||||
; CHECK-CVT-NEXT: mov v2.s[2], w10
|
||||
; CHECK-CVT-NEXT: cmp w9, #0
|
||||
; CHECK-CVT-NEXT: mov v1.s[2], w8
|
||||
; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge
|
||||
; CHECK-CVT-NEXT: csel w8, w9, wzr, lt
|
||||
; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge
|
||||
; CHECK-CVT-NEXT: mov v2.s[3], w11
|
||||
; CHECK-CVT-NEXT: mov v1.s[3], w8
|
||||
; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h
|
||||
|
@ -68,10 +68,8 @@ define i128 @u128_saturating_add(i128 %x, i128 %y) {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adds x8, x0, x2
|
||||
; CHECK-NEXT: adcs x9, x1, x3
|
||||
; CHECK-NEXT: cset w10, hs
|
||||
; CHECK-NEXT: cmp w10, #0
|
||||
; CHECK-NEXT: csinv x0, x8, xzr, eq
|
||||
; CHECK-NEXT: csinv x1, x9, xzr, eq
|
||||
; CHECK-NEXT: csinv x0, x8, xzr, lo
|
||||
; CHECK-NEXT: csinv x1, x9, xzr, lo
|
||||
; CHECK-NEXT: ret
|
||||
%1 = tail call i128 @llvm.uadd.sat.i128(i128 %x, i128 %y)
|
||||
ret i128 %1
|
||||
@ -126,10 +124,8 @@ define i128 @u128_saturating_sub(i128 %x, i128 %y) {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: subs x8, x0, x2
|
||||
; CHECK-NEXT: sbcs x9, x1, x3
|
||||
; CHECK-NEXT: cset w10, lo
|
||||
; CHECK-NEXT: cmp w10, #0
|
||||
; CHECK-NEXT: csel x0, xzr, x8, ne
|
||||
; CHECK-NEXT: csel x1, xzr, x9, ne
|
||||
; CHECK-NEXT: csel x0, xzr, x8, lo
|
||||
; CHECK-NEXT: csel x1, xzr, x9, lo
|
||||
; CHECK-NEXT: ret
|
||||
%1 = tail call i128 @llvm.usub.sat.i128(i128 %x, i128 %y)
|
||||
ret i128 %1
|
||||
@ -185,11 +181,9 @@ define i128 @i128_saturating_add(i128 %x, i128 %y) {
|
||||
; CHECK-NEXT: adds x8, x0, x2
|
||||
; CHECK-NEXT: adcs x9, x1, x3
|
||||
; CHECK-NEXT: asr x10, x9, #63
|
||||
; CHECK-NEXT: cset w11, vs
|
||||
; CHECK-NEXT: cmp w11, #0
|
||||
; CHECK-NEXT: eor x11, x10, #0x8000000000000000
|
||||
; CHECK-NEXT: csel x0, x10, x8, ne
|
||||
; CHECK-NEXT: csel x1, x11, x9, ne
|
||||
; CHECK-NEXT: csel x0, x10, x8, vs
|
||||
; CHECK-NEXT: csel x1, x11, x9, vs
|
||||
; CHECK-NEXT: ret
|
||||
%1 = tail call i128 @llvm.sadd.sat.i128(i128 %x, i128 %y)
|
||||
ret i128 %1
|
||||
@ -245,11 +239,9 @@ define i128 @i128_saturating_sub(i128 %x, i128 %y) {
|
||||
; CHECK-NEXT: subs x8, x0, x2
|
||||
; CHECK-NEXT: sbcs x9, x1, x3
|
||||
; CHECK-NEXT: asr x10, x9, #63
|
||||
; CHECK-NEXT: cset w11, vs
|
||||
; CHECK-NEXT: cmp w11, #0
|
||||
; CHECK-NEXT: eor x11, x10, #0x8000000000000000
|
||||
; CHECK-NEXT: csel x0, x10, x8, ne
|
||||
; CHECK-NEXT: csel x1, x11, x9, ne
|
||||
; CHECK-NEXT: csel x0, x10, x8, vs
|
||||
; CHECK-NEXT: csel x1, x11, x9, vs
|
||||
; CHECK-NEXT: ret
|
||||
%1 = tail call i128 @llvm.ssub.sat.i128(i128 %x, i128 %y)
|
||||
ret i128 %1
|
||||
|
@ -70,12 +70,10 @@ define i256 @u256_saturating_add(i256 %x, i256 %y) {
|
||||
; CHECK-NEXT: adcs x9, x1, x5
|
||||
; CHECK-NEXT: adcs x10, x2, x6
|
||||
; CHECK-NEXT: adcs x11, x3, x7
|
||||
; CHECK-NEXT: cset w12, hs
|
||||
; CHECK-NEXT: cmp w12, #0
|
||||
; CHECK-NEXT: csinv x0, x8, xzr, eq
|
||||
; CHECK-NEXT: csinv x1, x9, xzr, eq
|
||||
; CHECK-NEXT: csinv x2, x10, xzr, eq
|
||||
; CHECK-NEXT: csinv x3, x11, xzr, eq
|
||||
; CHECK-NEXT: csinv x0, x8, xzr, lo
|
||||
; CHECK-NEXT: csinv x1, x9, xzr, lo
|
||||
; CHECK-NEXT: csinv x2, x10, xzr, lo
|
||||
; CHECK-NEXT: csinv x3, x11, xzr, lo
|
||||
; CHECK-NEXT: ret
|
||||
%1 = tail call i256 @llvm.uadd.sat.i256(i256 %x, i256 %y)
|
||||
ret i256 %1
|
||||
@ -138,12 +136,10 @@ define i256 @u256_saturating_sub(i256 %x, i256 %y) {
|
||||
; CHECK-NEXT: sbcs x9, x1, x5
|
||||
; CHECK-NEXT: sbcs x10, x2, x6
|
||||
; CHECK-NEXT: sbcs x11, x3, x7
|
||||
; CHECK-NEXT: cset w12, lo
|
||||
; CHECK-NEXT: cmp w12, #0
|
||||
; CHECK-NEXT: csel x0, xzr, x8, ne
|
||||
; CHECK-NEXT: csel x1, xzr, x9, ne
|
||||
; CHECK-NEXT: csel x2, xzr, x10, ne
|
||||
; CHECK-NEXT: csel x3, xzr, x11, ne
|
||||
; CHECK-NEXT: csel x0, xzr, x8, lo
|
||||
; CHECK-NEXT: csel x1, xzr, x9, lo
|
||||
; CHECK-NEXT: csel x2, xzr, x10, lo
|
||||
; CHECK-NEXT: csel x3, xzr, x11, lo
|
||||
; CHECK-NEXT: ret
|
||||
%1 = tail call i256 @llvm.usub.sat.i256(i256 %x, i256 %y)
|
||||
ret i256 %1
|
||||
@ -206,14 +202,12 @@ define i256 @i256_saturating_add(i256 %x, i256 %y) {
|
||||
; CHECK-NEXT: adcs x9, x1, x5
|
||||
; CHECK-NEXT: adcs x10, x2, x6
|
||||
; CHECK-NEXT: adcs x11, x3, x7
|
||||
; CHECK-NEXT: cset w12, vs
|
||||
; CHECK-NEXT: asr x13, x11, #63
|
||||
; CHECK-NEXT: cmp w12, #0
|
||||
; CHECK-NEXT: csel x0, x13, x8, ne
|
||||
; CHECK-NEXT: eor x8, x13, #0x8000000000000000
|
||||
; CHECK-NEXT: csel x1, x13, x9, ne
|
||||
; CHECK-NEXT: csel x2, x13, x10, ne
|
||||
; CHECK-NEXT: csel x3, x8, x11, ne
|
||||
; CHECK-NEXT: asr x12, x11, #63
|
||||
; CHECK-NEXT: csel x0, x12, x8, vs
|
||||
; CHECK-NEXT: eor x8, x12, #0x8000000000000000
|
||||
; CHECK-NEXT: csel x1, x12, x9, vs
|
||||
; CHECK-NEXT: csel x2, x12, x10, vs
|
||||
; CHECK-NEXT: csel x3, x8, x11, vs
|
||||
; CHECK-NEXT: ret
|
||||
%1 = tail call i256 @llvm.sadd.sat.i256(i256 %x, i256 %y)
|
||||
ret i256 %1
|
||||
@ -276,14 +270,12 @@ define i256 @i256_saturating_sub(i256 %x, i256 %y) {
|
||||
; CHECK-NEXT: sbcs x9, x1, x5
|
||||
; CHECK-NEXT: sbcs x10, x2, x6
|
||||
; CHECK-NEXT: sbcs x11, x3, x7
|
||||
; CHECK-NEXT: cset w12, vs
|
||||
; CHECK-NEXT: asr x13, x11, #63
|
||||
; CHECK-NEXT: cmp w12, #0
|
||||
; CHECK-NEXT: csel x0, x13, x8, ne
|
||||
; CHECK-NEXT: eor x8, x13, #0x8000000000000000
|
||||
; CHECK-NEXT: csel x1, x13, x9, ne
|
||||
; CHECK-NEXT: csel x2, x13, x10, ne
|
||||
; CHECK-NEXT: csel x3, x8, x11, ne
|
||||
; CHECK-NEXT: asr x12, x11, #63
|
||||
; CHECK-NEXT: csel x0, x12, x8, vs
|
||||
; CHECK-NEXT: eor x8, x12, #0x8000000000000000
|
||||
; CHECK-NEXT: csel x1, x12, x9, vs
|
||||
; CHECK-NEXT: csel x2, x12, x10, vs
|
||||
; CHECK-NEXT: csel x3, x8, x11, vs
|
||||
; CHECK-NEXT: ret
|
||||
%1 = tail call i256 @llvm.ssub.sat.i256(i256 %x, i256 %y)
|
||||
ret i256 %1
|
||||
|
@ -352,20 +352,16 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adds x8, x2, x6
|
||||
; CHECK-NEXT: adcs x9, x3, x7
|
||||
; CHECK-NEXT: cset w10, vs
|
||||
; CHECK-NEXT: asr x11, x9, #63
|
||||
; CHECK-NEXT: cmp w10, #0
|
||||
; CHECK-NEXT: csel x2, x11, x8, ne
|
||||
; CHECK-NEXT: eor x8, x11, #0x8000000000000000
|
||||
; CHECK-NEXT: csel x3, x8, x9, ne
|
||||
; CHECK-NEXT: asr x10, x9, #63
|
||||
; CHECK-NEXT: csel x2, x10, x8, vs
|
||||
; CHECK-NEXT: eor x8, x10, #0x8000000000000000
|
||||
; CHECK-NEXT: csel x3, x8, x9, vs
|
||||
; CHECK-NEXT: adds x8, x0, x4
|
||||
; CHECK-NEXT: adcs x9, x1, x5
|
||||
; CHECK-NEXT: cset w10, vs
|
||||
; CHECK-NEXT: asr x11, x9, #63
|
||||
; CHECK-NEXT: cmp w10, #0
|
||||
; CHECK-NEXT: eor x10, x11, #0x8000000000000000
|
||||
; CHECK-NEXT: csel x8, x11, x8, ne
|
||||
; CHECK-NEXT: csel x1, x10, x9, ne
|
||||
; CHECK-NEXT: asr x10, x9, #63
|
||||
; CHECK-NEXT: csel x8, x10, x8, vs
|
||||
; CHECK-NEXT: eor x10, x10, #0x8000000000000000
|
||||
; CHECK-NEXT: csel x1, x10, x9, vs
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: mov v0.d[1], x1
|
||||
; CHECK-NEXT: fmov x0, d0
|
||||
|
@ -355,20 +355,16 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: subs x8, x2, x6
|
||||
; CHECK-NEXT: sbcs x9, x3, x7
|
||||
; CHECK-NEXT: cset w10, vs
|
||||
; CHECK-NEXT: asr x11, x9, #63
|
||||
; CHECK-NEXT: cmp w10, #0
|
||||
; CHECK-NEXT: csel x2, x11, x8, ne
|
||||
; CHECK-NEXT: eor x8, x11, #0x8000000000000000
|
||||
; CHECK-NEXT: csel x3, x8, x9, ne
|
||||
; CHECK-NEXT: asr x10, x9, #63
|
||||
; CHECK-NEXT: csel x2, x10, x8, vs
|
||||
; CHECK-NEXT: eor x8, x10, #0x8000000000000000
|
||||
; CHECK-NEXT: csel x3, x8, x9, vs
|
||||
; CHECK-NEXT: subs x8, x0, x4
|
||||
; CHECK-NEXT: sbcs x9, x1, x5
|
||||
; CHECK-NEXT: cset w10, vs
|
||||
; CHECK-NEXT: asr x11, x9, #63
|
||||
; CHECK-NEXT: cmp w10, #0
|
||||
; CHECK-NEXT: eor x10, x11, #0x8000000000000000
|
||||
; CHECK-NEXT: csel x8, x11, x8, ne
|
||||
; CHECK-NEXT: csel x1, x10, x9, ne
|
||||
; CHECK-NEXT: asr x10, x9, #63
|
||||
; CHECK-NEXT: csel x8, x10, x8, vs
|
||||
; CHECK-NEXT: eor x10, x10, #0x8000000000000000
|
||||
; CHECK-NEXT: csel x1, x10, x9, vs
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: mov v0.d[1], x1
|
||||
; CHECK-NEXT: fmov x0, d0
|
||||
|
@ -350,16 +350,12 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adds x8, x2, x6
|
||||
; CHECK-NEXT: adcs x9, x3, x7
|
||||
; CHECK-NEXT: cset w10, hs
|
||||
; CHECK-NEXT: cmp w10, #0
|
||||
; CHECK-NEXT: csinv x2, x8, xzr, eq
|
||||
; CHECK-NEXT: csinv x3, x9, xzr, eq
|
||||
; CHECK-NEXT: csinv x2, x8, xzr, lo
|
||||
; CHECK-NEXT: csinv x3, x9, xzr, lo
|
||||
; CHECK-NEXT: adds x8, x0, x4
|
||||
; CHECK-NEXT: adcs x9, x1, x5
|
||||
; CHECK-NEXT: cset w10, hs
|
||||
; CHECK-NEXT: cmp w10, #0
|
||||
; CHECK-NEXT: csinv x8, x8, xzr, eq
|
||||
; CHECK-NEXT: csinv x1, x9, xzr, eq
|
||||
; CHECK-NEXT: csinv x8, x8, xzr, lo
|
||||
; CHECK-NEXT: csinv x1, x9, xzr, lo
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: mov v0.d[1], x1
|
||||
; CHECK-NEXT: fmov x0, d0
|
||||
|
@ -346,16 +346,12 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: subs x8, x2, x6
|
||||
; CHECK-NEXT: sbcs x9, x3, x7
|
||||
; CHECK-NEXT: cset w10, lo
|
||||
; CHECK-NEXT: cmp w10, #0
|
||||
; CHECK-NEXT: csel x2, xzr, x8, ne
|
||||
; CHECK-NEXT: csel x3, xzr, x9, ne
|
||||
; CHECK-NEXT: csel x2, xzr, x8, lo
|
||||
; CHECK-NEXT: csel x3, xzr, x9, lo
|
||||
; CHECK-NEXT: subs x8, x0, x4
|
||||
; CHECK-NEXT: sbcs x9, x1, x5
|
||||
; CHECK-NEXT: cset w10, lo
|
||||
; CHECK-NEXT: cmp w10, #0
|
||||
; CHECK-NEXT: csel x8, xzr, x8, ne
|
||||
; CHECK-NEXT: csel x1, xzr, x9, ne
|
||||
; CHECK-NEXT: csel x8, xzr, x8, lo
|
||||
; CHECK-NEXT: csel x1, xzr, x9, lo
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: mov v0.d[1], x1
|
||||
; CHECK-NEXT: fmov x0, d0
|
||||
|
Loading…
Reference in New Issue
Block a user