[AArch64] Select saturating Neon instructions

This adds some extra patterns to select AArch64 Neon SQADD, UQADD, SQSUB
and UQSUB from the existing target independent sadd_sat, uadd_sat,
ssub_sat and usub_sat nodes.

It does not attempt to replace the existing int_aarch64_neon_uqadd
intrinsic nodes as they are apparently used for both scalar and vector,
and need to be legal on scalar types for some of the patterns to work.
The int_aarch64_neon_uqadd on scalar would move the two integers into
floating point registers, perform a Neon uqadd and move the value back.
I don't believe it is a good idea for uadd_sat to do the same, as the
scalar alternative is simpler (an adds with a csinv). For signed it may
be smaller, but I'm not sure about it being better.

So this just adds some extra patterns for the existing vector
instructions, matching on the _sat nodes.

Differential Revision: https://reviews.llvm.org/D69374
This commit is contained in:
David Green 2019-10-31 15:22:24 +00:00
parent 2b77dc62da
commit e9d7161099
9 changed files with 305 additions and 979 deletions

View File

@ -741,14 +741,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// Vector reductions
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
// Vector reductions
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
// Saturates
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
}
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {

View File

@ -5066,6 +5066,24 @@ multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
[(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
}
// Adds selection patterns (no new instruction definitions) that map a
// two-operand node onto already-defined three-same vector instructions.
//
//   inst   - name prefix of the instruction records; the vector type name
//            ("v8i8", "v4i16", ...) is appended to it and the result is
//            looked up with !cast<Instruction>, so those records must exist.
//   OpNode - the pattern operator to match, applied to $LHS and $RHS.
//
// One anonymous Pat is emitted per vector type: v8i8/v4i16/v2i32 take V64
// operands, and v16i8/v8i16/v4i32/v2i64 take V128 operands.
multiclass SIMDThreeSameVectorExtraPatterns<string inst, SDPatternOperator OpNode> {
// Patterns with V64 operands.
def : Pat<(v8i8 (OpNode V64:$LHS, V64:$RHS)),
(!cast<Instruction>(inst#"v8i8") V64:$LHS, V64:$RHS)>;
def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)),
(!cast<Instruction>(inst#"v4i16") V64:$LHS, V64:$RHS)>;
def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)),
(!cast<Instruction>(inst#"v2i32") V64:$LHS, V64:$RHS)>;
// Patterns with V128 operands.
def : Pat<(v16i8 (OpNode V128:$LHS, V128:$RHS)),
(!cast<Instruction>(inst#"v16i8") V128:$LHS, V128:$RHS)>;
def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)),
(!cast<Instruction>(inst#"v8i16") V128:$LHS, V128:$RHS)>;
def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)),
(!cast<Instruction>(inst#"v4i32") V128:$LHS, V128:$RHS)>;
def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)),
(!cast<Instruction>(inst#"v2i64") V128:$LHS, V128:$RHS)>;
}
// As above, but D sized elements unsupported.
multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {

View File

@ -3839,6 +3839,12 @@ defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
int_aarch64_neon_sqsub>;
// Extra saturating-arithmetic patterns, in addition to the intrinsic matches above
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;

View File

@ -88,15 +88,7 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: vec:
; CHECK: // %bb.0:
; CHECK-NEXT: add v2.4s, v0.4s, v1.4s
; CHECK-NEXT: cmlt v4.4s, v2.4s, #0
; CHECK-NEXT: mvni v3.4s, #128, lsl #24
; CHECK-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s
; CHECK-NEXT: mvn v5.16b, v4.16b
; CHECK-NEXT: bsl v3.16b, v4.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
; CHECK-NEXT: sqadd v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%tmp = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y);
ret <4 x i32> %tmp;

View File

@ -35,15 +35,7 @@ declare <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128>, <2 x i128>)
define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; CHECK-LABEL: v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: add v2.16b, v0.16b, v1.16b
; CHECK-NEXT: cmlt v4.16b, v2.16b, #0
; CHECK-NEXT: movi v3.16b, #127
; CHECK-NEXT: cmlt v1.16b, v1.16b, #0
; CHECK-NEXT: cmgt v0.16b, v0.16b, v2.16b
; CHECK-NEXT: mvn v5.16b, v4.16b
; CHECK-NEXT: bsl v3.16b, v4.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
; CHECK-NEXT: sqadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
ret <16 x i8> %z
@ -52,24 +44,8 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; CHECK-LABEL: v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: add v4.16b, v0.16b, v2.16b
; CHECK-NEXT: cmlt v7.16b, v4.16b, #0
; CHECK-NEXT: movi v6.16b, #127
; CHECK-NEXT: mvn v16.16b, v7.16b
; CHECK-NEXT: bsl v6.16b, v7.16b, v16.16b
; CHECK-NEXT: add v7.16b, v1.16b, v3.16b
; CHECK-NEXT: cmlt v2.16b, v2.16b, #0
; CHECK-NEXT: cmgt v0.16b, v0.16b, v4.16b
; CHECK-NEXT: cmlt v16.16b, v7.16b, #0
; CHECK-NEXT: movi v5.16b, #127
; CHECK-NEXT: cmlt v3.16b, v3.16b, #0
; CHECK-NEXT: cmgt v1.16b, v1.16b, v7.16b
; CHECK-NEXT: eor v0.16b, v2.16b, v0.16b
; CHECK-NEXT: mvn v2.16b, v16.16b
; CHECK-NEXT: eor v1.16b, v3.16b, v1.16b
; CHECK-NEXT: bsl v5.16b, v16.16b, v2.16b
; CHECK-NEXT: bsl v0.16b, v6.16b, v4.16b
; CHECK-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-NEXT: sqadd v0.16b, v0.16b, v2.16b
; CHECK-NEXT: sqadd v1.16b, v1.16b, v3.16b
; CHECK-NEXT: ret
%z = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %x, <32 x i8> %y)
ret <32 x i8> %z
@ -78,42 +54,10 @@ define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; CHECK-LABEL: v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: add v16.16b, v0.16b, v4.16b
; CHECK-NEXT: cmlt v24.16b, v16.16b, #0
; CHECK-NEXT: movi v18.16b, #127
; CHECK-NEXT: add v19.16b, v1.16b, v5.16b
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: bsl v18.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.16b, v19.16b, #0
; CHECK-NEXT: movi v20.16b, #127
; CHECK-NEXT: add v21.16b, v2.16b, v6.16b
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: bsl v20.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.16b, v21.16b, #0
; CHECK-NEXT: cmlt v4.16b, v4.16b, #0
; CHECK-NEXT: cmgt v0.16b, v0.16b, v16.16b
; CHECK-NEXT: movi v22.16b, #127
; CHECK-NEXT: add v23.16b, v3.16b, v7.16b
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: eor v0.16b, v4.16b, v0.16b
; CHECK-NEXT: cmlt v4.16b, v5.16b, #0
; CHECK-NEXT: cmgt v1.16b, v1.16b, v19.16b
; CHECK-NEXT: bsl v22.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.16b, v23.16b, #0
; CHECK-NEXT: eor v1.16b, v4.16b, v1.16b
; CHECK-NEXT: cmlt v4.16b, v6.16b, #0
; CHECK-NEXT: cmgt v2.16b, v2.16b, v21.16b
; CHECK-NEXT: movi v17.16b, #127
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: eor v2.16b, v4.16b, v2.16b
; CHECK-NEXT: cmlt v4.16b, v7.16b, #0
; CHECK-NEXT: cmgt v3.16b, v3.16b, v23.16b
; CHECK-NEXT: bsl v17.16b, v24.16b, v25.16b
; CHECK-NEXT: eor v3.16b, v4.16b, v3.16b
; CHECK-NEXT: bsl v0.16b, v18.16b, v16.16b
; CHECK-NEXT: bsl v1.16b, v20.16b, v19.16b
; CHECK-NEXT: bsl v2.16b, v22.16b, v21.16b
; CHECK-NEXT: bsl v3.16b, v17.16b, v23.16b
; CHECK-NEXT: sqadd v0.16b, v0.16b, v4.16b
; CHECK-NEXT: sqadd v1.16b, v1.16b, v5.16b
; CHECK-NEXT: sqadd v2.16b, v2.16b, v6.16b
; CHECK-NEXT: sqadd v3.16b, v3.16b, v7.16b
; CHECK-NEXT: ret
%z = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
ret <64 x i8> %z
@ -122,15 +66,7 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; CHECK-LABEL: v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add v2.8h, v0.8h, v1.8h
; CHECK-NEXT: cmlt v4.8h, v2.8h, #0
; CHECK-NEXT: mvni v3.8h, #128, lsl #8
; CHECK-NEXT: cmlt v1.8h, v1.8h, #0
; CHECK-NEXT: cmgt v0.8h, v0.8h, v2.8h
; CHECK-NEXT: mvn v5.16b, v4.16b
; CHECK-NEXT: bsl v3.16b, v4.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
; CHECK-NEXT: sqadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%z = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
ret <8 x i16> %z
@ -139,24 +75,8 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
; CHECK-LABEL: v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add v4.8h, v0.8h, v2.8h
; CHECK-NEXT: cmlt v7.8h, v4.8h, #0
; CHECK-NEXT: mvni v6.8h, #128, lsl #8
; CHECK-NEXT: mvn v16.16b, v7.16b
; CHECK-NEXT: bsl v6.16b, v7.16b, v16.16b
; CHECK-NEXT: add v7.8h, v1.8h, v3.8h
; CHECK-NEXT: cmlt v2.8h, v2.8h, #0
; CHECK-NEXT: cmgt v0.8h, v0.8h, v4.8h
; CHECK-NEXT: cmlt v16.8h, v7.8h, #0
; CHECK-NEXT: mvni v5.8h, #128, lsl #8
; CHECK-NEXT: cmlt v3.8h, v3.8h, #0
; CHECK-NEXT: cmgt v1.8h, v1.8h, v7.8h
; CHECK-NEXT: eor v0.16b, v2.16b, v0.16b
; CHECK-NEXT: mvn v2.16b, v16.16b
; CHECK-NEXT: eor v1.16b, v3.16b, v1.16b
; CHECK-NEXT: bsl v5.16b, v16.16b, v2.16b
; CHECK-NEXT: bsl v0.16b, v6.16b, v4.16b
; CHECK-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-NEXT: sqadd v0.8h, v0.8h, v2.8h
; CHECK-NEXT: sqadd v1.8h, v1.8h, v3.8h
; CHECK-NEXT: ret
%z = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %x, <16 x i16> %y)
ret <16 x i16> %z
@ -165,42 +85,10 @@ define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
; CHECK-LABEL: v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add v16.8h, v0.8h, v4.8h
; CHECK-NEXT: cmlt v24.8h, v16.8h, #0
; CHECK-NEXT: mvni v18.8h, #128, lsl #8
; CHECK-NEXT: add v19.8h, v1.8h, v5.8h
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: bsl v18.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.8h, v19.8h, #0
; CHECK-NEXT: mvni v20.8h, #128, lsl #8
; CHECK-NEXT: add v21.8h, v2.8h, v6.8h
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: bsl v20.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.8h, v21.8h, #0
; CHECK-NEXT: cmlt v4.8h, v4.8h, #0
; CHECK-NEXT: cmgt v0.8h, v0.8h, v16.8h
; CHECK-NEXT: mvni v22.8h, #128, lsl #8
; CHECK-NEXT: add v23.8h, v3.8h, v7.8h
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: eor v0.16b, v4.16b, v0.16b
; CHECK-NEXT: cmlt v4.8h, v5.8h, #0
; CHECK-NEXT: cmgt v1.8h, v1.8h, v19.8h
; CHECK-NEXT: bsl v22.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.8h, v23.8h, #0
; CHECK-NEXT: eor v1.16b, v4.16b, v1.16b
; CHECK-NEXT: cmlt v4.8h, v6.8h, #0
; CHECK-NEXT: cmgt v2.8h, v2.8h, v21.8h
; CHECK-NEXT: mvni v17.8h, #128, lsl #8
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: eor v2.16b, v4.16b, v2.16b
; CHECK-NEXT: cmlt v4.8h, v7.8h, #0
; CHECK-NEXT: cmgt v3.8h, v3.8h, v23.8h
; CHECK-NEXT: bsl v17.16b, v24.16b, v25.16b
; CHECK-NEXT: eor v3.16b, v4.16b, v3.16b
; CHECK-NEXT: bsl v0.16b, v18.16b, v16.16b
; CHECK-NEXT: bsl v1.16b, v20.16b, v19.16b
; CHECK-NEXT: bsl v2.16b, v22.16b, v21.16b
; CHECK-NEXT: bsl v3.16b, v17.16b, v23.16b
; CHECK-NEXT: sqadd v0.8h, v0.8h, v4.8h
; CHECK-NEXT: sqadd v1.8h, v1.8h, v5.8h
; CHECK-NEXT: sqadd v2.8h, v2.8h, v6.8h
; CHECK-NEXT: sqadd v3.8h, v3.8h, v7.8h
; CHECK-NEXT: ret
%z = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
ret <32 x i16> %z
@ -211,15 +99,7 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: movi v2.8b, #127
; CHECK-NEXT: add v3.8b, v0.8b, v1.8b
; CHECK-NEXT: cmlt v4.8b, v3.8b, #0
; CHECK-NEXT: cmlt v1.8b, v1.8b, #0
; CHECK-NEXT: cmgt v0.8b, v0.8b, v3.8b
; CHECK-NEXT: mvn v5.8b, v4.8b
; CHECK-NEXT: bsl v2.8b, v4.8b, v5.8b
; CHECK-NEXT: eor v0.8b, v1.8b, v0.8b
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: sqadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: str d0, [x2]
; CHECK-NEXT: ret
%x = load <8 x i8>, <8 x i8>* %px
@ -248,11 +128,10 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
; CHECK-NEXT: mov v1.h[2], w9
; CHECK-NEXT: mov v0.h[3], w10
; CHECK-NEXT: mov v1.h[3], w11
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: movi v1.4h, #127
; CHECK-NEXT: smin v0.4h, v0.4h, v1.4h
; CHECK-NEXT: mvni v1.4h, #127
; CHECK-NEXT: smax v0.4h, v0.4h, v1.4h
; CHECK-NEXT: shl v1.4h, v1.4h, #8
; CHECK-NEXT: shl v0.4h, v0.4h, #8
; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: str s0, [x2]
; CHECK-NEXT: ret
@ -266,19 +145,18 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
; CHECK-LABEL: v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrsb w8, [x0]
; CHECK-NEXT: ldrsb w9, [x1]
; CHECK-NEXT: ldrsb w10, [x0, #1]
; CHECK-NEXT: ldrsb w11, [x1, #1]
; CHECK-NEXT: ldrb w8, [x0]
; CHECK-NEXT: ldrb w9, [x1]
; CHECK-NEXT: ldrb w10, [x0, #1]
; CHECK-NEXT: ldrb w11, [x1, #1]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: mov v0.s[1], w10
; CHECK-NEXT: mov v1.s[1], w11
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-NEXT: movi v1.2s, #127
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mvni v1.2s, #127
; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: shl v1.2s, v1.2s, #24
; CHECK-NEXT: shl v0.2s, v0.2s, #24
; CHECK-NEXT: sqadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ushr v0.2s, v0.2s, #24
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: strb w8, [x2, #1]
@ -296,15 +174,7 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: mvni v2.4h, #128, lsl #8
; CHECK-NEXT: add v3.4h, v0.4h, v1.4h
; CHECK-NEXT: cmlt v4.4h, v3.4h, #0
; CHECK-NEXT: cmlt v1.4h, v1.4h, #0
; CHECK-NEXT: cmgt v0.4h, v0.4h, v3.4h
; CHECK-NEXT: mvn v5.8b, v4.8b
; CHECK-NEXT: bsl v2.8b, v4.8b, v5.8b
; CHECK-NEXT: eor v0.8b, v1.8b, v0.8b
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: str d0, [x2]
; CHECK-NEXT: ret
%x = load <4 x i16>, <4 x i16>* %px
@ -317,19 +187,18 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind {
define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
; CHECK-LABEL: v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrsh w8, [x0]
; CHECK-NEXT: ldrsh w9, [x1]
; CHECK-NEXT: ldrsh w10, [x0, #2]
; CHECK-NEXT: ldrsh w11, [x1, #2]
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: ldrh w9, [x1]
; CHECK-NEXT: ldrh w10, [x0, #2]
; CHECK-NEXT: ldrh w11, [x1, #2]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: mov v0.s[1], w10
; CHECK-NEXT: mov v1.s[1], w11
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-NEXT: movi v1.2s, #127, msl #8
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mvni v1.2s, #127, msl #8
; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: shl v1.2s, v1.2s, #16
; CHECK-NEXT: shl v0.2s, v0.2s, #16
; CHECK-NEXT: sqadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ushr v0.2s, v0.2s, #16
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: strh w8, [x2, #2]
@ -345,15 +214,7 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind {
; CHECK-LABEL: v12i8:
; CHECK: // %bb.0:
; CHECK-NEXT: add v2.16b, v0.16b, v1.16b
; CHECK-NEXT: cmlt v4.16b, v2.16b, #0
; CHECK-NEXT: movi v3.16b, #127
; CHECK-NEXT: cmlt v1.16b, v1.16b, #0
; CHECK-NEXT: cmgt v0.16b, v0.16b, v2.16b
; CHECK-NEXT: mvn v5.16b, v4.16b
; CHECK-NEXT: bsl v3.16b, v4.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
; CHECK-NEXT: sqadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <12 x i8> @llvm.sadd.sat.v12i8(<12 x i8> %x, <12 x i8> %y)
ret <12 x i8> %z
@ -364,24 +225,8 @@ define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ldp q3, q2, [x1]
; CHECK-NEXT: mvni v5.8h, #128, lsl #8
; CHECK-NEXT: mvni v4.8h, #128, lsl #8
; CHECK-NEXT: add v6.8h, v1.8h, v2.8h
; CHECK-NEXT: cmlt v7.8h, v6.8h, #0
; CHECK-NEXT: mvn v16.16b, v7.16b
; CHECK-NEXT: bsl v5.16b, v7.16b, v16.16b
; CHECK-NEXT: add v7.8h, v0.8h, v3.8h
; CHECK-NEXT: cmlt v2.8h, v2.8h, #0
; CHECK-NEXT: cmgt v1.8h, v1.8h, v6.8h
; CHECK-NEXT: cmlt v16.8h, v7.8h, #0
; CHECK-NEXT: cmlt v3.8h, v3.8h, #0
; CHECK-NEXT: cmgt v0.8h, v0.8h, v7.8h
; CHECK-NEXT: eor v1.16b, v2.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v16.16b
; CHECK-NEXT: eor v0.16b, v3.16b, v0.16b
; CHECK-NEXT: bsl v4.16b, v16.16b, v2.16b
; CHECK-NEXT: bsl v1.16b, v5.16b, v6.16b
; CHECK-NEXT: bsl v0.16b, v4.16b, v7.16b
; CHECK-NEXT: sqadd v1.8h, v1.8h, v2.8h
; CHECK-NEXT: sqadd v0.8h, v0.8h, v3.8h
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: str d1, [x2, #16]
; CHECK-NEXT: ret
@ -397,15 +242,7 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ldr b1, [x1]
; CHECK-NEXT: movi v2.8b, #127
; CHECK-NEXT: add v3.8b, v0.8b, v1.8b
; CHECK-NEXT: cmlt v4.8b, v3.8b, #0
; CHECK-NEXT: cmlt v1.8b, v1.8b, #0
; CHECK-NEXT: cmgt v0.8b, v0.8b, v3.8b
; CHECK-NEXT: mvn v5.8b, v4.8b
; CHECK-NEXT: bsl v2.8b, v4.8b, v5.8b
; CHECK-NEXT: eor v0.8b, v1.8b, v0.8b
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: sqadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: st1 { v0.b }[0], [x2]
; CHECK-NEXT: ret
%x = load <1 x i8>, <1 x i8>* %px
@ -420,15 +257,7 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ldr h1, [x1]
; CHECK-NEXT: mvni v2.4h, #128, lsl #8
; CHECK-NEXT: add v3.4h, v0.4h, v1.4h
; CHECK-NEXT: cmlt v4.4h, v3.4h, #0
; CHECK-NEXT: cmlt v1.4h, v1.4h, #0
; CHECK-NEXT: cmgt v0.4h, v0.4h, v3.4h
; CHECK-NEXT: mvn v5.8b, v4.8b
; CHECK-NEXT: bsl v2.8b, v4.8b, v5.8b
; CHECK-NEXT: eor v0.8b, v1.8b, v0.8b
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: str h0, [x2]
; CHECK-NEXT: ret
%x = load <1 x i16>, <1 x i16>* %px
@ -444,11 +273,11 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; CHECK-NEXT: shl v0.16b, v0.16b, #4
; CHECK-NEXT: shl v1.16b, v1.16b, #4
; CHECK-NEXT: sshr v0.16b, v0.16b, #4
; CHECK-NEXT: movi v2.16b, #7
; CHECK-NEXT: ssra v0.16b, v1.16b, #4
; CHECK-NEXT: smin v0.16b, v0.16b, v2.16b
; CHECK-NEXT: movi v1.16b, #248
; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshr v1.16b, v1.16b, #4
; CHECK-NEXT: shl v1.16b, v1.16b, #4
; CHECK-NEXT: shl v0.16b, v0.16b, #4
; CHECK-NEXT: sqadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshr v0.16b, v0.16b, #4
; CHECK-NEXT: ret
%z = call <16 x i4> @llvm.sadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
ret <16 x i4> %z
@ -460,11 +289,11 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: shl v1.16b, v1.16b, #7
; CHECK-NEXT: sshr v0.16b, v0.16b, #7
; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: ssra v0.16b, v1.16b, #7
; CHECK-NEXT: smin v0.16b, v0.16b, v2.16b
; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshr v1.16b, v1.16b, #7
; CHECK-NEXT: shl v1.16b, v1.16b, #7
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: sqadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshr v0.16b, v0.16b, #7
; CHECK-NEXT: ret
%z = call <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
@ -473,15 +302,7 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
; CHECK-LABEL: v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add v2.2s, v0.2s, v1.2s
; CHECK-NEXT: cmlt v4.2s, v2.2s, #0
; CHECK-NEXT: mvni v3.2s, #128, lsl #24
; CHECK-NEXT: cmlt v1.2s, v1.2s, #0
; CHECK-NEXT: cmgt v0.2s, v0.2s, v2.2s
; CHECK-NEXT: mvn v5.8b, v4.8b
; CHECK-NEXT: bsl v3.8b, v4.8b, v5.8b
; CHECK-NEXT: eor v0.8b, v1.8b, v0.8b
; CHECK-NEXT: bsl v0.8b, v3.8b, v2.8b
; CHECK-NEXT: sqadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%z = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
ret <2 x i32> %z
@ -490,15 +311,7 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add v2.4s, v0.4s, v1.4s
; CHECK-NEXT: cmlt v4.4s, v2.4s, #0
; CHECK-NEXT: mvni v3.4s, #128, lsl #24
; CHECK-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s
; CHECK-NEXT: mvn v5.16b, v4.16b
; CHECK-NEXT: bsl v3.16b, v4.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
; CHECK-NEXT: sqadd v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%z = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
ret <4 x i32> %z
@ -507,24 +320,8 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; CHECK-LABEL: v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add v4.4s, v0.4s, v2.4s
; CHECK-NEXT: cmlt v7.4s, v4.4s, #0
; CHECK-NEXT: mvni v6.4s, #128, lsl #24
; CHECK-NEXT: mvn v16.16b, v7.16b
; CHECK-NEXT: bsl v6.16b, v7.16b, v16.16b
; CHECK-NEXT: add v7.4s, v1.4s, v3.4s
; CHECK-NEXT: cmlt v2.4s, v2.4s, #0
; CHECK-NEXT: cmgt v0.4s, v0.4s, v4.4s
; CHECK-NEXT: cmlt v16.4s, v7.4s, #0
; CHECK-NEXT: mvni v5.4s, #128, lsl #24
; CHECK-NEXT: cmlt v3.4s, v3.4s, #0
; CHECK-NEXT: cmgt v1.4s, v1.4s, v7.4s
; CHECK-NEXT: eor v0.16b, v2.16b, v0.16b
; CHECK-NEXT: mvn v2.16b, v16.16b
; CHECK-NEXT: eor v1.16b, v3.16b, v1.16b
; CHECK-NEXT: bsl v5.16b, v16.16b, v2.16b
; CHECK-NEXT: bsl v0.16b, v6.16b, v4.16b
; CHECK-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-NEXT: sqadd v0.4s, v0.4s, v2.4s
; CHECK-NEXT: sqadd v1.4s, v1.4s, v3.4s
; CHECK-NEXT: ret
%z = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
ret <8 x i32> %z
@ -533,42 +330,10 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
; CHECK-LABEL: v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add v16.4s, v0.4s, v4.4s
; CHECK-NEXT: cmlt v24.4s, v16.4s, #0
; CHECK-NEXT: mvni v18.4s, #128, lsl #24
; CHECK-NEXT: add v19.4s, v1.4s, v5.4s
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: bsl v18.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.4s, v19.4s, #0
; CHECK-NEXT: mvni v20.4s, #128, lsl #24
; CHECK-NEXT: add v21.4s, v2.4s, v6.4s
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: bsl v20.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.4s, v21.4s, #0
; CHECK-NEXT: cmlt v4.4s, v4.4s, #0
; CHECK-NEXT: cmgt v0.4s, v0.4s, v16.4s
; CHECK-NEXT: mvni v22.4s, #128, lsl #24
; CHECK-NEXT: add v23.4s, v3.4s, v7.4s
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: eor v0.16b, v4.16b, v0.16b
; CHECK-NEXT: cmlt v4.4s, v5.4s, #0
; CHECK-NEXT: cmgt v1.4s, v1.4s, v19.4s
; CHECK-NEXT: bsl v22.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.4s, v23.4s, #0
; CHECK-NEXT: eor v1.16b, v4.16b, v1.16b
; CHECK-NEXT: cmlt v4.4s, v6.4s, #0
; CHECK-NEXT: cmgt v2.4s, v2.4s, v21.4s
; CHECK-NEXT: mvni v17.4s, #128, lsl #24
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: eor v2.16b, v4.16b, v2.16b
; CHECK-NEXT: cmlt v4.4s, v7.4s, #0
; CHECK-NEXT: cmgt v3.4s, v3.4s, v23.4s
; CHECK-NEXT: bsl v17.16b, v24.16b, v25.16b
; CHECK-NEXT: eor v3.16b, v4.16b, v3.16b
; CHECK-NEXT: bsl v0.16b, v18.16b, v16.16b
; CHECK-NEXT: bsl v1.16b, v20.16b, v19.16b
; CHECK-NEXT: bsl v2.16b, v22.16b, v21.16b
; CHECK-NEXT: bsl v3.16b, v17.16b, v23.16b
; CHECK-NEXT: sqadd v0.4s, v0.4s, v4.4s
; CHECK-NEXT: sqadd v1.4s, v1.4s, v5.4s
; CHECK-NEXT: sqadd v2.4s, v2.4s, v6.4s
; CHECK-NEXT: sqadd v3.4s, v3.4s, v7.4s
; CHECK-NEXT: ret
%z = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y)
ret <16 x i32> %z
@ -577,16 +342,7 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
; CHECK-LABEL: v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add v2.2d, v0.2d, v1.2d
; CHECK-NEXT: mov x8, #9223372036854775807
; CHECK-NEXT: cmlt v3.2d, v2.2d, #0
; CHECK-NEXT: cmlt v1.2d, v1.2d, #0
; CHECK-NEXT: dup v4.2d, x8
; CHECK-NEXT: cmgt v0.2d, v0.2d, v2.2d
; CHECK-NEXT: mvn v5.16b, v3.16b
; CHECK-NEXT: bsl v4.16b, v3.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v4.16b, v2.16b
; CHECK-NEXT: sqadd v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
%z = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
ret <2 x i64> %z
@ -595,25 +351,8 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
; CHECK-LABEL: v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add v4.2d, v0.2d, v2.2d
; CHECK-NEXT: mov x8, #9223372036854775807
; CHECK-NEXT: cmlt v5.2d, v4.2d, #0
; CHECK-NEXT: dup v6.2d, x8
; CHECK-NEXT: mvn v7.16b, v5.16b
; CHECK-NEXT: mov v16.16b, v6.16b
; CHECK-NEXT: bsl v16.16b, v5.16b, v7.16b
; CHECK-NEXT: add v5.2d, v1.2d, v3.2d
; CHECK-NEXT: cmlt v2.2d, v2.2d, #0
; CHECK-NEXT: cmgt v0.2d, v0.2d, v4.2d
; CHECK-NEXT: cmlt v7.2d, v5.2d, #0
; CHECK-NEXT: cmlt v3.2d, v3.2d, #0
; CHECK-NEXT: cmgt v1.2d, v1.2d, v5.2d
; CHECK-NEXT: eor v0.16b, v2.16b, v0.16b
; CHECK-NEXT: mvn v2.16b, v7.16b
; CHECK-NEXT: eor v1.16b, v3.16b, v1.16b
; CHECK-NEXT: bsl v6.16b, v7.16b, v2.16b
; CHECK-NEXT: bsl v0.16b, v16.16b, v4.16b
; CHECK-NEXT: bsl v1.16b, v6.16b, v5.16b
; CHECK-NEXT: sqadd v0.2d, v0.2d, v2.2d
; CHECK-NEXT: sqadd v1.2d, v1.2d, v3.2d
; CHECK-NEXT: ret
%z = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
ret <4 x i64> %z
@ -622,43 +361,10 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
; CHECK-LABEL: v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add v16.2d, v0.2d, v4.2d
; CHECK-NEXT: mov x8, #9223372036854775807
; CHECK-NEXT: add v17.2d, v1.2d, v5.2d
; CHECK-NEXT: cmlt v20.2d, v16.2d, #0
; CHECK-NEXT: dup v21.2d, x8
; CHECK-NEXT: add v18.2d, v2.2d, v6.2d
; CHECK-NEXT: cmlt v22.2d, v17.2d, #0
; CHECK-NEXT: mvn v24.16b, v20.16b
; CHECK-NEXT: mov v25.16b, v21.16b
; CHECK-NEXT: cmlt v23.2d, v18.2d, #0
; CHECK-NEXT: bsl v25.16b, v20.16b, v24.16b
; CHECK-NEXT: mvn v20.16b, v22.16b
; CHECK-NEXT: mov v24.16b, v21.16b
; CHECK-NEXT: cmlt v4.2d, v4.2d, #0
; CHECK-NEXT: cmgt v0.2d, v0.2d, v16.2d
; CHECK-NEXT: add v19.2d, v3.2d, v7.2d
; CHECK-NEXT: bsl v24.16b, v22.16b, v20.16b
; CHECK-NEXT: mvn v20.16b, v23.16b
; CHECK-NEXT: mov v22.16b, v21.16b
; CHECK-NEXT: eor v0.16b, v4.16b, v0.16b
; CHECK-NEXT: cmlt v4.2d, v5.2d, #0
; CHECK-NEXT: cmgt v1.2d, v1.2d, v17.2d
; CHECK-NEXT: bsl v22.16b, v23.16b, v20.16b
; CHECK-NEXT: cmlt v20.2d, v19.2d, #0
; CHECK-NEXT: eor v1.16b, v4.16b, v1.16b
; CHECK-NEXT: cmlt v4.2d, v6.2d, #0
; CHECK-NEXT: cmgt v2.2d, v2.2d, v18.2d
; CHECK-NEXT: mvn v23.16b, v20.16b
; CHECK-NEXT: eor v2.16b, v4.16b, v2.16b
; CHECK-NEXT: cmlt v4.2d, v7.2d, #0
; CHECK-NEXT: cmgt v3.2d, v3.2d, v19.2d
; CHECK-NEXT: bsl v21.16b, v20.16b, v23.16b
; CHECK-NEXT: eor v3.16b, v4.16b, v3.16b
; CHECK-NEXT: bsl v0.16b, v25.16b, v16.16b
; CHECK-NEXT: bsl v1.16b, v24.16b, v17.16b
; CHECK-NEXT: bsl v2.16b, v22.16b, v18.16b
; CHECK-NEXT: bsl v3.16b, v21.16b, v19.16b
; CHECK-NEXT: sqadd v0.2d, v0.2d, v4.2d
; CHECK-NEXT: sqadd v1.2d, v1.2d, v5.2d
; CHECK-NEXT: sqadd v2.2d, v2.2d, v6.2d
; CHECK-NEXT: sqadd v3.2d, v3.2d, v7.2d
; CHECK-NEXT: ret
%z = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
ret <8 x i64> %z

View File

@ -88,15 +88,7 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: vec:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v2.4s, v0.4s, v1.4s
; CHECK-NEXT: cmlt v4.4s, v2.4s, #0
; CHECK-NEXT: mvni v3.4s, #128, lsl #24
; CHECK-NEXT: cmgt v1.4s, v1.4s, #0
; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s
; CHECK-NEXT: mvn v5.16b, v4.16b
; CHECK-NEXT: bsl v3.16b, v4.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
; CHECK-NEXT: sqsub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%tmp = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y);
ret <4 x i32> %tmp;

View File

@ -36,15 +36,7 @@ declare <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128>, <2 x i128>)
define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; CHECK-LABEL: v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v2.16b, v0.16b, v1.16b
; CHECK-NEXT: cmlt v4.16b, v2.16b, #0
; CHECK-NEXT: movi v3.16b, #127
; CHECK-NEXT: cmgt v1.16b, v1.16b, #0
; CHECK-NEXT: cmgt v0.16b, v0.16b, v2.16b
; CHECK-NEXT: mvn v5.16b, v4.16b
; CHECK-NEXT: bsl v3.16b, v4.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
; CHECK-NEXT: sqsub v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
ret <16 x i8> %z
@ -53,24 +45,8 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; CHECK-LABEL: v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v4.16b, v0.16b, v2.16b
; CHECK-NEXT: cmlt v7.16b, v4.16b, #0
; CHECK-NEXT: movi v6.16b, #127
; CHECK-NEXT: mvn v16.16b, v7.16b
; CHECK-NEXT: bsl v6.16b, v7.16b, v16.16b
; CHECK-NEXT: sub v7.16b, v1.16b, v3.16b
; CHECK-NEXT: cmgt v2.16b, v2.16b, #0
; CHECK-NEXT: cmgt v0.16b, v0.16b, v4.16b
; CHECK-NEXT: cmlt v16.16b, v7.16b, #0
; CHECK-NEXT: movi v5.16b, #127
; CHECK-NEXT: cmgt v3.16b, v3.16b, #0
; CHECK-NEXT: cmgt v1.16b, v1.16b, v7.16b
; CHECK-NEXT: eor v0.16b, v2.16b, v0.16b
; CHECK-NEXT: mvn v2.16b, v16.16b
; CHECK-NEXT: eor v1.16b, v3.16b, v1.16b
; CHECK-NEXT: bsl v5.16b, v16.16b, v2.16b
; CHECK-NEXT: bsl v0.16b, v6.16b, v4.16b
; CHECK-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-NEXT: sqsub v0.16b, v0.16b, v2.16b
; CHECK-NEXT: sqsub v1.16b, v1.16b, v3.16b
; CHECK-NEXT: ret
%z = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %x, <32 x i8> %y)
ret <32 x i8> %z
@ -79,42 +55,10 @@ define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; CHECK-LABEL: v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v16.16b, v0.16b, v4.16b
; CHECK-NEXT: cmlt v24.16b, v16.16b, #0
; CHECK-NEXT: movi v18.16b, #127
; CHECK-NEXT: sub v19.16b, v1.16b, v5.16b
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: bsl v18.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.16b, v19.16b, #0
; CHECK-NEXT: movi v20.16b, #127
; CHECK-NEXT: sub v21.16b, v2.16b, v6.16b
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: bsl v20.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.16b, v21.16b, #0
; CHECK-NEXT: cmgt v4.16b, v4.16b, #0
; CHECK-NEXT: cmgt v0.16b, v0.16b, v16.16b
; CHECK-NEXT: movi v22.16b, #127
; CHECK-NEXT: sub v23.16b, v3.16b, v7.16b
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: eor v0.16b, v4.16b, v0.16b
; CHECK-NEXT: cmgt v4.16b, v5.16b, #0
; CHECK-NEXT: cmgt v1.16b, v1.16b, v19.16b
; CHECK-NEXT: bsl v22.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.16b, v23.16b, #0
; CHECK-NEXT: eor v1.16b, v4.16b, v1.16b
; CHECK-NEXT: cmgt v4.16b, v6.16b, #0
; CHECK-NEXT: cmgt v2.16b, v2.16b, v21.16b
; CHECK-NEXT: movi v17.16b, #127
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: eor v2.16b, v4.16b, v2.16b
; CHECK-NEXT: cmgt v4.16b, v7.16b, #0
; CHECK-NEXT: cmgt v3.16b, v3.16b, v23.16b
; CHECK-NEXT: bsl v17.16b, v24.16b, v25.16b
; CHECK-NEXT: eor v3.16b, v4.16b, v3.16b
; CHECK-NEXT: bsl v0.16b, v18.16b, v16.16b
; CHECK-NEXT: bsl v1.16b, v20.16b, v19.16b
; CHECK-NEXT: bsl v2.16b, v22.16b, v21.16b
; CHECK-NEXT: bsl v3.16b, v17.16b, v23.16b
; CHECK-NEXT: sqsub v0.16b, v0.16b, v4.16b
; CHECK-NEXT: sqsub v1.16b, v1.16b, v5.16b
; CHECK-NEXT: sqsub v2.16b, v2.16b, v6.16b
; CHECK-NEXT: sqsub v3.16b, v3.16b, v7.16b
; CHECK-NEXT: ret
%z = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
ret <64 x i8> %z
@ -123,15 +67,7 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; CHECK-LABEL: v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v2.8h, v0.8h, v1.8h
; CHECK-NEXT: cmlt v4.8h, v2.8h, #0
; CHECK-NEXT: mvni v3.8h, #128, lsl #8
; CHECK-NEXT: cmgt v1.8h, v1.8h, #0
; CHECK-NEXT: cmgt v0.8h, v0.8h, v2.8h
; CHECK-NEXT: mvn v5.16b, v4.16b
; CHECK-NEXT: bsl v3.16b, v4.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
; CHECK-NEXT: sqsub v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%z = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
ret <8 x i16> %z
@ -140,24 +76,8 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
; CHECK-LABEL: v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v4.8h, v0.8h, v2.8h
; CHECK-NEXT: cmlt v7.8h, v4.8h, #0
; CHECK-NEXT: mvni v6.8h, #128, lsl #8
; CHECK-NEXT: mvn v16.16b, v7.16b
; CHECK-NEXT: bsl v6.16b, v7.16b, v16.16b
; CHECK-NEXT: sub v7.8h, v1.8h, v3.8h
; CHECK-NEXT: cmgt v2.8h, v2.8h, #0
; CHECK-NEXT: cmgt v0.8h, v0.8h, v4.8h
; CHECK-NEXT: cmlt v16.8h, v7.8h, #0
; CHECK-NEXT: mvni v5.8h, #128, lsl #8
; CHECK-NEXT: cmgt v3.8h, v3.8h, #0
; CHECK-NEXT: cmgt v1.8h, v1.8h, v7.8h
; CHECK-NEXT: eor v0.16b, v2.16b, v0.16b
; CHECK-NEXT: mvn v2.16b, v16.16b
; CHECK-NEXT: eor v1.16b, v3.16b, v1.16b
; CHECK-NEXT: bsl v5.16b, v16.16b, v2.16b
; CHECK-NEXT: bsl v0.16b, v6.16b, v4.16b
; CHECK-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-NEXT: sqsub v0.8h, v0.8h, v2.8h
; CHECK-NEXT: sqsub v1.8h, v1.8h, v3.8h
; CHECK-NEXT: ret
%z = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %x, <16 x i16> %y)
ret <16 x i16> %z
@ -166,42 +86,10 @@ define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
; CHECK-LABEL: v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v16.8h, v0.8h, v4.8h
; CHECK-NEXT: cmlt v24.8h, v16.8h, #0
; CHECK-NEXT: mvni v18.8h, #128, lsl #8
; CHECK-NEXT: sub v19.8h, v1.8h, v5.8h
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: bsl v18.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.8h, v19.8h, #0
; CHECK-NEXT: mvni v20.8h, #128, lsl #8
; CHECK-NEXT: sub v21.8h, v2.8h, v6.8h
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: bsl v20.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.8h, v21.8h, #0
; CHECK-NEXT: cmgt v4.8h, v4.8h, #0
; CHECK-NEXT: cmgt v0.8h, v0.8h, v16.8h
; CHECK-NEXT: mvni v22.8h, #128, lsl #8
; CHECK-NEXT: sub v23.8h, v3.8h, v7.8h
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: eor v0.16b, v4.16b, v0.16b
; CHECK-NEXT: cmgt v4.8h, v5.8h, #0
; CHECK-NEXT: cmgt v1.8h, v1.8h, v19.8h
; CHECK-NEXT: bsl v22.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.8h, v23.8h, #0
; CHECK-NEXT: eor v1.16b, v4.16b, v1.16b
; CHECK-NEXT: cmgt v4.8h, v6.8h, #0
; CHECK-NEXT: cmgt v2.8h, v2.8h, v21.8h
; CHECK-NEXT: mvni v17.8h, #128, lsl #8
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: eor v2.16b, v4.16b, v2.16b
; CHECK-NEXT: cmgt v4.8h, v7.8h, #0
; CHECK-NEXT: cmgt v3.8h, v3.8h, v23.8h
; CHECK-NEXT: bsl v17.16b, v24.16b, v25.16b
; CHECK-NEXT: eor v3.16b, v4.16b, v3.16b
; CHECK-NEXT: bsl v0.16b, v18.16b, v16.16b
; CHECK-NEXT: bsl v1.16b, v20.16b, v19.16b
; CHECK-NEXT: bsl v2.16b, v22.16b, v21.16b
; CHECK-NEXT: bsl v3.16b, v17.16b, v23.16b
; CHECK-NEXT: sqsub v0.8h, v0.8h, v4.8h
; CHECK-NEXT: sqsub v1.8h, v1.8h, v5.8h
; CHECK-NEXT: sqsub v2.8h, v2.8h, v6.8h
; CHECK-NEXT: sqsub v3.8h, v3.8h, v7.8h
; CHECK-NEXT: ret
%z = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
ret <32 x i16> %z
@ -212,15 +100,7 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: movi v2.8b, #127
; CHECK-NEXT: sub v3.8b, v0.8b, v1.8b
; CHECK-NEXT: cmlt v4.8b, v3.8b, #0
; CHECK-NEXT: cmgt v1.8b, v1.8b, #0
; CHECK-NEXT: cmgt v0.8b, v0.8b, v3.8b
; CHECK-NEXT: mvn v5.8b, v4.8b
; CHECK-NEXT: bsl v2.8b, v4.8b, v5.8b
; CHECK-NEXT: eor v0.8b, v1.8b, v0.8b
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: sqsub v0.8b, v0.8b, v1.8b
; CHECK-NEXT: str d0, [x2]
; CHECK-NEXT: ret
%x = load <8 x i8>, <8 x i8>* %px
@ -249,11 +129,10 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
; CHECK-NEXT: mov v1.h[2], w9
; CHECK-NEXT: mov v0.h[3], w10
; CHECK-NEXT: mov v1.h[3], w11
; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: movi v1.4h, #127
; CHECK-NEXT: smin v0.4h, v0.4h, v1.4h
; CHECK-NEXT: mvni v1.4h, #127
; CHECK-NEXT: smax v0.4h, v0.4h, v1.4h
; CHECK-NEXT: shl v1.4h, v1.4h, #8
; CHECK-NEXT: shl v0.4h, v0.4h, #8
; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: str s0, [x2]
; CHECK-NEXT: ret
@ -267,19 +146,18 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
; CHECK-LABEL: v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrsb w8, [x0]
; CHECK-NEXT: ldrsb w9, [x1]
; CHECK-NEXT: ldrsb w10, [x0, #1]
; CHECK-NEXT: ldrsb w11, [x1, #1]
; CHECK-NEXT: ldrb w8, [x0]
; CHECK-NEXT: ldrb w9, [x1]
; CHECK-NEXT: ldrb w10, [x0, #1]
; CHECK-NEXT: ldrb w11, [x1, #1]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: mov v0.s[1], w10
; CHECK-NEXT: mov v1.s[1], w11
; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s
; CHECK-NEXT: movi v1.2s, #127
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mvni v1.2s, #127
; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: shl v1.2s, v1.2s, #24
; CHECK-NEXT: shl v0.2s, v0.2s, #24
; CHECK-NEXT: sqsub v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ushr v0.2s, v0.2s, #24
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: strb w8, [x2, #1]
@ -297,15 +175,7 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: mvni v2.4h, #128, lsl #8
; CHECK-NEXT: sub v3.4h, v0.4h, v1.4h
; CHECK-NEXT: cmlt v4.4h, v3.4h, #0
; CHECK-NEXT: cmgt v1.4h, v1.4h, #0
; CHECK-NEXT: cmgt v0.4h, v0.4h, v3.4h
; CHECK-NEXT: mvn v5.8b, v4.8b
; CHECK-NEXT: bsl v2.8b, v4.8b, v5.8b
; CHECK-NEXT: eor v0.8b, v1.8b, v0.8b
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: str d0, [x2]
; CHECK-NEXT: ret
%x = load <4 x i16>, <4 x i16>* %px
@ -318,19 +188,18 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind {
define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
; CHECK-LABEL: v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldrsh w8, [x0]
; CHECK-NEXT: ldrsh w9, [x1]
; CHECK-NEXT: ldrsh w10, [x0, #2]
; CHECK-NEXT: ldrsh w11, [x1, #2]
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: ldrh w9, [x1]
; CHECK-NEXT: ldrh w10, [x0, #2]
; CHECK-NEXT: ldrh w11, [x1, #2]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: mov v0.s[1], w10
; CHECK-NEXT: mov v1.s[1], w11
; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s
; CHECK-NEXT: movi v1.2s, #127, msl #8
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mvni v1.2s, #127, msl #8
; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: shl v1.2s, v1.2s, #16
; CHECK-NEXT: shl v0.2s, v0.2s, #16
; CHECK-NEXT: sqsub v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ushr v0.2s, v0.2s, #16
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: strh w8, [x2, #2]
@ -346,15 +215,7 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind {
; CHECK-LABEL: v12i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v2.16b, v0.16b, v1.16b
; CHECK-NEXT: cmlt v4.16b, v2.16b, #0
; CHECK-NEXT: movi v3.16b, #127
; CHECK-NEXT: cmgt v1.16b, v1.16b, #0
; CHECK-NEXT: cmgt v0.16b, v0.16b, v2.16b
; CHECK-NEXT: mvn v5.16b, v4.16b
; CHECK-NEXT: bsl v3.16b, v4.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
; CHECK-NEXT: sqsub v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <12 x i8> @llvm.ssub.sat.v12i8(<12 x i8> %x, <12 x i8> %y)
ret <12 x i8> %z
@ -365,24 +226,8 @@ define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ldp q3, q2, [x1]
; CHECK-NEXT: mvni v5.8h, #128, lsl #8
; CHECK-NEXT: mvni v4.8h, #128, lsl #8
; CHECK-NEXT: sub v6.8h, v1.8h, v2.8h
; CHECK-NEXT: cmlt v7.8h, v6.8h, #0
; CHECK-NEXT: mvn v16.16b, v7.16b
; CHECK-NEXT: bsl v5.16b, v7.16b, v16.16b
; CHECK-NEXT: sub v7.8h, v0.8h, v3.8h
; CHECK-NEXT: cmgt v2.8h, v2.8h, #0
; CHECK-NEXT: cmgt v1.8h, v1.8h, v6.8h
; CHECK-NEXT: cmlt v16.8h, v7.8h, #0
; CHECK-NEXT: cmgt v3.8h, v3.8h, #0
; CHECK-NEXT: cmgt v0.8h, v0.8h, v7.8h
; CHECK-NEXT: eor v1.16b, v2.16b, v1.16b
; CHECK-NEXT: mvn v2.16b, v16.16b
; CHECK-NEXT: eor v0.16b, v3.16b, v0.16b
; CHECK-NEXT: bsl v4.16b, v16.16b, v2.16b
; CHECK-NEXT: bsl v1.16b, v5.16b, v6.16b
; CHECK-NEXT: bsl v0.16b, v4.16b, v7.16b
; CHECK-NEXT: sqsub v1.8h, v1.8h, v2.8h
; CHECK-NEXT: sqsub v0.8h, v0.8h, v3.8h
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: str d1, [x2, #16]
; CHECK-NEXT: ret
@ -398,15 +243,7 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ldr b1, [x1]
; CHECK-NEXT: movi v2.8b, #127
; CHECK-NEXT: sub v3.8b, v0.8b, v1.8b
; CHECK-NEXT: cmlt v4.8b, v3.8b, #0
; CHECK-NEXT: cmgt v1.8b, v1.8b, #0
; CHECK-NEXT: cmgt v0.8b, v0.8b, v3.8b
; CHECK-NEXT: mvn v5.8b, v4.8b
; CHECK-NEXT: bsl v2.8b, v4.8b, v5.8b
; CHECK-NEXT: eor v0.8b, v1.8b, v0.8b
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: sqsub v0.8b, v0.8b, v1.8b
; CHECK-NEXT: st1 { v0.b }[0], [x2]
; CHECK-NEXT: ret
%x = load <1 x i8>, <1 x i8>* %px
@ -421,15 +258,7 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ldr h1, [x1]
; CHECK-NEXT: mvni v2.4h, #128, lsl #8
; CHECK-NEXT: sub v3.4h, v0.4h, v1.4h
; CHECK-NEXT: cmlt v4.4h, v3.4h, #0
; CHECK-NEXT: cmgt v1.4h, v1.4h, #0
; CHECK-NEXT: cmgt v0.4h, v0.4h, v3.4h
; CHECK-NEXT: mvn v5.8b, v4.8b
; CHECK-NEXT: bsl v2.8b, v4.8b, v5.8b
; CHECK-NEXT: eor v0.8b, v1.8b, v0.8b
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: str h0, [x2]
; CHECK-NEXT: ret
%x = load <1 x i16>, <1 x i16>* %px
@ -442,15 +271,14 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; CHECK-LABEL: v16i4:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.16b, v0.16b, #4
; CHECK-NEXT: shl v1.16b, v1.16b, #4
; CHECK-NEXT: sshr v0.16b, v0.16b, #4
; CHECK-NEXT: sshr v1.16b, v1.16b, #4
; CHECK-NEXT: shl v1.16b, v1.16b, #4
; CHECK-NEXT: shl v0.16b, v0.16b, #4
; CHECK-NEXT: sshr v1.16b, v1.16b, #4
; CHECK-NEXT: sqsub v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshr v0.16b, v0.16b, #4
; CHECK-NEXT: movi v2.16b, #7
; CHECK-NEXT: sub v0.16b, v0.16b, v1.16b
; CHECK-NEXT: smin v0.16b, v0.16b, v2.16b
; CHECK-NEXT: movi v1.16b, #248
; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <16 x i4> @llvm.ssub.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
ret <16 x i4> %z
@ -459,15 +287,14 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; CHECK-LABEL: v16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: shl v1.16b, v1.16b, #7
; CHECK-NEXT: sshr v0.16b, v0.16b, #7
; CHECK-NEXT: sshr v1.16b, v1.16b, #7
; CHECK-NEXT: shl v1.16b, v1.16b, #7
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: sshr v1.16b, v1.16b, #7
; CHECK-NEXT: sqsub v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sshr v0.16b, v0.16b, #7
; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: sub v0.16b, v0.16b, v1.16b
; CHECK-NEXT: smin v0.16b, v0.16b, v2.16b
; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
@ -476,15 +303,7 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
; CHECK-LABEL: v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v2.2s, v0.2s, v1.2s
; CHECK-NEXT: cmlt v4.2s, v2.2s, #0
; CHECK-NEXT: mvni v3.2s, #128, lsl #24
; CHECK-NEXT: cmgt v1.2s, v1.2s, #0
; CHECK-NEXT: cmgt v0.2s, v0.2s, v2.2s
; CHECK-NEXT: mvn v5.8b, v4.8b
; CHECK-NEXT: bsl v3.8b, v4.8b, v5.8b
; CHECK-NEXT: eor v0.8b, v1.8b, v0.8b
; CHECK-NEXT: bsl v0.8b, v3.8b, v2.8b
; CHECK-NEXT: sqsub v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%z = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
ret <2 x i32> %z
@ -493,15 +312,7 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v2.4s, v0.4s, v1.4s
; CHECK-NEXT: cmlt v4.4s, v2.4s, #0
; CHECK-NEXT: mvni v3.4s, #128, lsl #24
; CHECK-NEXT: cmgt v1.4s, v1.4s, #0
; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s
; CHECK-NEXT: mvn v5.16b, v4.16b
; CHECK-NEXT: bsl v3.16b, v4.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
; CHECK-NEXT: sqsub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%z = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
ret <4 x i32> %z
@ -510,24 +321,8 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; CHECK-LABEL: v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v4.4s, v0.4s, v2.4s
; CHECK-NEXT: cmlt v7.4s, v4.4s, #0
; CHECK-NEXT: mvni v6.4s, #128, lsl #24
; CHECK-NEXT: mvn v16.16b, v7.16b
; CHECK-NEXT: bsl v6.16b, v7.16b, v16.16b
; CHECK-NEXT: sub v7.4s, v1.4s, v3.4s
; CHECK-NEXT: cmgt v2.4s, v2.4s, #0
; CHECK-NEXT: cmgt v0.4s, v0.4s, v4.4s
; CHECK-NEXT: cmlt v16.4s, v7.4s, #0
; CHECK-NEXT: mvni v5.4s, #128, lsl #24
; CHECK-NEXT: cmgt v3.4s, v3.4s, #0
; CHECK-NEXT: cmgt v1.4s, v1.4s, v7.4s
; CHECK-NEXT: eor v0.16b, v2.16b, v0.16b
; CHECK-NEXT: mvn v2.16b, v16.16b
; CHECK-NEXT: eor v1.16b, v3.16b, v1.16b
; CHECK-NEXT: bsl v5.16b, v16.16b, v2.16b
; CHECK-NEXT: bsl v0.16b, v6.16b, v4.16b
; CHECK-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-NEXT: sqsub v0.4s, v0.4s, v2.4s
; CHECK-NEXT: sqsub v1.4s, v1.4s, v3.4s
; CHECK-NEXT: ret
%z = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
ret <8 x i32> %z
@ -536,42 +331,10 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
; CHECK-LABEL: v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v16.4s, v0.4s, v4.4s
; CHECK-NEXT: cmlt v24.4s, v16.4s, #0
; CHECK-NEXT: mvni v18.4s, #128, lsl #24
; CHECK-NEXT: sub v19.4s, v1.4s, v5.4s
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: bsl v18.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.4s, v19.4s, #0
; CHECK-NEXT: mvni v20.4s, #128, lsl #24
; CHECK-NEXT: sub v21.4s, v2.4s, v6.4s
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: bsl v20.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.4s, v21.4s, #0
; CHECK-NEXT: cmgt v4.4s, v4.4s, #0
; CHECK-NEXT: cmgt v0.4s, v0.4s, v16.4s
; CHECK-NEXT: mvni v22.4s, #128, lsl #24
; CHECK-NEXT: sub v23.4s, v3.4s, v7.4s
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: eor v0.16b, v4.16b, v0.16b
; CHECK-NEXT: cmgt v4.4s, v5.4s, #0
; CHECK-NEXT: cmgt v1.4s, v1.4s, v19.4s
; CHECK-NEXT: bsl v22.16b, v24.16b, v25.16b
; CHECK-NEXT: cmlt v24.4s, v23.4s, #0
; CHECK-NEXT: eor v1.16b, v4.16b, v1.16b
; CHECK-NEXT: cmgt v4.4s, v6.4s, #0
; CHECK-NEXT: cmgt v2.4s, v2.4s, v21.4s
; CHECK-NEXT: mvni v17.4s, #128, lsl #24
; CHECK-NEXT: mvn v25.16b, v24.16b
; CHECK-NEXT: eor v2.16b, v4.16b, v2.16b
; CHECK-NEXT: cmgt v4.4s, v7.4s, #0
; CHECK-NEXT: cmgt v3.4s, v3.4s, v23.4s
; CHECK-NEXT: bsl v17.16b, v24.16b, v25.16b
; CHECK-NEXT: eor v3.16b, v4.16b, v3.16b
; CHECK-NEXT: bsl v0.16b, v18.16b, v16.16b
; CHECK-NEXT: bsl v1.16b, v20.16b, v19.16b
; CHECK-NEXT: bsl v2.16b, v22.16b, v21.16b
; CHECK-NEXT: bsl v3.16b, v17.16b, v23.16b
; CHECK-NEXT: sqsub v0.4s, v0.4s, v4.4s
; CHECK-NEXT: sqsub v1.4s, v1.4s, v5.4s
; CHECK-NEXT: sqsub v2.4s, v2.4s, v6.4s
; CHECK-NEXT: sqsub v3.4s, v3.4s, v7.4s
; CHECK-NEXT: ret
%z = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %x, <16 x i32> %y)
ret <16 x i32> %z
@ -580,16 +343,7 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
; CHECK-LABEL: v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v2.2d, v0.2d, v1.2d
; CHECK-NEXT: mov x8, #9223372036854775807
; CHECK-NEXT: cmlt v3.2d, v2.2d, #0
; CHECK-NEXT: cmgt v1.2d, v1.2d, #0
; CHECK-NEXT: dup v4.2d, x8
; CHECK-NEXT: cmgt v0.2d, v0.2d, v2.2d
; CHECK-NEXT: mvn v5.16b, v3.16b
; CHECK-NEXT: bsl v4.16b, v3.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v4.16b, v2.16b
; CHECK-NEXT: sqsub v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
%z = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
ret <2 x i64> %z
@ -598,25 +352,8 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
; CHECK-LABEL: v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v4.2d, v0.2d, v2.2d
; CHECK-NEXT: mov x8, #9223372036854775807
; CHECK-NEXT: cmlt v5.2d, v4.2d, #0
; CHECK-NEXT: dup v6.2d, x8
; CHECK-NEXT: mvn v7.16b, v5.16b
; CHECK-NEXT: mov v16.16b, v6.16b
; CHECK-NEXT: bsl v16.16b, v5.16b, v7.16b
; CHECK-NEXT: sub v5.2d, v1.2d, v3.2d
; CHECK-NEXT: cmgt v2.2d, v2.2d, #0
; CHECK-NEXT: cmgt v0.2d, v0.2d, v4.2d
; CHECK-NEXT: cmlt v7.2d, v5.2d, #0
; CHECK-NEXT: cmgt v3.2d, v3.2d, #0
; CHECK-NEXT: cmgt v1.2d, v1.2d, v5.2d
; CHECK-NEXT: eor v0.16b, v2.16b, v0.16b
; CHECK-NEXT: mvn v2.16b, v7.16b
; CHECK-NEXT: eor v1.16b, v3.16b, v1.16b
; CHECK-NEXT: bsl v6.16b, v7.16b, v2.16b
; CHECK-NEXT: bsl v0.16b, v16.16b, v4.16b
; CHECK-NEXT: bsl v1.16b, v6.16b, v5.16b
; CHECK-NEXT: sqsub v0.2d, v0.2d, v2.2d
; CHECK-NEXT: sqsub v1.2d, v1.2d, v3.2d
; CHECK-NEXT: ret
%z = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
ret <4 x i64> %z
@ -625,43 +362,10 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
; CHECK-LABEL: v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v16.2d, v0.2d, v4.2d
; CHECK-NEXT: mov x8, #9223372036854775807
; CHECK-NEXT: sub v17.2d, v1.2d, v5.2d
; CHECK-NEXT: cmlt v20.2d, v16.2d, #0
; CHECK-NEXT: dup v21.2d, x8
; CHECK-NEXT: sub v18.2d, v2.2d, v6.2d
; CHECK-NEXT: cmlt v22.2d, v17.2d, #0
; CHECK-NEXT: mvn v24.16b, v20.16b
; CHECK-NEXT: mov v25.16b, v21.16b
; CHECK-NEXT: cmlt v23.2d, v18.2d, #0
; CHECK-NEXT: bsl v25.16b, v20.16b, v24.16b
; CHECK-NEXT: mvn v20.16b, v22.16b
; CHECK-NEXT: mov v24.16b, v21.16b
; CHECK-NEXT: cmgt v4.2d, v4.2d, #0
; CHECK-NEXT: cmgt v0.2d, v0.2d, v16.2d
; CHECK-NEXT: sub v19.2d, v3.2d, v7.2d
; CHECK-NEXT: bsl v24.16b, v22.16b, v20.16b
; CHECK-NEXT: mvn v20.16b, v23.16b
; CHECK-NEXT: mov v22.16b, v21.16b
; CHECK-NEXT: eor v0.16b, v4.16b, v0.16b
; CHECK-NEXT: cmgt v4.2d, v5.2d, #0
; CHECK-NEXT: cmgt v1.2d, v1.2d, v17.2d
; CHECK-NEXT: bsl v22.16b, v23.16b, v20.16b
; CHECK-NEXT: cmlt v20.2d, v19.2d, #0
; CHECK-NEXT: eor v1.16b, v4.16b, v1.16b
; CHECK-NEXT: cmgt v4.2d, v6.2d, #0
; CHECK-NEXT: cmgt v2.2d, v2.2d, v18.2d
; CHECK-NEXT: mvn v23.16b, v20.16b
; CHECK-NEXT: eor v2.16b, v4.16b, v2.16b
; CHECK-NEXT: cmgt v4.2d, v7.2d, #0
; CHECK-NEXT: cmgt v3.2d, v3.2d, v19.2d
; CHECK-NEXT: bsl v21.16b, v20.16b, v23.16b
; CHECK-NEXT: eor v3.16b, v4.16b, v3.16b
; CHECK-NEXT: bsl v0.16b, v25.16b, v16.16b
; CHECK-NEXT: bsl v1.16b, v24.16b, v17.16b
; CHECK-NEXT: bsl v2.16b, v22.16b, v18.16b
; CHECK-NEXT: bsl v3.16b, v21.16b, v19.16b
; CHECK-NEXT: sqsub v0.2d, v0.2d, v4.2d
; CHECK-NEXT: sqsub v1.2d, v1.2d, v5.2d
; CHECK-NEXT: sqsub v2.2d, v2.2d, v6.2d
; CHECK-NEXT: sqsub v3.2d, v3.2d, v7.2d
; CHECK-NEXT: ret
%z = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
ret <8 x i64> %z

View File

@ -35,9 +35,7 @@ declare <2 x i128> @llvm.uadd.sat.v2i128(<2 x i128>, <2 x i128>)
define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; CHECK-LABEL: v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v2.16b, v1.16b
; CHECK-NEXT: umin v0.16b, v0.16b, v2.16b
; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-NEXT: uqadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
ret <16 x i8> %z
@ -46,12 +44,8 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; CHECK-LABEL: v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v4.16b, v2.16b
; CHECK-NEXT: mvn v5.16b, v3.16b
; CHECK-NEXT: umin v0.16b, v0.16b, v4.16b
; CHECK-NEXT: umin v1.16b, v1.16b, v5.16b
; CHECK-NEXT: add v0.16b, v0.16b, v2.16b
; CHECK-NEXT: add v1.16b, v1.16b, v3.16b
; CHECK-NEXT: uqadd v0.16b, v0.16b, v2.16b
; CHECK-NEXT: uqadd v1.16b, v1.16b, v3.16b
; CHECK-NEXT: ret
%z = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %x, <32 x i8> %y)
ret <32 x i8> %z
@ -60,18 +54,10 @@ define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; CHECK-LABEL: v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v16.16b, v4.16b
; CHECK-NEXT: umin v0.16b, v0.16b, v16.16b
; CHECK-NEXT: mvn v16.16b, v5.16b
; CHECK-NEXT: umin v1.16b, v1.16b, v16.16b
; CHECK-NEXT: mvn v16.16b, v6.16b
; CHECK-NEXT: umin v2.16b, v2.16b, v16.16b
; CHECK-NEXT: mvn v16.16b, v7.16b
; CHECK-NEXT: umin v3.16b, v3.16b, v16.16b
; CHECK-NEXT: add v0.16b, v0.16b, v4.16b
; CHECK-NEXT: add v1.16b, v1.16b, v5.16b
; CHECK-NEXT: add v2.16b, v2.16b, v6.16b
; CHECK-NEXT: add v3.16b, v3.16b, v7.16b
; CHECK-NEXT: uqadd v0.16b, v0.16b, v4.16b
; CHECK-NEXT: uqadd v1.16b, v1.16b, v5.16b
; CHECK-NEXT: uqadd v2.16b, v2.16b, v6.16b
; CHECK-NEXT: uqadd v3.16b, v3.16b, v7.16b
; CHECK-NEXT: ret
%z = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
ret <64 x i8> %z
@ -80,9 +66,7 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; CHECK-LABEL: v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v2.16b, v1.16b
; CHECK-NEXT: umin v0.8h, v0.8h, v2.8h
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: uqadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%z = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
ret <8 x i16> %z
@ -91,12 +75,8 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
; CHECK-LABEL: v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v4.16b, v2.16b
; CHECK-NEXT: mvn v5.16b, v3.16b
; CHECK-NEXT: umin v0.8h, v0.8h, v4.8h
; CHECK-NEXT: umin v1.8h, v1.8h, v5.8h
; CHECK-NEXT: add v0.8h, v0.8h, v2.8h
; CHECK-NEXT: add v1.8h, v1.8h, v3.8h
; CHECK-NEXT: uqadd v0.8h, v0.8h, v2.8h
; CHECK-NEXT: uqadd v1.8h, v1.8h, v3.8h
; CHECK-NEXT: ret
%z = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %x, <16 x i16> %y)
ret <16 x i16> %z
@ -105,18 +85,10 @@ define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
; CHECK-LABEL: v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v16.16b, v4.16b
; CHECK-NEXT: umin v0.8h, v0.8h, v16.8h
; CHECK-NEXT: mvn v16.16b, v5.16b
; CHECK-NEXT: umin v1.8h, v1.8h, v16.8h
; CHECK-NEXT: mvn v16.16b, v6.16b
; CHECK-NEXT: umin v2.8h, v2.8h, v16.8h
; CHECK-NEXT: mvn v16.16b, v7.16b
; CHECK-NEXT: umin v3.8h, v3.8h, v16.8h
; CHECK-NEXT: add v0.8h, v0.8h, v4.8h
; CHECK-NEXT: add v1.8h, v1.8h, v5.8h
; CHECK-NEXT: add v2.8h, v2.8h, v6.8h
; CHECK-NEXT: add v3.8h, v3.8h, v7.8h
; CHECK-NEXT: uqadd v0.8h, v0.8h, v4.8h
; CHECK-NEXT: uqadd v1.8h, v1.8h, v5.8h
; CHECK-NEXT: uqadd v2.8h, v2.8h, v6.8h
; CHECK-NEXT: uqadd v3.8h, v3.8h, v7.8h
; CHECK-NEXT: ret
%z = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
ret <32 x i16> %z
@ -125,11 +97,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
; CHECK-LABEL: v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: mvn v2.8b, v0.8b
; CHECK-NEXT: umin v1.8b, v1.8b, v2.8b
; CHECK-NEXT: add v0.8b, v1.8b, v0.8b
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: str d0, [x2]
; CHECK-NEXT: ret
%x = load <8 x i8>, <8 x i8>* %px
@ -146,21 +116,22 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
; CHECK-NEXT: ldrb w9, [x1]
; CHECK-NEXT: ldrb w10, [x0, #1]
; CHECK-NEXT: ldrb w11, [x1, #1]
; CHECK-NEXT: ldrb w12, [x0, #2]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: ldrb w8, [x1, #2]
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: ldrb w8, [x0, #2]
; CHECK-NEXT: ldrb w9, [x1, #2]
; CHECK-NEXT: mov v0.h[1], w10
; CHECK-NEXT: ldrb w9, [x0, #3]
; CHECK-NEXT: ldrb w10, [x1, #3]
; CHECK-NEXT: mov v1.h[1], w11
; CHECK-NEXT: mov v0.h[2], w12
; CHECK-NEXT: mov v1.h[2], w8
; CHECK-NEXT: mov v0.h[3], w9
; CHECK-NEXT: mov v1.h[3], w10
; CHECK-NEXT: movi d2, #0xff00ff00ff00ff
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h
; CHECK-NEXT: ldrb w10, [x0, #3]
; CHECK-NEXT: ldrb w11, [x1, #3]
; CHECK-NEXT: mov v0.h[2], w8
; CHECK-NEXT: mov v1.h[2], w9
; CHECK-NEXT: mov v0.h[3], w10
; CHECK-NEXT: mov v1.h[3], w11
; CHECK-NEXT: shl v1.4h, v1.4h, #8
; CHECK-NEXT: shl v0.4h, v0.4h, #8
; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ushr v0.4h, v0.4h, #8
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: str s0, [x2]
; CHECK-NEXT: ret
@ -179,12 +150,13 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
; CHECK-NEXT: ldrb w10, [x0, #1]
; CHECK-NEXT: ldrb w11, [x1, #1]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fmov s2, w9
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: mov v0.s[1], w10
; CHECK-NEXT: mov v2.s[1], w11
; CHECK-NEXT: movi d1, #0x0000ff000000ff
; CHECK-NEXT: add v0.2s, v0.2s, v2.2s
; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mov v1.s[1], w11
; CHECK-NEXT: shl v1.2s, v1.2s, #24
; CHECK-NEXT: shl v0.2s, v0.2s, #24
; CHECK-NEXT: uqadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ushr v0.2s, v0.2s, #24
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: strb w8, [x2, #1]
@ -200,11 +172,9 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind {
; CHECK-LABEL: v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: mvn v2.8b, v0.8b
; CHECK-NEXT: umin v1.4h, v1.4h, v2.4h
; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: str d0, [x2]
; CHECK-NEXT: ret
%x = load <4 x i16>, <4 x i16>* %px
@ -222,12 +192,13 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
; CHECK-NEXT: ldrh w10, [x0, #2]
; CHECK-NEXT: ldrh w11, [x1, #2]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fmov s2, w9
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: mov v0.s[1], w10
; CHECK-NEXT: mov v2.s[1], w11
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NEXT: add v0.2s, v0.2s, v2.2s
; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mov v1.s[1], w11
; CHECK-NEXT: shl v1.2s, v1.2s, #16
; CHECK-NEXT: shl v0.2s, v0.2s, #16
; CHECK-NEXT: uqadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ushr v0.2s, v0.2s, #16
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: strh w8, [x2, #2]
@ -243,9 +214,7 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind {
; CHECK-LABEL: v12i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v2.16b, v1.16b
; CHECK-NEXT: umin v0.16b, v0.16b, v2.16b
; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-NEXT: uqadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <12 x i8> @llvm.uadd.sat.v12i8(<12 x i8> %x, <12 x i8> %y)
ret <12 x i8> %z
@ -254,16 +223,12 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind {
define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind {
; CHECK-LABEL: v12i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x1]
; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: mvn v4.16b, v0.16b
; CHECK-NEXT: mvn v5.16b, v1.16b
; CHECK-NEXT: umin v2.8h, v2.8h, v4.8h
; CHECK-NEXT: umin v3.8h, v3.8h, v5.8h
; CHECK-NEXT: add v0.8h, v2.8h, v0.8h
; CHECK-NEXT: add v1.8h, v3.8h, v1.8h
; CHECK-NEXT: str q1, [x2]
; CHECK-NEXT: str d0, [x2, #16]
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ldp q3, q2, [x1]
; CHECK-NEXT: uqadd v1.8h, v1.8h, v2.8h
; CHECK-NEXT: uqadd v0.8h, v0.8h, v3.8h
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: str d1, [x2, #16]
; CHECK-NEXT: ret
%x = load <12 x i16>, <12 x i16>* %px
%y = load <12 x i16>, <12 x i16>* %py
@ -275,11 +240,9 @@ define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind
define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
; CHECK-LABEL: v1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x1]
; CHECK-NEXT: ldr b1, [x0]
; CHECK-NEXT: mvn v2.8b, v0.8b
; CHECK-NEXT: umin v1.8b, v1.8b, v2.8b
; CHECK-NEXT: add v0.8b, v1.8b, v0.8b
; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ldr b1, [x1]
; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: st1 { v0.b }[0], [x2]
; CHECK-NEXT: ret
%x = load <1 x i8>, <1 x i8>* %px
@ -292,11 +255,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
; CHECK-LABEL: v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x1]
; CHECK-NEXT: ldr h1, [x0]
; CHECK-NEXT: mvn v2.8b, v0.8b
; CHECK-NEXT: umin v1.4h, v1.4h, v2.4h
; CHECK-NEXT: add v0.4h, v1.4h, v0.4h
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ldr h1, [x1]
; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h
; CHECK-NEXT: str h0, [x2]
; CHECK-NEXT: ret
%x = load <1 x i16>, <1 x i16>* %px
@ -310,10 +271,12 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; CHECK-LABEL: v16i4:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v2.16b, #15
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umin v0.16b, v0.16b, v2.16b
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: shl v1.16b, v1.16b, #4
; CHECK-NEXT: shl v0.16b, v0.16b, #4
; CHECK-NEXT: uqadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushr v0.16b, v0.16b, #4
; CHECK-NEXT: ret
%z = call <16 x i4> @llvm.uadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
ret <16 x i4> %z
@ -323,10 +286,12 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; CHECK-LABEL: v16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v2.16b, #1
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umin v0.16b, v0.16b, v2.16b
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: shl v1.16b, v1.16b, #7
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: uqadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushr v0.16b, v0.16b, #7
; CHECK-NEXT: ret
%z = call <16 x i1> @llvm.uadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
@ -335,9 +300,7 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
; CHECK-LABEL: v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v2.8b, v1.8b
; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-NEXT: uqadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%z = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
ret <2 x i32> %z
@ -346,9 +309,7 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v2.16b, v1.16b
; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: uqadd v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%z = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
ret <4 x i32> %z
@ -357,12 +318,8 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; CHECK-LABEL: v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v4.16b, v2.16b
; CHECK-NEXT: mvn v5.16b, v3.16b
; CHECK-NEXT: umin v0.4s, v0.4s, v4.4s
; CHECK-NEXT: umin v1.4s, v1.4s, v5.4s
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
; CHECK-NEXT: uqadd v0.4s, v0.4s, v2.4s
; CHECK-NEXT: uqadd v1.4s, v1.4s, v3.4s
; CHECK-NEXT: ret
%z = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
ret <8 x i32> %z
@ -371,18 +328,10 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
; CHECK-LABEL: v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v16.16b, v4.16b
; CHECK-NEXT: umin v0.4s, v0.4s, v16.4s
; CHECK-NEXT: mvn v16.16b, v5.16b
; CHECK-NEXT: umin v1.4s, v1.4s, v16.4s
; CHECK-NEXT: mvn v16.16b, v6.16b
; CHECK-NEXT: umin v2.4s, v2.4s, v16.4s
; CHECK-NEXT: mvn v16.16b, v7.16b
; CHECK-NEXT: umin v3.4s, v3.4s, v16.4s
; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
; CHECK-NEXT: add v1.4s, v1.4s, v5.4s
; CHECK-NEXT: add v2.4s, v2.4s, v6.4s
; CHECK-NEXT: add v3.4s, v3.4s, v7.4s
; CHECK-NEXT: uqadd v0.4s, v0.4s, v4.4s
; CHECK-NEXT: uqadd v1.4s, v1.4s, v5.4s
; CHECK-NEXT: uqadd v2.4s, v2.4s, v6.4s
; CHECK-NEXT: uqadd v3.4s, v3.4s, v7.4s
; CHECK-NEXT: ret
%z = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y)
ret <16 x i32> %z
@ -391,9 +340,7 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
; CHECK-LABEL: v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add v1.2d, v0.2d, v1.2d
; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: uqadd v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
%z = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
ret <2 x i64> %z
@ -402,12 +349,8 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
; CHECK-LABEL: v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add v2.2d, v0.2d, v2.2d
; CHECK-NEXT: add v3.2d, v1.2d, v3.2d
; CHECK-NEXT: cmhi v0.2d, v0.2d, v2.2d
; CHECK-NEXT: cmhi v1.2d, v1.2d, v3.2d
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v1.16b, v3.16b, v1.16b
; CHECK-NEXT: uqadd v0.2d, v0.2d, v2.2d
; CHECK-NEXT: uqadd v1.2d, v1.2d, v3.2d
; CHECK-NEXT: ret
%z = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
ret <4 x i64> %z
@ -416,18 +359,10 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
; CHECK-LABEL: v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add v4.2d, v0.2d, v4.2d
; CHECK-NEXT: add v5.2d, v1.2d, v5.2d
; CHECK-NEXT: add v6.2d, v2.2d, v6.2d
; CHECK-NEXT: add v7.2d, v3.2d, v7.2d
; CHECK-NEXT: cmhi v0.2d, v0.2d, v4.2d
; CHECK-NEXT: cmhi v1.2d, v1.2d, v5.2d
; CHECK-NEXT: cmhi v2.2d, v2.2d, v6.2d
; CHECK-NEXT: cmhi v3.2d, v3.2d, v7.2d
; CHECK-NEXT: orr v0.16b, v4.16b, v0.16b
; CHECK-NEXT: orr v1.16b, v5.16b, v1.16b
; CHECK-NEXT: orr v2.16b, v6.16b, v2.16b
; CHECK-NEXT: orr v3.16b, v7.16b, v3.16b
; CHECK-NEXT: uqadd v0.2d, v0.2d, v4.2d
; CHECK-NEXT: uqadd v1.2d, v1.2d, v5.2d
; CHECK-NEXT: uqadd v2.2d, v2.2d, v6.2d
; CHECK-NEXT: uqadd v3.2d, v3.2d, v7.2d
; CHECK-NEXT: ret
%z = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
ret <8 x i64> %z

View File

@ -36,8 +36,7 @@ declare <2 x i128> @llvm.usub.sat.v2i128(<2 x i128>, <2 x i128>)
define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; CHECK-LABEL: v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sub v0.16b, v0.16b, v1.16b
; CHECK-NEXT: uqsub v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
ret <16 x i8> %z
@ -46,10 +45,8 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
; CHECK-LABEL: v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.16b, v0.16b, v2.16b
; CHECK-NEXT: umax v1.16b, v1.16b, v3.16b
; CHECK-NEXT: sub v0.16b, v0.16b, v2.16b
; CHECK-NEXT: sub v1.16b, v1.16b, v3.16b
; CHECK-NEXT: uqsub v0.16b, v0.16b, v2.16b
; CHECK-NEXT: uqsub v1.16b, v1.16b, v3.16b
; CHECK-NEXT: ret
%z = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %x, <32 x i8> %y)
ret <32 x i8> %z
@ -58,14 +55,10 @@ define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
; CHECK-LABEL: v64i8:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.16b, v0.16b, v4.16b
; CHECK-NEXT: umax v1.16b, v1.16b, v5.16b
; CHECK-NEXT: umax v2.16b, v2.16b, v6.16b
; CHECK-NEXT: umax v3.16b, v3.16b, v7.16b
; CHECK-NEXT: sub v0.16b, v0.16b, v4.16b
; CHECK-NEXT: sub v1.16b, v1.16b, v5.16b
; CHECK-NEXT: sub v2.16b, v2.16b, v6.16b
; CHECK-NEXT: sub v3.16b, v3.16b, v7.16b
; CHECK-NEXT: uqsub v0.16b, v0.16b, v4.16b
; CHECK-NEXT: uqsub v1.16b, v1.16b, v5.16b
; CHECK-NEXT: uqsub v2.16b, v2.16b, v6.16b
; CHECK-NEXT: uqsub v3.16b, v3.16b, v7.16b
; CHECK-NEXT: ret
%z = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
ret <64 x i8> %z
@ -74,8 +67,7 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; CHECK-LABEL: v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.8h, v0.8h, v1.8h
; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT: uqsub v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%z = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
ret <8 x i16> %z
@ -84,10 +76,8 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
; CHECK-LABEL: v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.8h, v0.8h, v2.8h
; CHECK-NEXT: umax v1.8h, v1.8h, v3.8h
; CHECK-NEXT: sub v0.8h, v0.8h, v2.8h
; CHECK-NEXT: sub v1.8h, v1.8h, v3.8h
; CHECK-NEXT: uqsub v0.8h, v0.8h, v2.8h
; CHECK-NEXT: uqsub v1.8h, v1.8h, v3.8h
; CHECK-NEXT: ret
%z = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %x, <16 x i16> %y)
ret <16 x i16> %z
@ -96,14 +86,10 @@ define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
; CHECK-LABEL: v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.8h, v0.8h, v4.8h
; CHECK-NEXT: umax v1.8h, v1.8h, v5.8h
; CHECK-NEXT: umax v2.8h, v2.8h, v6.8h
; CHECK-NEXT: umax v3.8h, v3.8h, v7.8h
; CHECK-NEXT: sub v0.8h, v0.8h, v4.8h
; CHECK-NEXT: sub v1.8h, v1.8h, v5.8h
; CHECK-NEXT: sub v2.8h, v2.8h, v6.8h
; CHECK-NEXT: sub v3.8h, v3.8h, v7.8h
; CHECK-NEXT: uqsub v0.8h, v0.8h, v4.8h
; CHECK-NEXT: uqsub v1.8h, v1.8h, v5.8h
; CHECK-NEXT: uqsub v2.8h, v2.8h, v6.8h
; CHECK-NEXT: uqsub v3.8h, v3.8h, v7.8h
; CHECK-NEXT: ret
%z = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
ret <32 x i16> %z
@ -114,8 +100,7 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: umax v0.8b, v0.8b, v1.8b
; CHECK-NEXT: sub v0.8b, v0.8b, v1.8b
; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b
; CHECK-NEXT: str d0, [x2]
; CHECK-NEXT: ret
%x = load <8 x i8>, <8 x i8>* %px
@ -144,8 +129,10 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
; CHECK-NEXT: mov v1.h[2], w9
; CHECK-NEXT: mov v0.h[3], w10
; CHECK-NEXT: mov v1.h[3], w11
; CHECK-NEXT: umax v0.4h, v0.4h, v1.4h
; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: shl v1.4h, v1.4h, #8
; CHECK-NEXT: shl v0.4h, v0.4h, #8
; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ushr v0.4h, v0.4h, #8
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: str s0, [x2]
; CHECK-NEXT: ret
@ -167,8 +154,10 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: mov v0.s[1], w10
; CHECK-NEXT: mov v1.s[1], w11
; CHECK-NEXT: umax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s
; CHECK-NEXT: shl v1.2s, v1.2s, #24
; CHECK-NEXT: shl v0.2s, v0.2s, #24
; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ushr v0.2s, v0.2s, #24
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: strb w8, [x2, #1]
@ -186,8 +175,7 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: umax v0.4h, v0.4h, v1.4h
; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: str d0, [x2]
; CHECK-NEXT: ret
%x = load <4 x i16>, <4 x i16>* %px
@ -208,8 +196,10 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: mov v0.s[1], w10
; CHECK-NEXT: mov v1.s[1], w11
; CHECK-NEXT: umax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s
; CHECK-NEXT: shl v1.2s, v1.2s, #16
; CHECK-NEXT: shl v0.2s, v0.2s, #16
; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ushr v0.2s, v0.2s, #16
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: strh w8, [x2, #2]
@ -225,8 +215,7 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind {
; CHECK-LABEL: v12i8:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sub v0.16b, v0.16b, v1.16b
; CHECK-NEXT: uqsub v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <12 x i8> @llvm.usub.sat.v12i8(<12 x i8> %x, <12 x i8> %y)
ret <12 x i8> %z
@ -237,10 +226,8 @@ define void @v12i16(<12 x i16>* %px, <12 x i16>* %py, <12 x i16>* %pz) nounwind
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ldp q3, q2, [x1]
; CHECK-NEXT: umax v1.8h, v1.8h, v2.8h
; CHECK-NEXT: umax v0.8h, v0.8h, v3.8h
; CHECK-NEXT: sub v1.8h, v1.8h, v2.8h
; CHECK-NEXT: sub v0.8h, v0.8h, v3.8h
; CHECK-NEXT: uqsub v1.8h, v1.8h, v2.8h
; CHECK-NEXT: uqsub v0.8h, v0.8h, v3.8h
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: str d1, [x2, #16]
; CHECK-NEXT: ret
@ -256,8 +243,7 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr b0, [x0]
; CHECK-NEXT: ldr b1, [x1]
; CHECK-NEXT: umax v0.8b, v0.8b, v1.8b
; CHECK-NEXT: sub v0.8b, v0.8b, v1.8b
; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b
; CHECK-NEXT: st1 { v0.b }[0], [x2]
; CHECK-NEXT: ret
%x = load <1 x i8>, <1 x i8>* %px
@ -272,8 +258,7 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ldr h1, [x1]
; CHECK-NEXT: umax v0.4h, v0.4h, v1.4h
; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: str h0, [x2]
; CHECK-NEXT: ret
%x = load <1 x i16>, <1 x i16>* %px
@ -287,10 +272,12 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; CHECK-LABEL: v16i4:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v2.16b, #15
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: umax v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sub v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: shl v1.16b, v1.16b, #4
; CHECK-NEXT: shl v0.16b, v0.16b, #4
; CHECK-NEXT: uqsub v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushr v0.16b, v0.16b, #4
; CHECK-NEXT: ret
%z = call <16 x i4> @llvm.usub.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
ret <16 x i4> %z
@ -300,10 +287,12 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; CHECK-LABEL: v16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v2.16b, #1
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: umax v0.16b, v0.16b, v1.16b
; CHECK-NEXT: sub v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: shl v1.16b, v1.16b, #7
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: uqsub v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushr v0.16b, v0.16b, #7
; CHECK-NEXT: ret
%z = call <16 x i1> @llvm.usub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
@ -312,8 +301,7 @@ define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
; CHECK-LABEL: v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: sub v0.2s, v0.2s, v1.2s
; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%z = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %x, <2 x i32> %y)
ret <2 x i32> %z
@ -322,8 +310,7 @@ define <2 x i32> @v2i32(<2 x i32> %x, <2 x i32> %y) nounwind {
define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: uqsub v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%z = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
ret <4 x i32> %z
@ -332,10 +319,8 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
; CHECK-LABEL: v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v3.4s
; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
; CHECK-NEXT: sub v1.4s, v1.4s, v3.4s
; CHECK-NEXT: uqsub v0.4s, v0.4s, v2.4s
; CHECK-NEXT: uqsub v1.4s, v1.4s, v3.4s
; CHECK-NEXT: ret
%z = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
ret <8 x i32> %z
@ -344,14 +329,10 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
; CHECK-LABEL: v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: umax v0.4s, v0.4s, v4.4s
; CHECK-NEXT: umax v1.4s, v1.4s, v5.4s
; CHECK-NEXT: umax v2.4s, v2.4s, v6.4s
; CHECK-NEXT: umax v3.4s, v3.4s, v7.4s
; CHECK-NEXT: sub v0.4s, v0.4s, v4.4s
; CHECK-NEXT: sub v1.4s, v1.4s, v5.4s
; CHECK-NEXT: sub v2.4s, v2.4s, v6.4s
; CHECK-NEXT: sub v3.4s, v3.4s, v7.4s
; CHECK-NEXT: uqsub v0.4s, v0.4s, v4.4s
; CHECK-NEXT: uqsub v1.4s, v1.4s, v5.4s
; CHECK-NEXT: uqsub v2.4s, v2.4s, v6.4s
; CHECK-NEXT: uqsub v3.4s, v3.4s, v7.4s
; CHECK-NEXT: ret
%z = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %x, <16 x i32> %y)
ret <16 x i32> %z
@ -360,9 +341,7 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
; CHECK-LABEL: v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v1.2d, v0.2d, v1.2d
; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d
; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
; CHECK-NEXT: uqsub v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
%z = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
ret <2 x i64> %z
@ -371,12 +350,8 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
; CHECK-LABEL: v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v2.2d, v0.2d, v2.2d
; CHECK-NEXT: sub v3.2d, v1.2d, v3.2d
; CHECK-NEXT: cmhi v0.2d, v2.2d, v0.2d
; CHECK-NEXT: cmhi v1.2d, v3.2d, v1.2d
; CHECK-NEXT: bic v0.16b, v2.16b, v0.16b
; CHECK-NEXT: bic v1.16b, v3.16b, v1.16b
; CHECK-NEXT: uqsub v0.2d, v0.2d, v2.2d
; CHECK-NEXT: uqsub v1.2d, v1.2d, v3.2d
; CHECK-NEXT: ret
%z = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
ret <4 x i64> %z
@ -385,18 +360,10 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
; CHECK-LABEL: v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v4.2d, v0.2d, v4.2d
; CHECK-NEXT: sub v5.2d, v1.2d, v5.2d
; CHECK-NEXT: sub v6.2d, v2.2d, v6.2d
; CHECK-NEXT: sub v7.2d, v3.2d, v7.2d
; CHECK-NEXT: cmhi v0.2d, v4.2d, v0.2d
; CHECK-NEXT: cmhi v1.2d, v5.2d, v1.2d
; CHECK-NEXT: cmhi v2.2d, v6.2d, v2.2d
; CHECK-NEXT: cmhi v3.2d, v7.2d, v3.2d
; CHECK-NEXT: bic v0.16b, v4.16b, v0.16b
; CHECK-NEXT: bic v1.16b, v5.16b, v1.16b
; CHECK-NEXT: bic v2.16b, v6.16b, v2.16b
; CHECK-NEXT: bic v3.16b, v7.16b, v3.16b
; CHECK-NEXT: uqsub v0.2d, v0.2d, v4.2d
; CHECK-NEXT: uqsub v1.2d, v1.2d, v5.2d
; CHECK-NEXT: uqsub v2.2d, v2.2d, v6.2d
; CHECK-NEXT: uqsub v3.2d, v3.2d, v7.2d
; CHECK-NEXT: ret
%z = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
ret <8 x i64> %z