Convert more tests to FileCheck.

llvm-svn: 81915
This commit is contained in:
Bob Wilson 2009-09-15 20:58:02 +00:00
parent e62a9a60c7
commit 9bcea11785
6 changed files with 153 additions and 45 deletions

View File

@ -1,8 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep veor %t | count 8
; Note: function names do not include "veor" to allow simple grep for opcodes
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: v_eori8:
;CHECK: veor
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = xor <8 x i8> %tmp1, %tmp2
@ -10,6 +10,8 @@ define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: v_eori16:
;CHECK: veor
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = xor <4 x i16> %tmp1, %tmp2
@ -17,6 +19,8 @@ define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: v_eori32:
;CHECK: veor
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = xor <2 x i32> %tmp1, %tmp2
@ -24,6 +28,8 @@ define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK: v_eori64:
;CHECK: veor
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
%tmp3 = xor <1 x i64> %tmp1, %tmp2
@ -31,6 +37,8 @@ define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
}
define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: v_eorQi8:
;CHECK: veor
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = xor <16 x i8> %tmp1, %tmp2
@ -38,6 +46,8 @@ define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: v_eorQi16:
;CHECK: veor
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = xor <8 x i16> %tmp1, %tmp2
@ -45,6 +55,8 @@ define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: v_eorQi32:
;CHECK: veor
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = xor <4 x i32> %tmp1, %tmp2
@ -52,6 +64,8 @@ define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: v_eorQi64:
;CHECK: veor
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i64>* %B
%tmp3 = xor <2 x i64> %tmp1, %tmp2

View File

@ -1,14 +1,12 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vceq\\.f32} %t | count 1
; RUN: grep {vcgt\\.f32} %t | count 9
; RUN: grep {vcge\\.f32} %t | count 5
; RUN: grep vorr %t | count 4
; RUN: grep vmvn %t | count 7
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; This tests fcmp operations that do not map directly to NEON instructions.
; une is implemented with VCEQ/VMVN
define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcunef32:
;CHECK: vceq.f32
;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp une <2 x float> %tmp1, %tmp2
@ -18,6 +16,8 @@ define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
; olt is implemented with VCGT
define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcoltf32:
;CHECK: vcgt.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
@ -27,6 +27,8 @@ define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
; ole is implemented with VCGE
define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcolef32:
;CHECK: vcge.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
@ -36,6 +38,9 @@ define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
; uge is implemented with VCGT/VMVN
define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcugef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
@ -45,6 +50,9 @@ define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
; ule is implemented with VCGT/VMVN
define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vculef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
@ -54,6 +62,9 @@ define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
; ugt is implemented with VCGE/VMVN
define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcugtf32:
;CHECK: vcge.f32
;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
@ -63,6 +74,9 @@ define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
; ult is implemented with VCGE/VMVN
define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcultf32:
;CHECK: vcge.f32
;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp ult <2 x float> %tmp1, %tmp2
@ -72,6 +86,11 @@ define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
; ueq is implemented with VCGT/VCGT/VORR/VMVN
define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcueqf32:
;CHECK: vcgt.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
@ -81,6 +100,10 @@ define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
; one is implemented with VCGT/VCGT/VORR
define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vconef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp one <2 x float> %tmp1, %tmp2
@ -90,6 +113,11 @@ define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
; uno is implemented with VCGT/VCGE/VORR/VMVN
define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcunof32:
;CHECK: vcge.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
;CHECK-NEXT: vmvn
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
@ -99,6 +127,10 @@ define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
; ord is implemented with VCGT/VCGE/VORR
define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcordf32:
;CHECK: vcge.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
%tmp3 = fcmp ord <2 x float> %tmp1, %tmp2

View File

@ -1,19 +1,4 @@
; RUN: llc < %s -march=arm -mattr=+vfp2 | \
; RUN: grep fabs | count 2
; RUN: llc < %s -march=arm -mattr=+vfp2 | \
; RUN: grep fmscs | count 1
; RUN: llc < %s -march=arm -mattr=+vfp2 | \
; RUN: grep fcvt | count 2
; RUN: llc < %s -march=arm -mattr=+vfp2 | \
; RUN: grep fuito | count 2
; RUN: llc < %s -march=arm -mattr=+vfp2 | \
; RUN: grep fto.i | count 4
; RUN: llc < %s -march=arm -mattr=+vfp2 | \
; RUN: grep bmi | count 1
; RUN: llc < %s -march=arm -mattr=+vfp2 | \
; RUN: grep bgt | count 1
; RUN: llc < %s -march=arm -mattr=+vfp2 | \
; RUN: grep fcmpezs | count 1
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
define void @test(float* %P, double* %D) {
%A = load float* %P ; <float> [#uses=1]
@ -28,16 +13,20 @@ declare float @fabsf(float)
declare double @fabs(double)
define void @test_abs(float* %P, double* %D) {
;CHECK: test_abs:
%a = load float* %P ; <float> [#uses=1]
;CHECK: fabss
%b = call float @fabsf( float %a ) ; <float> [#uses=1]
store float %b, float* %P
%A = load double* %D ; <double> [#uses=1]
;CHECK: fabsd
%B = call double @fabs( double %A ) ; <double> [#uses=1]
store double %B, double* %D
ret void
}
define void @test_add(float* %P, double* %D) {
;CHECK: test_add:
%a = load float* %P ; <float> [#uses=2]
%b = fadd float %a, %a ; <float> [#uses=1]
store float %b, float* %P
@ -48,9 +37,12 @@ define void @test_add(float* %P, double* %D) {
}
define void @test_ext_round(float* %P, double* %D) {
;CHECK: test_ext_round:
%a = load float* %P ; <float> [#uses=1]
;CHECK: fcvtds
%b = fpext float %a to double ; <double> [#uses=1]
%A = load double* %D ; <double> [#uses=1]
;CHECK: fcvtsd
%B = fptrunc double %A to float ; <float> [#uses=1]
store double %b, double* %D
store float %B, float* %P
@ -58,9 +50,11 @@ define void @test_ext_round(float* %P, double* %D) {
}
define void @test_fma(float* %P1, float* %P2, float* %P3) {
;CHECK: test_fma:
%a1 = load float* %P1 ; <float> [#uses=1]
%a2 = load float* %P2 ; <float> [#uses=1]
%a3 = load float* %P3 ; <float> [#uses=1]
;CHECK: fmscs
%X = fmul float %a1, %a2 ; <float> [#uses=1]
%Y = fsub float %X, %a3 ; <float> [#uses=1]
store float %Y, float* %P1
@ -68,42 +62,55 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
}
define i32 @test_ftoi(float* %P1) {
;CHECK: test_ftoi:
%a1 = load float* %P1 ; <float> [#uses=1]
;CHECK: ftosizs
%b1 = fptosi float %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define i32 @test_ftou(float* %P1) {
;CHECK: test_ftou:
%a1 = load float* %P1 ; <float> [#uses=1]
;CHECK: ftouizs
%b1 = fptoui float %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define i32 @test_dtoi(double* %P1) {
;CHECK: test_dtoi:
%a1 = load double* %P1 ; <double> [#uses=1]
;CHECK: ftosizd
%b1 = fptosi double %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define i32 @test_dtou(double* %P1) {
;CHECK: test_dtou:
%a1 = load double* %P1 ; <double> [#uses=1]
;CHECK: ftouizd
%b1 = fptoui double %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
}
define void @test_utod(double* %P1, i32 %X) {
;CHECK: test_utod:
;CHECK: fuitod
%b1 = uitofp i32 %X to double ; <double> [#uses=1]
store double %b1, double* %P1
ret void
}
define void @test_utod2(double* %P1, i8 %X) {
;CHECK: test_utod2:
;CHECK: fuitod
%b1 = uitofp i8 %X to double ; <double> [#uses=1]
store double %b1, double* %P1
ret void
}
define void @test_cmp(float* %glob, i32 %X) {
;CHECK: test_cmp:
entry:
%tmp = load float* %glob ; <float> [#uses=2]
%tmp3 = getelementptr float* %glob, i32 2 ; <float*> [#uses=1]
@ -111,6 +118,8 @@ entry:
%tmp.upgrd.1 = fcmp oeq float %tmp, %tmp4 ; <i1> [#uses=1]
%tmp5 = fcmp uno float %tmp, %tmp4 ; <i1> [#uses=1]
%tmp6 = or i1 %tmp.upgrd.1, %tmp5 ; <i1> [#uses=1]
;CHECK: bmi
;CHECK-NEXT: bgt
br i1 %tmp6, label %cond_true, label %cond_false
cond_true: ; preds = %entry
@ -129,8 +138,10 @@ declare i32 @bar(...)
declare i32 @baz(...)
define void @test_cmpfp0(float* %glob, i32 %X) {
;CHECK: test_cmpfp0:
entry:
%tmp = load float* %glob ; <float> [#uses=1]
;CHECK: fcmpezs
%tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00 ; <i1> [#uses=1]
br i1 %tmp.upgrd.3, label %cond_true, label %cond_false

View File

@ -1,11 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vmov\\.s8} %t | count 2
; RUN: grep {vmov\\.s16} %t | count 2
; RUN: grep {vmov\\.u8} %t | count 2
; RUN: grep {vmov\\.u16} %t | count 2
; RUN: grep {vmov\\.32} %t | count 2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
;CHECK: vget_lanes8:
;CHECK: vmov.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = extractelement <8 x i8> %tmp1, i32 1
%tmp3 = sext i8 %tmp2 to i32
@ -13,6 +10,8 @@ define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
}
define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
;CHECK: vget_lanes16:
;CHECK: vmov.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = extractelement <4 x i16> %tmp1, i32 1
%tmp3 = sext i16 %tmp2 to i32
@ -20,6 +19,8 @@ define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
}
define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
;CHECK: vget_laneu8:
;CHECK: vmov.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = extractelement <8 x i8> %tmp1, i32 1
%tmp3 = zext i8 %tmp2 to i32
@ -27,6 +28,8 @@ define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
}
define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
;CHECK: vget_laneu16:
;CHECK: vmov.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = extractelement <4 x i16> %tmp1, i32 1
%tmp3 = zext i16 %tmp2 to i32
@ -35,6 +38,8 @@ define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
; Do a vector add to keep the extraction from being done directly from memory.
define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
;CHECK: vget_lanei32:
;CHECK: vmov.32
%tmp1 = load <2 x i32>* %A
%tmp2 = add <2 x i32> %tmp1, %tmp1
%tmp3 = extractelement <2 x i32> %tmp2, i32 1
@ -42,6 +47,8 @@ define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
}
define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
;CHECK: vgetQ_lanes8:
;CHECK: vmov.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = extractelement <16 x i8> %tmp1, i32 1
%tmp3 = sext i8 %tmp2 to i32
@ -49,6 +56,8 @@ define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
}
define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
;CHECK: vgetQ_lanes16:
;CHECK: vmov.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = extractelement <8 x i16> %tmp1, i32 1
%tmp3 = sext i16 %tmp2 to i32
@ -56,6 +65,8 @@ define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
}
define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
;CHECK: vgetQ_laneu8:
;CHECK: vmov.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = extractelement <16 x i8> %tmp1, i32 1
%tmp3 = zext i8 %tmp2 to i32
@ -63,6 +74,8 @@ define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
}
define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
;CHECK: vgetQ_laneu16:
;CHECK: vmov.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = extractelement <8 x i16> %tmp1, i32 1
%tmp3 = zext i16 %tmp2 to i32
@ -71,6 +84,8 @@ define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
; Do a vector add to keep the extraction from being done directly from memory.
define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
;CHECK: vgetQ_lanei32:
;CHECK: vmov.32
%tmp1 = load <4 x i32>* %A
%tmp2 = add <4 x i32> %tmp1, %tmp1
%tmp3 = extractelement <4 x i32> %tmp2, i32 1

View File

@ -1,12 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vhadd\\.s8} %t | count 2
; RUN: grep {vhadd\\.s16} %t | count 2
; RUN: grep {vhadd\\.s32} %t | count 2
; RUN: grep {vhadd\\.u8} %t | count 2
; RUN: grep {vhadd\\.u16} %t | count 2
; RUN: grep {vhadd\\.u32} %t | count 2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vhadds8:
;CHECK: vhadd.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@ -14,6 +10,8 @@ define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vhadds16:
;CHECK: vhadd.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -21,6 +19,8 @@ define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vhadds32:
;CHECK: vhadd.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@ -28,6 +28,8 @@ define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vhaddu8:
;CHECK: vhadd.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@ -35,6 +37,8 @@ define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vhaddu16:
;CHECK: vhadd.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -42,6 +46,8 @@ define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vhaddu32:
;CHECK: vhadd.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@ -49,6 +55,8 @@ define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vhaddQs8:
;CHECK: vhadd.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@ -56,6 +64,8 @@ define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vhaddQs16:
;CHECK: vhadd.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@ -63,6 +73,8 @@ define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vhaddQs32:
;CHECK: vhadd.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@ -70,6 +82,8 @@ define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vhaddQu8:
;CHECK: vhadd.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@ -77,6 +91,8 @@ define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vhaddQu16:
;CHECK: vhadd.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@ -84,6 +100,8 @@ define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vhaddQu32:
;CHECK: vhadd.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)

View File

@ -1,12 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+neon > %t
; RUN: grep {vhsub\\.s8} %t | count 2
; RUN: grep {vhsub\\.s16} %t | count 2
; RUN: grep {vhsub\\.s32} %t | count 2
; RUN: grep {vhsub\\.u8} %t | count 2
; RUN: grep {vhsub\\.u16} %t | count 2
; RUN: grep {vhsub\\.u32} %t | count 2
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vhsubs8:
;CHECK: vhsub.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@ -14,6 +10,8 @@ define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vhsubs16:
;CHECK: vhsub.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -21,6 +19,8 @@ define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vhsubs32:
;CHECK: vhsub.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@ -28,6 +28,8 @@ define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vhsubu8:
;CHECK: vhsub.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@ -35,6 +37,8 @@ define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vhsubu16:
;CHECK: vhsub.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@ -42,6 +46,8 @@ define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vhsubu32:
;CHECK: vhsub.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@ -49,6 +55,8 @@ define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vhsubQs8:
;CHECK: vhsub.s8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@ -56,6 +64,8 @@ define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vhsubQs16:
;CHECK: vhsub.s16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@ -63,6 +73,8 @@ define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vhsubQs32:
;CHECK: vhsub.s32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@ -70,6 +82,8 @@ define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
}
define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vhsubQu8:
;CHECK: vhsub.u8
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@ -77,6 +91,8 @@ define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
}
define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vhsubQu16:
;CHECK: vhsub.u16
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@ -84,6 +100,8 @@ define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
}
define <4 x i32> @vhsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vhsubQu32:
;CHECK: vhsub.u32
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)