llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
Ahmed Bougacha ae618e7873 [AArch64] Also combine vector selects fed by non-i1 SETCCs.
After legalization, scalar SETCC has an i32 result type on AArch64.
The i1 requirement seems too conservative, replace it with an assert.

This also means that we now can run after legalization. That should also
be fine, since the ops legalizer runs again after each combine, and
all types created all have the same sizes as the (legal) inputs.

Exposed by r235917; while there, robustize its tests (bsl also uses the
register it defines).


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235922 91177308-0d34-0410-b5e6-96231b3b80d8
2015-04-27 21:43:12 +00:00

250 lines
9.3 KiB
LLVM

; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \
; RUN: < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s
define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) {
; CHECK-LABEL: test_select_cc_v8i8_i8:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].8b, v[[LHS]].8b, v[[RHS]].8b
; CHECK: dup [[DUPMASK:v[0-9]+]].8b, [[MASK]].b[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
%cmp31 = icmp eq i8 %a, %b
%e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
ret <8x i8> %e
}
define <8x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8x i8> %c, <8x i8> %d ) {
; CHECK-LABEL: test_select_cc_v8i8_f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK]].8b, v2.8b, v3.8b
%cmp31 = fcmp oeq float %a, %b
%e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
ret <8x i8> %e
}
define <8x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8x i8> %c, <8x i8> %d ) {
; CHECK-LABEL: test_select_cc_v8i8_f64:
; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
; CHECK-NEXT: bsl v[[MASK]].8b, v2.8b, v3.8b
%cmp31 = fcmp oeq double %a, %b
%e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
ret <8x i8> %e
}
define <16x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16x i8> %c, <16x i8> %d ) {
; CHECK-LABEL: test_select_cc_v16i8_i8:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].16b, v[[LHS]].16b, v[[RHS]].16b
; CHECK: dup [[DUPMASK:v[0-9]+]].16b, [[MASK]].b[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
%cmp31 = icmp eq i8 %a, %b
%e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
ret <16x i8> %e
}
define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x i8> %d ) {
; CHECK-LABEL: test_select_cc_v16i8_f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
%cmp31 = fcmp oeq float %a, %b
%e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
ret <16x i8> %e
}
define <16x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16x i8> %c, <16x i8> %d ) {
; CHECK-LABEL: test_select_cc_v16i8_f64:
; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
%cmp31 = fcmp oeq double %a, %b
%e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
ret <16x i8> %e
}
define <4x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4x i16> %c, <4x i16> %d ) {
; CHECK-LABEL: test_select_cc_v4i16:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4h, v[[LHS]].4h, v[[RHS]].4h
; CHECK: dup [[DUPMASK:v[0-9]+]].4h, [[MASK]].h[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
%cmp31 = icmp eq i16 %a, %b
%e = select i1 %cmp31, <4x i16> %c, <4x i16> %d
ret <4x i16> %e
}
define <8x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8x i16> %c, <8x i16> %d ) {
; CHECK-LABEL: test_select_cc_v8i16:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].8h, v[[LHS]].8h, v[[RHS]].8h
; CHECK: dup [[DUPMASK:v[0-9]+]].8h, [[MASK]].h[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
%cmp31 = icmp eq i16 %a, %b
%e = select i1 %cmp31, <8x i16> %c, <8x i16> %d
ret <8x i16> %e
}
define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d ) {
; CHECK-LABEL: test_select_cc_v2i32:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].2s, v[[LHS]].2s, v[[RHS]].2s
; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
%cmp31 = icmp eq i32 %a, %b
%e = select i1 %cmp31, <2x i32> %c, <2x i32> %d
ret <2x i32> %e
}
define <4x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4x i32> %c, <4x i32> %d ) {
; CHECK-LABEL: test_select_cc_v4i32:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
%cmp31 = icmp eq i32 %a, %b
%e = select i1 %cmp31, <4x i32> %c, <4x i32> %d
ret <4x i32> %e
}
define <1x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1x i64> %c, <1x i64> %d ) {
; CHECK-LABEL: test_select_cc_v1i64:
; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
; CHECK: cmeq d[[MASK:[0-9]+]], d[[LHS]], d[[RHS]]
; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
%cmp31 = icmp eq i64 %a, %b
%e = select i1 %cmp31, <1x i64> %c, <1x i64> %d
ret <1x i64> %e
}
define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) {
; CHECK-LABEL: test_select_cc_v2i64:
; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
; CHECK: cmeq [[MASK:v[0-9]+]].2d, v[[LHS]].2d, v[[RHS]].2d
; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
%cmp31 = icmp eq i64 %a, %b
%e = select i1 %cmp31, <2x i64> %c, <2x i64> %d
ret <2x i64> %e
}
define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
; CHECK-LABEL: test_select_cc_v1f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b
%cmp31 = fcmp oeq float %a, %b
%e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
ret <1 x float> %e
}
define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) {
; CHECK-LABEL: test_select_cc_v2f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].8b, v2.8b, v3.8b
%cmp31 = fcmp oeq float %a, %b
%e = select i1 %cmp31, <2 x float> %c, <2 x float> %d
ret <2 x float> %e
}
define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x float> %d ) {
; CHECK-LABEL: test_select_cc_v4f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
%cmp31 = fcmp oeq float %a, %b
%e = select i1 %cmp31, <4x float> %c, <4x float> %d
ret <4x float> %e
}
define <4x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4x float> %c, <4x float> %d ) {
; CHECK-LABEL: test_select_cc_v4f32_icmp:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
%cmp31 = icmp eq i32 %a, %b
%e = select i1 %cmp31, <4x float> %c, <4x float> %d
ret <4x float> %e
}
define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d ) {
; CHECK-LABEL: test_select_cc_v1f64:
; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
; CHECK: bsl v[[MASK]].8b, v2.8b, v3.8b
%cmp31 = fcmp oeq double %a, %b
%e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
ret <1 x double> %e
}
define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d ) {
; CHECK-LABEL: test_select_cc_v1f64_icmp:
; CHECK-DAG: fmov [[LHS:d[0-9]+]], x0
; CHECK-DAG: fmov [[RHS:d[0-9]+]], x1
; CHECK: cmeq d[[MASK:[0-9]+]], [[LHS]], [[RHS]]
; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
%cmp31 = icmp eq i64 %a, %b
%e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
ret <1 x double> %e
}
define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) {
; CHECK-LABEL: test_select_cc_v2f64:
; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
%cmp31 = fcmp oeq double %a, %b
%e = select i1 %cmp31, <2 x double> %c, <2 x double> %d
ret <2 x double> %e
}
; Special case: when the select condition is an icmp with i1 operands, don't
; do the comparison on vectors.
; Part of PR21549.
define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_select_cc_v2i32_icmpi1:
; CHECK: tst w0, #0x1
; CHECK: csetm [[MASK:w[0-9]+]], ne
; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
; CHECK: mov v0.16b, [[DUPMASK]].16b
%cmp = icmp ne i1 %cc, 0
%e = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
ret <2 x i32> %e
}
; Also make sure we support irregular/non-power-of-2 types such as v3f32.
define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].4s, v2.4s, v3.4s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK:v[0-9]+]].16b, v0.16b, v1.16b
; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
; CHECK-NEXT: ret
%cc = fcmp oeq float %c1, %c2
%r = select i1 %cc, <3 x float> %a, <3 x float> %b
ret <3 x float> %r
}
define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].2d, v2.2d, v3.2d
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK-NEXT: bsl [[DUPMASK:v[0-9]+]].16b, v0.16b, v1.16b
; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
; CHECK-NEXT: ret
%cc = fcmp oeq double %c1, %c2
%r = select i1 %cc, <3 x float> %a, <3 x float> %b
ret <3 x float> %r
}
attributes #0 = { nounwind}