mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-24 14:33:40 +00:00
[X86][XOP] Add XOP target vselect-pcmp tests
Noticed in D83181 that XOP can probably do a lot more than other targets, thanks to its vector shift and vpcmov instructions.
This commit is contained in:
parent
1700c587ab
commit
7f40cfe2c5
@ -1,8 +1,9 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX12 --check-prefix=AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX12 --check-prefix=AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX12F --check-prefix=AVX512 --check-prefix=AVX512F
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX12F,AVX12,AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX12F,AVX12,AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX12F,AVX512,AVX512F
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512VL
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=xop | FileCheck %s --check-prefixes=CHECK,XOP
|
||||
|
||||
; The condition vector for BLENDV* only cares about the sign bit of each element.
|
||||
; So in these tests, if we generate BLENDV*, we should be able to remove the redundant cmp op.
|
||||
@ -10,10 +11,10 @@
|
||||
; Test 128-bit vectors for all legal element types.
|
||||
|
||||
define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) {
|
||||
; AVX-LABEL: signbit_sel_v16i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
; CHECK-LABEL: signbit_sel_v16i8:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%tr = icmp slt <16 x i8> %mask, zeroinitializer
|
||||
%z = select <16 x i1> %tr, <16 x i8> %x, <16 x i8> %y
|
||||
ret <16 x i8> %z
|
||||
@ -28,6 +29,13 @@ define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask)
|
||||
; AVX-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
|
||||
; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: signbit_sel_v8i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpcomltw %xmm3, %xmm2, %xmm2
|
||||
; XOP-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
%tr = icmp slt <8 x i16> %mask, zeroinitializer
|
||||
%z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y
|
||||
ret <8 x i16> %z
|
||||
@ -57,6 +65,11 @@ define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask)
|
||||
; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
|
||||
; AVX512VL-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: signbit_sel_v4i32:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
%tr = icmp slt <4 x i32> %mask, zeroinitializer
|
||||
%z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y
|
||||
ret <4 x i32> %z
|
||||
@ -86,6 +99,11 @@ define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask)
|
||||
; AVX512VL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1
|
||||
; AVX512VL-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: signbit_sel_v2i64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
%tr = icmp slt <2 x i64> %mask, zeroinitializer
|
||||
%z = select <2 x i1> %tr, <2 x i64> %x, <2 x i64> %y
|
||||
ret <2 x i64> %z
|
||||
@ -115,6 +133,11 @@ define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32>
|
||||
; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
|
||||
; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: signbit_sel_v4f32:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
%tr = icmp slt <4 x i32> %mask, zeroinitializer
|
||||
%z = select <4 x i1> %tr, <4 x float> %x, <4 x float> %y
|
||||
ret <4 x float> %z
|
||||
@ -144,6 +167,11 @@ define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i6
|
||||
; AVX512VL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1
|
||||
; AVX512VL-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: signbit_sel_v2f64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
%tr = icmp slt <2 x i64> %mask, zeroinitializer
|
||||
%z = select <2 x i1> %tr, <2 x double> %x, <2 x double> %y
|
||||
ret <2 x double> %z
|
||||
@ -173,6 +201,16 @@ define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask)
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: signbit_sel_v32i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vextractf128 $1, %ymm2, %xmm3
|
||||
; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; XOP-NEXT: vpcomltb %xmm4, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpcomltb %xmm4, %xmm2, %xmm2
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
|
||||
; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
%tr = icmp slt <32 x i8> %mask, zeroinitializer
|
||||
%z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y
|
||||
ret <32 x i8> %z
|
||||
@ -206,6 +244,16 @@ define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
|
||||
; AVX512-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
|
||||
; AVX512-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: signbit_sel_v16i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vextractf128 $1, %ymm2, %xmm3
|
||||
; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; XOP-NEXT: vpcomltw %xmm4, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpcomltw %xmm4, %xmm2, %xmm2
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
|
||||
; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
%tr = icmp slt <16 x i16> %mask, zeroinitializer
|
||||
%z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y
|
||||
ret <16 x i16> %z
|
||||
@ -234,6 +282,11 @@ define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask)
|
||||
; AVX512VL-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
|
||||
; AVX512VL-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: signbit_sel_v8i32:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
%tr = icmp slt <8 x i32> %mask, zeroinitializer
|
||||
%z = select <8 x i1> %tr, <8 x i32> %x, <8 x i32> %y
|
||||
ret <8 x i32> %z
|
||||
@ -262,6 +315,11 @@ define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask)
|
||||
; AVX512VL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1
|
||||
; AVX512VL-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: signbit_sel_v4i64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
%tr = icmp slt <4 x i64> %mask, zeroinitializer
|
||||
%z = select <4 x i1> %tr, <4 x i64> %x, <4 x i64> %y
|
||||
ret <4 x i64> %z
|
||||
@ -290,6 +348,11 @@ define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i6
|
||||
; AVX512VL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1
|
||||
; AVX512VL-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: signbit_sel_v4f64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
%tr = icmp slt <4 x i64> %mask, zeroinitializer
|
||||
%z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
|
||||
ret <4 x double> %z
|
||||
@ -330,6 +393,15 @@ define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double>
|
||||
; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
|
||||
; AVX512VL-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: signbit_sel_v4f64_small_mask:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpmovsxdq %xmm2, %xmm3
|
||||
; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
|
||||
; XOP-NEXT: vpmovsxdq %xmm2, %xmm2
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
|
||||
; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
%tr = icmp slt <4 x i32> %mask, zeroinitializer
|
||||
%z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
|
||||
ret <4 x double> %z
|
||||
@ -350,6 +422,12 @@ define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i6
|
||||
; AVX512-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
|
||||
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: signbit_sel_v8f64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
|
||||
; XOP-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
|
||||
; XOP-NEXT: retq
|
||||
%tr = icmp slt <8 x i64> %mask, zeroinitializer
|
||||
%z = select <8 x i1> %tr, <8 x double> %x, <8 x double> %y
|
||||
ret <8 x double> %z
|
||||
@ -384,6 +462,13 @@ define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x
|
||||
; AVX512VL-NEXT: vcmpltps %xmm2, %xmm0, %k1
|
||||
; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: signbit_sel_v4f32_fcmp:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; XOP-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
|
||||
; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
%cmp = fcmp olt <4 x float> %x, zeroinitializer
|
||||
%sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
|
||||
ret <4 x float> %sel
|
||||
@ -420,6 +505,18 @@ define <4 x i64> @blend_splat1_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x
|
||||
; AVX512VL-NEXT: vptestnmq {{.*}}(%rip){1to4}, %ymm0, %k1
|
||||
; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_splat1_mask_cond_v4i64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; XOP-NEXT: vpsllq $63, %xmm3, %xmm3
|
||||
; XOP-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
|
||||
; XOP-NEXT: vpshaq %xmm4, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshaq %xmm4, %xmm0, %xmm0
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
|
||||
%c = icmp eq <4 x i64> %a, zeroinitializer
|
||||
%r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
|
||||
@ -449,6 +546,14 @@ define <4 x i32> @blend_splat1_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x
|
||||
; AVX512VL-NEXT: vptestnmd {{.*}}(%rip){1to4}, %xmm0, %k1
|
||||
; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_splat1_mask_cond_v4i32:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpcomneqd %xmm3, %xmm0, %xmm0
|
||||
; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
|
||||
%c = icmp eq <4 x i32> %a, zeroinitializer
|
||||
%r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
|
||||
@ -483,6 +588,17 @@ define <16 x i16> @blend_splat1_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <
|
||||
; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_splat1_mask_cond_v16i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpsllw $15, %xmm0, %xmm3
|
||||
; XOP-NEXT: vpsraw $15, %xmm3, %xmm3
|
||||
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; XOP-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpsraw $15, %xmm0, %xmm0
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
|
||||
; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
|
||||
%c = icmp eq <16 x i16> %a, zeroinitializer
|
||||
%r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
|
||||
@ -503,6 +619,14 @@ define <16 x i8> @blend_splat1_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x
|
||||
; AVX512-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_splat1_mask_cond_v16i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpcomneqb %xmm3, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||
%c = icmp eq <16 x i8> %a, zeroinitializer
|
||||
%r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
|
||||
@ -532,6 +656,14 @@ define <2 x i64> @blend_splatmax_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2
|
||||
; AVX512VL-NEXT: vptestnmq {{.*}}(%rip), %xmm0, %k1
|
||||
; AVX512VL-NEXT: vpblendmq %xmm1, %xmm2, %xmm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_splatmax_mask_cond_v2i64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpcomneqq %xmm3, %xmm0, %xmm0
|
||||
; XOP-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <2 x i64> %x, <i64 9223372036854775808, i64 9223372036854775808>
|
||||
%c = icmp eq <2 x i64> %a, zeroinitializer
|
||||
%r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z
|
||||
@ -559,6 +691,11 @@ define <8 x i32> @blend_splatmax_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8
|
||||
; AVX512VL-NEXT: vptestnmd {{.*}}(%rip){1to8}, %ymm0, %k1
|
||||
; AVX512VL-NEXT: vpblendmd %ymm1, %ymm2, %ymm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_splatmax_mask_cond_v8i32:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <8 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
|
||||
%c = icmp eq <8 x i32> %a, zeroinitializer
|
||||
%r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z
|
||||
@ -579,6 +716,14 @@ define <8 x i16> @blend_splatmax_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8
|
||||
; AVX512-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_splatmax_mask_cond_v8i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpcomneqw %xmm3, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <8 x i16> %x, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
|
||||
%c = icmp eq <8 x i16> %a, zeroinitializer
|
||||
%r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
|
||||
@ -610,6 +755,16 @@ define <32 x i8> @blend_splatmax_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32
|
||||
; AVX512-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_splatmax_mask_cond_v32i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; XOP-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3
|
||||
; XOP-NEXT: vpcmpgtb %xmm0, %xmm4, %xmm0
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <32 x i8> %x, <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>
|
||||
%c = icmp eq <32 x i8> %a, zeroinitializer
|
||||
%r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z
|
||||
@ -647,6 +802,18 @@ define <4 x i64> @blend_splat_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i
|
||||
; AVX512VL-NEXT: vptestnmq {{.*}}(%rip){1to4}, %ymm0, %k1
|
||||
; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_splat_mask_cond_v4i64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; XOP-NEXT: vpsllq $62, %xmm3, %xmm3
|
||||
; XOP-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
|
||||
; XOP-NEXT: vpshaq %xmm4, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpsllq $62, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpshaq %xmm4, %xmm0, %xmm0
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
|
||||
%c = icmp eq <4 x i64> %a, zeroinitializer
|
||||
%r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
|
||||
@ -676,6 +843,14 @@ define <4 x i32> @blend_splat_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i
|
||||
; AVX512VL-NEXT: vptestnmd {{.*}}(%rip){1to4}, %xmm0, %k1
|
||||
; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_splat_mask_cond_v4i32:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpcomneqd %xmm3, %xmm0, %xmm0
|
||||
; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <4 x i32> %x, <i32 65536, i32 65536, i32 65536, i32 65536>
|
||||
%c = icmp eq <4 x i32> %a, zeroinitializer
|
||||
%r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
|
||||
@ -710,6 +885,17 @@ define <16 x i16> @blend_splat_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <1
|
||||
; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_splat_mask_cond_v16i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpsllw $5, %xmm0, %xmm3
|
||||
; XOP-NEXT: vpsraw $15, %xmm3, %xmm3
|
||||
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; XOP-NEXT: vpsllw $5, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpsraw $15, %xmm0, %xmm0
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
|
||||
; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <16 x i16> %x, <i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024>
|
||||
%c = icmp eq <16 x i16> %a, zeroinitializer
|
||||
%r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
|
||||
@ -730,6 +916,14 @@ define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x
|
||||
; AVX512-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_splat_mask_cond_v16i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpcomneqb %xmm3, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <16 x i8> %x, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
||||
%c = icmp eq <16 x i8> %a, zeroinitializer
|
||||
%r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
|
||||
@ -772,6 +966,17 @@ define <4 x i64> @blend_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z
|
||||
; AVX512VL-NEXT: vptestnmq {{.*}}(%rip), %ymm0, %k1
|
||||
; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_mask_cond_v4i64:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; XOP-NEXT: vpcomeqq %xmm4, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpcomeqq %xmm4, %xmm0, %xmm0
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; XOP-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <4 x i64> %x, <i64 2, i64 4, i64 8, i64 16>
|
||||
%c = icmp eq <4 x i64> %a, zeroinitializer
|
||||
%r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
|
||||
@ -804,6 +1009,14 @@ define <4 x i32> @blend_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z
|
||||
; AVX512VL-NEXT: vptestnmd {{.*}}(%rip), %xmm0, %k1
|
||||
; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_mask_cond_v4i32:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpcomeqd %xmm3, %xmm0, %xmm0
|
||||
; XOP-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <4 x i32> %x, <i32 65536, i32 512, i32 2, i32 1>
|
||||
%c = icmp eq <4 x i32> %a, zeroinitializer
|
||||
%r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
|
||||
@ -839,6 +1052,17 @@ define <16 x i16> @blend_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i1
|
||||
; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_mask_cond_v16i16:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; XOP-NEXT: vpcomeqw %xmm4, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpcomeqw %xmm4, %xmm0, %xmm0
|
||||
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; XOP-NEXT: vpcmov %ymm0, %ymm2, %ymm1, %ymm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <16 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 2, i16 2, i16 2, i16 2, i16 8, i16 8, i16 64, i16 64, i16 1024, i16 4096, i16 1024>
|
||||
%c = icmp eq <16 x i16> %a, zeroinitializer
|
||||
%r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
|
||||
@ -853,6 +1077,14 @@ define <16 x i8> @blend_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z
|
||||
; AVX-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: blend_mask_cond_v16i8:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpcomeqb %xmm3, %xmm0, %xmm0
|
||||
; XOP-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
|
||||
; XOP-NEXT: retq
|
||||
%a = and <16 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2>
|
||||
%c = icmp eq <16 x i8> %a, zeroinitializer
|
||||
%r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
|
||||
@ -892,6 +1124,19 @@ define void @PR46531(i32* %x, i32* %y, i32* %z) {
|
||||
; AVX512VL-NEXT: vpord %xmm0, %xmm1, %xmm2 {%k1}
|
||||
; AVX512VL-NEXT: vmovdqu %xmm2, (%rdi)
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: PR46531:
|
||||
; XOP: # %bb.0:
|
||||
; XOP-NEXT: vmovdqu (%rsi), %xmm0
|
||||
; XOP-NEXT: vmovdqu (%rdx), %xmm1
|
||||
; XOP-NEXT: vpor %xmm0, %xmm1, %xmm2
|
||||
; XOP-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm3
|
||||
; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
|
||||
; XOP-NEXT: vpcomneqd %xmm4, %xmm3, %xmm3
|
||||
; XOP-NEXT: vpxor %xmm0, %xmm1, %xmm0
|
||||
; XOP-NEXT: vblendvps %xmm3, %xmm0, %xmm2, %xmm0
|
||||
; XOP-NEXT: vmovups %xmm0, (%rdi)
|
||||
; XOP-NEXT: retq
|
||||
%vy = bitcast i32* %y to <4 x i32>*
|
||||
%a = load <4 x i32>, <4 x i32>* %vy, align 4
|
||||
%vz = bitcast i32* %z to <4 x i32>*
|
||||
|
Loading…
x
Reference in New Issue
Block a user