diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll index 10e8e3a680c..9664becc276 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -3249,6 +3249,56 @@ define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_u ret <16 x i16> %shuffle } +define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a, <16 x i16> %b) { +; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3: +; AVX1: # BB#0: +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3: +; AVX2: # BB#0: +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7] +; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX2-NEXT: retq + %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %shuffle +} + +define <16 x i16> @shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8(<16 x i16> %a, <16 x i16> %b) { +; AVX1-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8: +; AVX2: # BB#0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX2-NEXT: retq + %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %shuffle +} + +define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) { +; AVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u: +; AVX2: # BB#0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3] +; AVX2-NEXT: retq + %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %shuffle +} + define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) { ; ALL-LABEL: insert_v16i16_0elt_into_zero_vector: ; ALL: # BB#0: diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll index 55d3f96c08e..316832f78b4 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -1977,6 +1977,57 @@ define <32 x i8> @shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_ ret <32 x i8> %shuffle } +define <32 x i8> @shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10(<32 x i8> %a, <32 x i8> %b) { +; AVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10: +; AVX1: # BB#0: +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10: +; AVX2: # BB#0: +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] +; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX2-NEXT: retq + %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %shuffle +} + +define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) { +; AVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: +; AVX2: # BB#0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0 +; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX2-NEXT: retq + %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %shuffle +} + +define <32 x i8> @shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) { +; AVX1-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu: +; AVX2: # BB#0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6] +; AVX2-NEXT: retq + %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %shuffle +} + define <32 x i8> @insert_dup_mem_v32i8_i32(i32* %ptr) { ; AVX1-LABEL: insert_dup_mem_v32i8_i32: ; AVX1: # BB#0: diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll index 7e2879c7c43..366726f75cb 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -477,6 +477,56 @@ define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) { ret <4 x double> %shuffle } +define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) { +; ALL-LABEL: shuffle_v4f64_11uu: +; ALL: # BB#0: +; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,2] +; ALL-NEXT: retq + %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %shuffle +} + +define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) { +; AVX1-LABEL: shuffle_v4f64_22uu: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v4f64_22uu: +; AVX2: # BB#0: +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3] +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: shuffle_v4f64_22uu: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3] +; AVX512VL-NEXT: retq + %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %shuffle +} + +define <4 x double> @shuffle_v4f64_3333(<4 x double> %a, <4 x double> %b) { +; AVX1-LABEL: shuffle_v4f64_3333: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v4f64_3333: +; AVX2: # BB#0: +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3] +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: shuffle_v4f64_3333: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3] +; AVX512VL-NEXT: retq + %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %shuffle +} + define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: shuffle_v4i64_0000: ; AVX1: # BB#0: @@ -1073,6 +1123,66 @@ define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) { ret <4 x i64> %shuffle } +define <4 x i64> @shuffle_v4i64_11uu(<4 x i64> %a, <4 x i64> %b) { +; AVX1-LABEL: shuffle_v4i64_11uu: +; AVX1: # BB#0: +; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,2] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v4i64_11uu: +; AVX2: # BB#0: +; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7] +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: shuffle_v4i64_11uu: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7] +; AVX512VL-NEXT: retq + %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %shuffle +} + +define <4 x i64> @shuffle_v4i64_22uu(<4 x i64> %a, <4 x i64> %b) { +; AVX1-LABEL: shuffle_v4i64_22uu: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v4i64_22uu: +; AVX2: # BB#0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3] +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: shuffle_v4i64_22uu: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3] +; AVX512VL-NEXT: retq + %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %shuffle +} + +define <4 x i64> @shuffle_v4i64_3333(<4 x i64> %a, <4 x i64> %b) { +; AVX1-LABEL: shuffle_v4i64_3333: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v4i64_3333: +; AVX2: # BB#0: +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3] +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: shuffle_v4i64_3333: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3] +; AVX512VL-NEXT: retq + %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %shuffle +} + define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) { ; ALL-LABEL: stress_test1: ; ALL: retq diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll index 9e4f0e7822c..dc8910e73a6 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -830,6 +830,55 @@ define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) { ret <8 x float> %shuffle } +define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) { +; AVX1-LABEL: shuffle_v8f32_uuuu1111: +; AVX1: # BB#0: +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v8f32_uuuu1111: +; AVX2: # BB#0: +; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: retq + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %shuffle +} + +define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) { +; AVX1-LABEL: shuffle_v8f32_44444444: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v8f32_44444444: +; AVX2: # BB#0: +; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: retq + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %shuffle +} + +define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) { +; AVX1-LABEL: shuffle_v8f32_5555uuuu: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v8f32_5555uuuu: +; AVX2: # BB#0: +; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: retq + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %shuffle +} + define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) { ; AVX1-LABEL: shuffle_v8i32_00000000: ; AVX1: # BB#0: @@ -1895,6 +1944,55 @@ define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) { ret <8 x i32> %shuffle } +define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) { +; AVX1-LABEL: shuffle_v8i32_uuuu1111: +; AVX1: # BB#0: +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v8i32_uuuu1111: +; AVX2: # BB#0: +; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: retq + %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %shuffle +} + +define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) { +; AVX1-LABEL: shuffle_v8i32_44444444: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v8i32_44444444: +; AVX2: # BB#0: +; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: retq + %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %shuffle +} + +define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) { +; AVX1-LABEL: shuffle_v8i32_5555uuuu: +; AVX1: # BB#0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; AVX1-NEXT: retq +; +; AVX2-LABEL: shuffle_v8i32_5555uuuu: +; AVX2: # BB#0: +; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: retq + %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %shuffle +} + define <8 x float> @splat_mem_v8f32_2(float* %p) { ; ALL-LABEL: splat_mem_v8f32_2: ; ALL: # BB#0: