Patch notes (free-form text ahead of the first "diff --git" header).

Adds four codegen regression tests, two per file. Each test calls a
vpermi2var / vpermt2var intrinsic with a constant index vector and an
all-ones write mask (i16 -1). The CHECK lines expect a constant load into
zmm2/ymm2 followed by a single vpermt2ps / vpermt2d / vpermt2w.

NOTE(review): the constant index operands of the four added calls were
missing and have been rebuilt from the CHECK constants. In the expected
code %a0 stays in zmm0/ymm0 as the first table and %a1 in zmm1/ymm1 as the
second, so the constant in zmm2/ymm2 is the IR index vector. Confirm
against the upstream revision.

NOTE(review): the context line in the second hunk that calls
@llvm.x86.avx512.maskz.vpermt2var.hi.256 on %res0 is still missing its index
operand. It belongs to a test that is only partly visible here; restore it
from the target file before applying.

NOTE(review): runs of spaces had been collapsed. Indentation and CHECK
column alignment follow the usual update_llc_test_checks layout; if context
matching fails, apply with a whitespace-tolerant option.

diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
index 570d3178f36..749c49f7859 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
@@ -904,6 +904,38 @@ define <16 x i32> @combine_vpermi2var_16i32_identity(<16 x i32> %x0, <16 x i32>
   ret <16 x i32> %res1
 }
 
+define <16 x float> @combine_vpermt2var_vpermi2var_16f32_as_unpckhps(<16 x float> %a0, <16 x float> %a1) {
+; X32-LABEL: combine_vpermt2var_vpermi2var_16f32_as_unpckhps:
+; X32:       # BB#0:
+; X32-NEXT:    vmovaps {{.*#+}} zmm2 = [18,2,19,3,22,6,23,7,26,10,27,11,30,14,31,15]
+; X32-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: combine_vpermt2var_vpermi2var_16f32_as_unpckhps:
+; X64:       # BB#0:
+; X64-NEXT:    vmovaps {{.*#+}} zmm2 = [18,2,19,3,22,6,23,7,26,10,27,11,30,14,31,15]
+; X64-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
+; X64-NEXT:    retq
+  %res0 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %a0, <16 x i32> <i32 18, i32 2, i32 19, i32 3, i32 22, i32 6, i32 23, i32 7, i32 26, i32 10, i32 27, i32 11, i32 30, i32 14, i32 31, i32 15>, <16 x float> %a1, i16 -1)
+  ret <16 x float> %res0
+}
+
+define <16 x i32> @vpermt2var_vpermi2var_16i32_as_unpckldq(<16 x i32> %a0, <16 x i32> %a1) {
+; X32-LABEL: vpermt2var_vpermi2var_16i32_as_unpckldq:
+; X32:       # BB#0:
+; X32-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [0,16,1,17,4,20,5,21,8,24,9,25,12,28,13,29]
+; X32-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: vpermt2var_vpermi2var_16i32_as_unpckldq:
+; X64:       # BB#0:
+; X64-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [0,16,1,17,4,20,5,21,8,24,9,25,12,28,13,29]
+; X64-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
+; X64-NEXT:    retq
+  %res0 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>, <16 x i32> %a1, i16 -1)
+  ret <16 x i32> %res0
+}
+
 define <32 x i16> @combine_vpermi2var_32i16_identity(<32 x i16> %x0, <32 x i16> %x1) {
 ; X32-LABEL: combine_vpermi2var_32i16_identity:
 ; X32:       # BB#0:
diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll b/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
index 1948b507afd..33629a3288d 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
@@ -74,3 +74,35 @@ define <16 x i16> @combine_vpermt2var_vpermi2var_16i16_as_vperm2(<16 x i16> %x0,
   %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> , <16 x i16> %res0, <16 x i16> %res0, i16 -1)
   ret <16 x i16> %res1
 }
+
+define <16 x i16> @combine_vpermt2var_vpermi2var_16i16_as_unpckhwd(<16 x i16> %a0, <16 x i16> %a1) {
+; X32-LABEL: combine_vpermt2var_vpermi2var_16i16_as_unpckhwd:
+; X32:       # BB#0:
+; X32-NEXT:    vmovdqu16 {{.*#+}} ymm2 = [20,4,21,5,22,6,23,7,28,12,29,13,30,14,31,15]
+; X32-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: combine_vpermt2var_vpermi2var_16i16_as_unpckhwd:
+; X64:       # BB#0:
+; X64-NEXT:    vmovdqu16 {{.*#+}} ymm2 = [20,4,21,5,22,6,23,7,28,12,29,13,30,14,31,15]
+; X64-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
+; X64-NEXT:    retq
+  %res0 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %a0, <16 x i16> <i16 20, i16 4, i16 21, i16 5, i16 22, i16 6, i16 23, i16 7, i16 28, i16 12, i16 29, i16 13, i16 30, i16 14, i16 31, i16 15>, <16 x i16> %a1, i16 -1)
+  ret <16 x i16> %res0
+}
+
+define <16 x i16> @combine_vpermt2var_vpermi2var_16i16_as_unpcklwd(<16 x i16> %a0, <16 x i16> %a1) {
+; X32-LABEL: combine_vpermt2var_vpermi2var_16i16_as_unpcklwd:
+; X32:       # BB#0:
+; X32-NEXT:    vmovdqu16 {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,8,24,9,25,10,26,11,27]
+; X32-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: combine_vpermt2var_vpermi2var_16i16_as_unpcklwd:
+; X64:       # BB#0:
+; X64-NEXT:    vmovdqu16 {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,8,24,9,25,10,26,11,27]
+; X64-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
+; X64-NEXT:    retq
+  %res0 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> <i16 0, i16 16, i16 1, i16 17, i16 2, i16 18, i16 3, i16 19, i16 8, i16 24, i16 9, i16 25, i16 10, i16 26, i16 11, i16 27>, <16 x i16> %a0, <16 x i16> %a1, i16 -1)
+  ret <16 x i16> %res0
+}