diff --git a/lib/Target/X86/X86InstrTablesInfo.h b/lib/Target/X86/X86InstrTablesInfo.h
index 038c21b81ef..415a891bfd9 100755
--- a/lib/Target/X86/X86InstrTablesInfo.h
+++ b/lib/Target/X86/X86InstrTablesInfo.h
@@ -755,6 +755,14 @@ static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = {
 { X86::VDIVPDZ256rr , X86::VDIVPDYrr },
 { X86::VDIVPSZ256rm , X86::VDIVPSYrm },
 { X86::VDIVPSZ256rr , X86::VDIVPSYrr },
+ { X86::VEXTRACTF32x4Z256mr , X86::VEXTRACTF128mr },
+ { X86::VEXTRACTF64x2Z256mr , X86::VEXTRACTF128mr },
+ { X86::VEXTRACTF32x4Z256rr , X86::VEXTRACTF128rr },
+ { X86::VEXTRACTF64x2Z256rr , X86::VEXTRACTF128rr },
+ { X86::VEXTRACTI32x4Z256mr , X86::VEXTRACTI128mr },
+ { X86::VEXTRACTI64x2Z256mr , X86::VEXTRACTI128mr },
+ { X86::VEXTRACTI32x4Z256rr , X86::VEXTRACTI128rr },
+ { X86::VEXTRACTI64x2Z256rr , X86::VEXTRACTI128rr },
 { X86::VFMADD132PDZ256m , X86::VFMADD132PDYm },
 { X86::VFMADD132PDZ256r , X86::VFMADD132PDYr },
 { X86::VFMADD132PSZ256m , X86::VFMADD132PSYm },
@@ -827,6 +835,14 @@ static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = {
 { X86::VFNMSUB231PDZ256r , X86::VFNMSUB231PDYr },
 { X86::VFNMSUB231PSZ256m , X86::VFNMSUB231PSYm },
 { X86::VFNMSUB231PSZ256r , X86::VFNMSUB231PSYr },
+ { X86::VINSERTF32x4Z256rm , X86::VINSERTF128rm },
+ { X86::VINSERTF64x2Z256rm , X86::VINSERTF128rm },
+ { X86::VINSERTF32x4Z256rr , X86::VINSERTF128rr },
+ { X86::VINSERTF64x2Z256rr , X86::VINSERTF128rr },
+ { X86::VINSERTI32x4Z256rm , X86::VINSERTI128rm },
+ { X86::VINSERTI64x2Z256rm , X86::VINSERTI128rm },
+ { X86::VINSERTI32x4Z256rr , X86::VINSERTI128rr },
+ { X86::VINSERTI64x2Z256rr , X86::VINSERTI128rr },
 { X86::VMAXCPDZ256rm , X86::VMAXCPDYrm },
 { X86::VMAXCPDZ256rr , X86::VMAXCPDYrr },
 { X86::VMAXCPSZ256rm , X86::VMAXCPSYrm },
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
index 89eeae4416b..f65f485cc62 100644
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -209,34 +209,22 @@ entry:
 }
 
 define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
-; X32-AVX2-LABEL: QQ64:
-; X32-AVX2: ## BB#0: ## %entry
-; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT: movl (%eax), %ecx
-; X32-AVX2-NEXT: movl 4(%eax), %eax
-; X32-AVX2-NEXT: vmovd %ecx, %xmm0
-; X32-AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; X32-AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; X32-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX2-NEXT: retl
+; X32-LABEL: QQ64:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: movl 4(%eax), %eax
+; X32-NEXT: vmovd %ecx, %xmm0
+; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
 ;
 ; X64-LABEL: QQ64:
 ; X64: ## BB#0: ## %entry
 ; X64-NEXT: vbroadcastsd (%rdi), %ymm0
 ; X64-NEXT: retq
-;
-; X32-AVX512VL-LABEL: QQ64:
-; X32-AVX512VL: ## BB#0: ## %entry
-; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512VL-NEXT: movl (%eax), %ecx
-; X32-AVX512VL-NEXT: movl 4(%eax), %eax
-; X32-AVX512VL-NEXT: vmovd %ecx, %xmm0
-; X32-AVX512VL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-AVX512VL-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; X32-AVX512VL-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; X32-AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512VL-NEXT: retl
 entry:
 %q = load i64, i64* %ptr, align 4
 %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
@@ -1683,7 +1671,7 @@ define void @isel_crash_4q(i64* %cV_R.addr) {
 ; X32-AVX512VL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
 ; X32-AVX512VL-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
 ; X32-AVX512VL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
-; X32-AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm1, %ymm1
+; X32-AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
 ; X32-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
 ; X32-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX512VL-NEXT: movl %ebp, %esp
diff --git a/test/CodeGen/X86/avx512-extract-subvector.ll b/test/CodeGen/X86/avx512-extract-subvector.ll
index 9e866245282..391bf6ba455 100644
--- a/test/CodeGen/X86/avx512-extract-subvector.ll
+++ b/test/CodeGen/X86/avx512-extract-subvector.ll
@@ -60,7 +60,7 @@ define <32 x i8> @extract_subvector256_v64i8(<64 x i8> %x) nounwind {
 define void @extract_subvector256_v8f64_store(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector256_v8f64_store:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextractf64x2 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
@@ -72,7 +72,7 @@ entry:
 define void @extract_subvector256_v8f32_store(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector256_v8f32_store:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextractf32x4 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -84,7 +84,7 @@ entry:
 define void @extract_subvector256_v4i64_store(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector256_v4i64_store:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextracti64x2 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
@@ -96,7 +96,7 @@ entry:
 define void @extract_subvector256_v8i32_store(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector256_v8i32_store:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextracti32x4 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -108,7 +108,7 @@ entry:
 define void @extract_subvector256_v16i16_store(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector256_v16i16_store:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextracti32x4 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -120,7 +120,7 @@ entry:
 define void @extract_subvector256_v32i8_store(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector256_v32i8_store:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextracti32x4 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
 ; SKX-NEXT: retq
 entry:
 %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll
index c6cc7428997..26d14fa0840 100644
--- a/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/test/CodeGen/X86/avx512-insert-extract.ll
@@ -463,7 +463,7 @@ define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) {
 ; SKX-LABEL: extract_v4i64:
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpextrq $1, %xmm0, %rax
-; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm0
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; SKX-NEXT: vpextrq $1, %xmm0, (%rdi)
 ; SKX-NEXT: retq
 %r1 = extractelement <4 x i64> %x, i32 1
@@ -521,7 +521,7 @@ define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
 ; SKX-LABEL: extract_v8i32:
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpextrd $1, %xmm0, %eax
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; SKX-NEXT: vpextrd $1, %xmm0, (%rdi)
 ; SKX-NEXT: retq
 %r1 = extractelement <8 x i32> %x, i32 1
@@ -582,7 +582,7 @@ define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) {
 ; SKX-LABEL: extract_v16i16:
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpextrw $1, %xmm0, %eax
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; SKX-NEXT: vpextrw $1, %xmm0, (%rdi)
 ; SKX-NEXT: ## kill: %AX %AX %EAX
 ; SKX-NEXT: retq
@@ -646,7 +646,7 @@ define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) {
 ; SKX-LABEL: extract_v32i8:
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpextrb $1, %xmm0, %eax
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; SKX-NEXT: vpextrb $1, %xmm0, (%rdi)
 ; SKX-NEXT: ## kill: %AL %AL %EAX
 ; SKX-NEXT: retq
@@ -714,9 +714,9 @@ define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) {
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1
 ; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm1
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; SKX-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1
-; SKX-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; SKX-NEXT: retq
 %val = load i64, i64* %ptr
 %r1 = insertelement <4 x i64> %x, i64 %val, i32 1
@@ -780,9 +780,9 @@ define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, i32* %ptr) {
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm1
 ; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; SKX-NEXT: vpinsrd $1, %edi, %xmm1, %xmm1
-; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; SKX-NEXT: retq
 %val = load i32, i32* %ptr
 %r1 = insertelement <8 x i32> %x, i32 %val, i32 1
@@ -846,9 +846,9 @@ define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, i16* %ptr) {
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm1
 ; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; SKX-NEXT: vpinsrw $1, %edi, %xmm1, %xmm1
-; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; SKX-NEXT: retq
 %val = load i16, i16* %ptr
 %r1 = insertelement <16 x i16> %x, i16 %val, i32 1
@@ -912,9 +912,9 @@ define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, i8* %ptr) {
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm1
 ; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; SKX-NEXT: vpinsrb $1, %edi, %xmm1, %xmm1
-; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; SKX-NEXT: retq
 %val = load i8, i8* %ptr
 %r1 = insertelement <32 x i8> %x, i8 %val, i32 1
@@ -1014,9 +1014,9 @@ define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) {
 ;
 ; SKX-LABEL: test_insert_128_v16i16:
 ; SKX: ## BB#0:
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; SKX-NEXT: vpinsrw $2, %edi, %xmm1, %xmm1
-; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; SKX-NEXT: retq
 %r = insertelement <16 x i16> %x, i16 %y, i32 10
 ret <16 x i16> %r
@@ -1032,9 +1032,9 @@ define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) {
 ;
 ; SKX-LABEL: test_insert_128_v32i8:
 ; SKX: ## BB#0:
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; SKX-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
-; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; SKX-NEXT: retq
 %r = insertelement <32 x i8> %x, i8 %y, i32 20
 ret <32 x i8> %r
diff --git a/test/CodeGen/X86/avx512-vbroadcasti128.ll b/test/CodeGen/X86/avx512-vbroadcasti128.ll
index 09c48ddf81a..ad8a29cacd8 100644
--- a/test/CodeGen/X86/avx512-vbroadcasti128.ll
+++ b/test/CodeGen/X86/avx512-vbroadcasti128.ll
@@ -237,7 +237,7 @@ define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
 ; X64-AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
 ; X64-AVX512VL-NEXT: vpxor %ymm1, %ymm1, %ymm1
 ; X64-AVX512VL-NEXT: vmovdqa %ymm1, (%rsi)
-; X64-AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512VL-NEXT: retq
 ;
 ; X64-AVX512BWVL-LABEL: PR29088:
@@ -245,7 +245,7 @@ define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
 ; X64-AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0
 ; X64-AVX512BWVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
 ; X64-AVX512BWVL-NEXT: vmovdqa %ymm1, (%rsi)
-; X64-AVX512BWVL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512BWVL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512BWVL-NEXT: retq
 ;
 ; X64-AVX512DQVL-LABEL: PR29088:
@@ -253,7 +253,7 @@ define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
 ; X64-AVX512DQVL-NEXT: vmovdqa (%rdi), %xmm0
 ; X64-AVX512DQVL-NEXT: vxorps %ymm1, %ymm1, %ymm1
 ; X64-AVX512DQVL-NEXT: vmovaps %ymm1, (%rsi)
-; X64-AVX512DQVL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512DQVL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512DQVL-NEXT: retq
 %ld = load <4 x i32>, <4 x i32>* %p0
 store <8 x float> zeroinitializer, <8 x float>* %p1
diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index 6fd11157744..7a9d7d7885f 100644
--- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -30,7 +30,7 @@ define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
 ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
 ; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
 ; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
-; CHECK-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
@@ -79,7 +79,7 @@ define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
 ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
 ; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
 ; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
-; CHECK-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
@@ -129,7 +129,7 @@ define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
 ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
 ; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
 ; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
-; CHECK-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
@@ -178,7 +178,7 @@ define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask)
 ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
 ; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
 ; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
-; CHECK-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
diff --git a/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
index ace1cd32fb2..f8460bf880f 100644
--- a/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
+++ b/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
@@ -1565,7 +1565,7 @@ declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double>, i32,
 define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x19,0xc2,0x01]
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
 ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
 ; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
 ; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc0,0x01]
@@ -1585,7 +1585,7 @@ declare <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double>, <2 x do
 define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0xfd,0x28,0x18,0xd9,0x01]
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
 ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
 ; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
 ; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xc1,0x01]
@@ -1605,7 +1605,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64>, <2 x i64>, i3
 define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0xfd,0x28,0x38,0xd9,0x01]
+; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
 ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
 ; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
 ; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xc1,0x01]
diff --git a/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
index 58dddf8b6d6..c63d47d780d 100644
--- a/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ b/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -4778,7 +4778,7 @@ declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float>, i32, <4
 define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm2 ## encoding: [0x62,0xf3,0x7d,0x28,0x19,0xc2,0x01]
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01]
 ; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc0,0x01]
@@ -4798,7 +4798,7 @@ declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x floa
 define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0x7d,0x28,0x18,0xd9,0x01]
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01]
 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc1,0x01]
@@ -4818,7 +4818,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i3
 define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0x7d,0x28,0x38,0xd9,0x01]
+; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01]
 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc1,0x01]
diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll
index cba9a221f77..4e65b169c7e 100644
--- a/test/CodeGen/X86/masked_memop.ll
+++ b/test/CodeGen/X86/masked_memop.ll
@@ -1009,7 +1009,7 @@ define void @one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
 ;
 ; SKX-LABEL: one_mask_bit_set3:
 ; SKX: ## BB#0:
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; SKX-NEXT: vmovq %xmm0, 16(%rdi)
 ; SKX-NEXT: retq
 call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %val, <4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
@@ -1026,17 +1026,11 @@ define void @one_mask_bit_set4(<4 x double>* %addr, <4 x double> %val) {
 ; AVX-NEXT: vzeroupper
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: one_mask_bit_set4:
-; AVX512F: ## BB#0:
-; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vmovhpd %xmm0, 24(%rdi)
-; AVX512F-NEXT: retq
-;
-; SKX-LABEL: one_mask_bit_set4:
-; SKX: ## BB#0:
-; SKX-NEXT: vextractf32x4 $1, %ymm0, %xmm0
-; SKX-NEXT: vmovhpd %xmm0, 24(%rdi)
-; SKX-NEXT: retq
+; AVX512-LABEL: one_mask_bit_set4:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovhpd %xmm0, 24(%rdi)
+; AVX512-NEXT: retq
 call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %val, <4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>)
 ret void
 }
@@ -1109,19 +1103,12 @@ define <4 x i64> @load_one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
 ;
-; AVX512F-LABEL: load_one_mask_bit_set3:
-; AVX512F: ## BB#0:
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vpinsrq $0, 16(%rdi), %xmm1, %xmm1
-; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: retq
-;
-; SKX-LABEL: load_one_mask_bit_set3:
-; SKX: ## BB#0:
-; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; SKX-NEXT: vpinsrq $0, 16(%rdi), %xmm1, %xmm1
-; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; SKX-NEXT: retq
+; AVX512-LABEL: load_one_mask_bit_set3:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vpinsrq $0, 16(%rdi), %xmm1, %xmm1
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
 %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>, <4 x i64> %val)
 ret <4 x i64> %res
 }
@@ -1136,19 +1123,12 @@ define <4 x double> @load_one_mask_bit_set4(<4 x double>* %addr, <4 x double> %v
 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: load_one_mask_bit_set4:
-; AVX512F: ## BB#0:
-; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
-; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: retq
-;
-; SKX-LABEL: load_one_mask_bit_set4:
-; SKX: ## BB#0:
-; SKX-NEXT: vextractf32x4 $1, %ymm0, %xmm1
-; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
-; SKX-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
-; SKX-NEXT: retq
+; AVX512-LABEL: load_one_mask_bit_set4:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
 %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>, <4 x double> %val)
 ret <4 x double> %res
 }
diff --git a/test/CodeGen/X86/stack-folding-fp-avx512vl.ll b/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
index 198a96df6b1..c6ae85dda43 100644
--- a/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
+++ b/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
@@ -488,7 +488,7 @@ define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) {
 
 define <4 x float> @stack_fold_extractf32x4(<8 x float> %a0, <8 x float> %a1) {
 ;CHECK-LABEL: stack_fold_extractf32x4
- ;CHECK: vextractf32x4 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
+ ;CHECK: vextractf128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
 %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 ret <4 x float> %1
@@ -496,7 +496,7 @@ define <4 x float> @stack_fold_extractf32x4(<8 x float> %a0, <8 x float> %a1) {
 
 define <2 x double> @stack_fold_extractf64x2(<4 x double> %a0, <4 x double> %a1) {
 ;CHECK-LABEL: stack_fold_extractf64x2
- ;CHECK: vextractf64x2 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
+ ;CHECK: vextractf128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
 %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <2 x i32> <i32 2, i32 3>
 %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 ret <2 x double> %1
@@ -504,7 +504,7 @@ define <2 x double> @stack_fold_extractf64x2(<4 x double> %a0, <4 x double> %a1)
 
 define <8 x float> @stack_fold_insertf32x4(<4 x float> %a0, <4 x float> %a1) {
 ;CHECK-LABEL: stack_fold_insertf32x4
- ;CHECK: vinsertf32x4 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vinsertf128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ret <8 x float> %2
@@ -512,7 +512,7 @@ define <8 x float> @stack_fold_insertf32x4(<4 x float> %a0, <4 x float> %a1) {
 
 define <4 x double> @stack_fold_insertf64x2(<2 x double> %a0, <2 x double> %a1) {
 ;CHECK-LABEL: stack_fold_insertf64x2
- ;CHECK: vinsertf64x2 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vinsertf128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ret <4 x double> %2
diff --git a/test/CodeGen/X86/stack-folding-int-avx512vl.ll b/test/CodeGen/X86/stack-folding-int-avx512vl.ll
index 6847595e927..77afc49b257 100644
--- a/test/CodeGen/X86/stack-folding-int-avx512vl.ll
+++ b/test/CodeGen/X86/stack-folding-int-avx512vl.ll
@@ -445,7 +445,7 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16
 define <4 x i32> @stack_fold_extracti32x4(<8 x i32> %a0, <8 x i32> %a1) {
 ;CHECK-LABEL: stack_fold_extracti32x4
- ;CHECK: vextracti32x4 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
+ ;CHECK: vextracti128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
 ; add forces execution domain
 %1 = add <8 x i32> %a0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 %2 = shufflevector <8 x i32> %1, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -455,7 +455,7 @@ define <4 x i32> @stack_fold_extracti32x4(<8 x i32> %a0, <8 x i32> %a1) {
 define <2 x i64> @stack_fold_extracti64x2(<4 x i64> %a0, <4 x i64> %a1) {
 ;CHECK-LABEL: stack_fold_extracti64x2
- ;CHECK: vextracti64x2 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
+ ;CHECK: vextracti128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
 ; add forces execution domain
 %1 = add <4 x i64> %a0, <i64 1, i64 1, i64 1, i64 1>
 %2 = shufflevector <4 x i64> %1, <4 x i64> %a1, <2 x i32> <i32 2, i32 3>
@@ -465,7 +465,7 @@ define <2 x i64> @stack_fold_extracti64x2(<4 x i64> %a0, <4 x i64> %a1)
 define <8 x i32> @stack_fold_inserti32x4(<4 x i32> %a0, <4 x i32> %a1) {
 ;CHECK-LABEL: stack_fold_inserti32x4
- ;CHECK: vinserti32x4 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vinserti128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; add forces execution domain
@@ -475,7 +475,7 @@ define <8 x i32> @stack_fold_inserti32x4(<4 x i32> %a0, <4 x i32> %a1) {
 define <4 x i64> @stack_fold_inserti64x2(<2 x i64> %a0, <2 x i64> %a1) {
 ;CHECK-LABEL: stack_fold_inserti64x2
- ;CHECK: vinserti64x2 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vinserti128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
 %2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; add forces execution domain
diff --git a/test/CodeGen/X86/subvector-broadcast.ll b/test/CodeGen/X86/subvector-broadcast.ll
index 1606c912226..7aa3f393bbe 100644
--- a/test/CodeGen/X86/subvector-broadcast.ll
+++ b/test/CodeGen/X86/subvector-broadcast.ll
@@ -832,7 +832,7 @@ define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x doub
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-AVX512F-NEXT: vmovaps (%ecx), %xmm0
 ; X32-AVX512F-NEXT: vmovaps %xmm0, (%eax)
-; X32-AVX512F-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512F-NEXT: retl
 ;
 ; X32-AVX512BW-LABEL: test_broadcast_2f64_4f64_reuse:
@@ -841,7 +841,7 @@ define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x doub
 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-AVX512BW-NEXT: vmovaps (%ecx), %xmm0
 ; X32-AVX512BW-NEXT: vmovaps %xmm0, (%eax)
-; X32-AVX512BW-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512BW-NEXT: retl
 ;
 ; X32-AVX512DQ-LABEL: test_broadcast_2f64_4f64_reuse:
@@ -850,7 +850,7 @@ define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x doub
 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-AVX512DQ-NEXT: vmovapd (%ecx), %xmm0
 ; X32-AVX512DQ-NEXT: vmovapd %xmm0, (%eax)
-; X32-AVX512DQ-NEXT: vinsertf64x2 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512DQ-NEXT: retl
 ;
 ; X64-AVX-LABEL: test_broadcast_2f64_4f64_reuse:
@@ -864,21 +864,21 @@ define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x doub
 ; X64-AVX512F: ## BB#0:
 ; X64-AVX512F-NEXT: vmovaps (%rdi), %xmm0
 ; X64-AVX512F-NEXT: vmovaps %xmm0, (%rsi)
-; X64-AVX512F-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512F-NEXT: retq
 ;
 ; X64-AVX512BW-LABEL: test_broadcast_2f64_4f64_reuse:
 ; X64-AVX512BW: ## BB#0:
 ; X64-AVX512BW-NEXT: vmovaps (%rdi), %xmm0
 ; X64-AVX512BW-NEXT: vmovaps %xmm0, (%rsi)
-; X64-AVX512BW-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512BW-NEXT: retq
 ;
 ; X64-AVX512DQ-LABEL: test_broadcast_2f64_4f64_reuse:
 ; X64-AVX512DQ: ## BB#0:
 ; X64-AVX512DQ-NEXT: vmovapd (%rdi), %xmm0
 ; X64-AVX512DQ-NEXT: vmovapd %xmm0, (%rsi)
-; X64-AVX512DQ-NEXT: vinsertf64x2 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512DQ-NEXT: retq
 %1 = load <2 x double>, <2 x double>* %p0
 store <2 x double> %1, <2 x double>* %p1
@@ -896,32 +878,14 @@ define <4 x i64> @test_broadcast_2i64_4i64_reuse(<2 x i64>* %p0, <2 x i64>* %p1)
 ; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX-NEXT: retl
 ;
-; X32-AVX512F-LABEL: test_broadcast_2i64_4i64_reuse:
-; X32-AVX512F: ## BB#0:
-; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
-; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512F-NEXT: retl
-;
-; X32-AVX512BW-LABEL: test_broadcast_2i64_4i64_reuse:
-; X32-AVX512BW: ## BB#0:
-; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512BW-NEXT: vmovdqa (%ecx), %xmm0
-; X32-AVX512BW-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512BW-NEXT: retl
-;
-; X32-AVX512DQ-LABEL: test_broadcast_2i64_4i64_reuse:
-; X32-AVX512DQ: ## BB#0:
-; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
-; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512DQ-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512DQ-NEXT: retl
+; X32-AVX512-LABEL: test_broadcast_2i64_4i64_reuse:
+; X32-AVX512: ## BB#0:
+; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-AVX512-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512-NEXT: vmovdqa %xmm0, (%eax)
+; X32-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512-NEXT: retl
 ;
 ; X64-AVX-LABEL: test_broadcast_2i64_4i64_reuse:
 ; X64-AVX: ## BB#0:
@@ -930,26 +912,12 @@ define <4 x i64> @test_broadcast_2i64_4i64_reuse(<2 x i64>* %p0, <2 x i64>* %p1)
 ; X64-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX-NEXT: retq
 ;
-; X64-AVX512F-LABEL: test_broadcast_2i64_4i64_reuse:
-; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
-; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: test_broadcast_2i64_4i64_reuse:
-; X64-AVX512BW: ## BB#0:
-; X64-AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
-; X64-AVX512BW-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512DQ-LABEL: test_broadcast_2i64_4i64_reuse:
-; X64-AVX512DQ: ## BB#0:
-; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
-; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512DQ-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512DQ-NEXT: retq
+; X64-AVX512-LABEL: test_broadcast_2i64_4i64_reuse:
+; X64-AVX512: ## BB#0:
+; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512-NEXT: vmovdqa %xmm0, (%rsi)
+; X64-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: retq
 %1 = load <2 x i64>, <2 x i64>* %p0
 store <2 x i64> %1, <2 x i64>* %p1
 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@@ -957,37 +925,21 @@ define <4 x i64> @test_broadcast_2i64_4i64_reuse(<2 x i64>* %p0, <2 x i64>* %p1)
 }
 
 define <8 x float> @test_broadcast_4f32_8f32_reuse(<4 x float>* %p0, <4 x float>* %p1) {
-; X32-AVX-LABEL: test_broadcast_4f32_8f32_reuse:
-; X32-AVX: ## BB#0:
-; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX-NEXT: vmovaps (%ecx), %xmm0
-; X32-AVX-NEXT: vmovaps %xmm0, (%eax)
-; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX-NEXT: retl
+; X32-LABEL: test_broadcast_4f32_8f32_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovaps (%ecx), %xmm0
+; X32-NEXT: vmovaps %xmm0, (%eax)
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
 ;
-; X32-AVX512-LABEL: test_broadcast_4f32_8f32_reuse:
-; X32-AVX512: ## BB#0:
-; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512-NEXT: vmovaps (%ecx), %xmm0
-; X32-AVX512-NEXT: vmovaps %xmm0, (%eax)
-; X32-AVX512-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512-NEXT: retl
-;
-; X64-AVX-LABEL: test_broadcast_4f32_8f32_reuse:
-; X64-AVX: ## BB#0:
-; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
-; X64-AVX-NEXT: vmovaps %xmm0, (%rsi)
-; X64-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX-NEXT: retq
-;
-; X64-AVX512-LABEL: test_broadcast_4f32_8f32_reuse:
-; X64-AVX512: ## BB#0:
-; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0
-; X64-AVX512-NEXT: vmovaps %xmm0, (%rsi)
-; X64-AVX512-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512-NEXT: retq
+; X64-LABEL: test_broadcast_4f32_8f32_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovaps (%rdi), %xmm0
+; X64-NEXT: vmovaps %xmm0, (%rsi)
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: retq
 %1 = load <4 x float>, <4 x float>* %p0
 store <4 x float> %1, <4 x float>* %p1
 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -1010,7 +962,7 @@ define <8 x i32> @test_broadcast_4i32_8i32_reuse(<4 x i32>* %p0, <4 x i32>* %p1)
 ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-AVX512-NEXT: vmovdqa (%ecx), %xmm0
 ; X32-AVX512-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512-NEXT: retl
 ;
 ; X64-AVX-LABEL: test_broadcast_4i32_8i32_reuse:
@@ -1024,7 +976,7 @@ define <8 x i32> @test_broadcast_4i32_8i32_reuse(<4 x i32>* %p0, <4 x i32>* %p1)
 ; X64-AVX512: ## BB#0:
 ; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm0
 ; X64-AVX512-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512-NEXT: retq
 %1 = load <4 x i32>, <4 x i32>* %p0
 store <4 x i32> %1, <4 x i32>* %p1
@@ -1048,7 +1000,7 @@ define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
 ; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512F-NEXT: retl
 ;
 ; X32-AVX512BW-LABEL: test_broadcast_8i16_16i16_reuse:
@@ -1057,7 +1009,7 @@ define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p
 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-AVX512BW-NEXT: vmovdqu (%ecx), %xmm0
 ; X32-AVX512BW-NEXT: vmovdqu %xmm0, (%eax)
-; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512BW-NEXT: retl
 ;
 ; X32-AVX512DQ-LABEL: test_broadcast_8i16_16i16_reuse:
@@ -1066,7 +1018,7 @@ define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p
 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
 ; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512DQ-NEXT: retl
 ;
 ; X64-AVX-LABEL: test_broadcast_8i16_16i16_reuse:
@@ -1080,21 +1032,21 @@ define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p
 ; X64-AVX512F: ## BB#0:
 ; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
 ; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512F-NEXT: retq
 ;
 ; X64-AVX512BW-LABEL: test_broadcast_8i16_16i16_reuse:
 ; X64-AVX512BW: ## BB#0:
 ; X64-AVX512BW-NEXT: vmovdqu (%rdi), %xmm0
 ; X64-AVX512BW-NEXT: vmovdqu %xmm0, (%rsi)
-; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512BW-NEXT: retq
 ;
 ; X64-AVX512DQ-LABEL: test_broadcast_8i16_16i16_reuse:
 ; X64-AVX512DQ: ## BB#0:
 ; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
 ; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512DQ-NEXT: retq
 %1 = load <8 x i16>, <8 x i16> *%p0
 store <8 x i16> %1, <8 x i16>* %p1
@@ -1118,7 +1070,7 @@ define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1)
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
 ; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512F-NEXT: retl
 ;
 ; X32-AVX512BW-LABEL: test_broadcast_16i8_32i8_reuse:
@@ -1127,7 +1079,7 @@ define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1)
 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-AVX512BW-NEXT: vmovdqu (%ecx), %xmm0
 ; X32-AVX512BW-NEXT: vmovdqu %xmm0, (%eax)
-; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512BW-NEXT: retl
 ;
 ; X32-AVX512DQ-LABEL: test_broadcast_16i8_32i8_reuse:
@@ -1136,7 +1088,7 @@ define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1)
 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
 ; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512DQ-NEXT: retl
 ;
 ; X64-AVX-LABEL: test_broadcast_16i8_32i8_reuse:
@@ -1150,21 +1102,21 @@ define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1)
 ; X64-AVX512F: ## BB#0:
 ; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
 ; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512F-NEXT: retq
 ;
 ; X64-AVX512BW-LABEL: test_broadcast_16i8_32i8_reuse:
 ; X64-AVX512BW: ## BB#0:
 ; X64-AVX512BW-NEXT: vmovdqu (%rdi), %xmm0
 ; X64-AVX512BW-NEXT: vmovdqu %xmm0, (%rsi)
-; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512BW-NEXT: retq
 ;
 ; X64-AVX512DQ-LABEL: test_broadcast_16i8_32i8_reuse:
 ; X64-AVX512DQ: ## BB#0:
 ; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
 ; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512DQ-NEXT: retq
 %1 = load <16 x i8>, <16 x i8> *%p0
 store <16 x i8> %1, <16 x i8>* %p1
@@ -1194,7 +1146,7 @@ define <8 x i32> @test_broadcast_4i32_8i32_chain(<4 x i32>* %p0, <4 x float>* %p
 ; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
 ; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax)
-; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512F-NEXT: retl
 ;
 ; X32-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain:
@@ -1204,7 +1156,7 @@ define <8 x i32> @test_broadcast_4i32_8i32_chain(<4 x i32>* %p0, <4 x float>* %p
 ; X32-AVX512BW-NEXT: vmovdqa (%ecx), %xmm0
 ; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax)
-; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512BW-NEXT: retl
 ;
 ; X32-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain:
@@ -1214,7 +1166,7 @@ define <8 x i32> @test_broadcast_4i32_8i32_chain(<4 x i32>* %p0, <4 x float>* %p
 ; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
 ; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax)
-; X32-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512DQ-NEXT: retl
 ;
 ; X64-AVX-LABEL: test_broadcast_4i32_8i32_chain:
@@ -1230,7 +1182,7 @@ define <8 x i32> @test_broadcast_4i32_8i32_chain(<4 x i32>* %p0, <4 x float>* %p
 ; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
 ; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi)
-; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512F-NEXT: retq
 ;
 ; X64-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain:
@@ -1238,7 +1190,7 @@ define <8 x i32> @test_broadcast_4i32_8i32_chain(<4 x i32>* %p0, <4 x float>* %p
 ; X64-AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
 ; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi)
-; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512BW-NEXT: retq
 ;
 ; X64-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain:
@@ -1246,7 +1198,7 @@ define <8 x i32> @test_broadcast_4i32_8i32_chain(<4 x i32>* %p0, <4 x float>* %p
 ; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
 ; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi)
-; X64-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512DQ-NEXT: retq
 %1 = load <4 x i32>, <4 x i32>* %p0
 store <4 x float> zeroinitializer, <4 x float>* %p1
diff --git a/test/CodeGen/X86/vec_fp_to_int.ll b/test/CodeGen/X86/vec_fp_to_int.ll
index 2ced6de6aeb..2ad20a89cf2 100644
--- a/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/test/CodeGen/X86/vec_fp_to_int.ll
@@ -204,7 +204,7 @@ define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
 ;
 ; AVX512VL-LABEL: fptosi_4f64_to_4i64:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax
 ; AVX512VL-NEXT: vmovq %rax, %xmm2
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
@@ -217,7 +217,7 @@ define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
 ; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax
 ; AVX512VL-NEXT: vmovq %rax, %xmm0
 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512DQ-LABEL: fptosi_4f64_to_4i64:
@@ -719,7 +719,7 @@ define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
 ;
 ; AVX512VL-LABEL: fptoui_4f64_to_4i64:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax
 ; AVX512VL-NEXT: vmovq %rax, %xmm2
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
@@ -732,7 +732,7 @@ define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
 ; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax
 ; AVX512VL-NEXT: vmovq %rax, %xmm0
 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512DQ-LABEL: fptoui_4f64_to_4i64:
@@ -1097,7 +1097,7 @@ define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
 ; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
 ; AVX512VL-NEXT: vmovq %rax, %xmm0
 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512DQ-LABEL: fptosi_4f32_to_4i64:
@@ -1205,7 +1205,7 @@ define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
 ; AVX512VL-NEXT: vmovq %rcx, %xmm1
 ; AVX512VL-NEXT: vmovq %rax, %xmm2
 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512DQ-LABEL: fptosi_8f32_to_4i64:
@@ -1822,7 +1822,7 @@ define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
 ; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
 ; AVX512VL-NEXT: vmovq %rax, %xmm0
 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512DQ-LABEL: fptoui_4f32_to_4i64:
@@ -2000,7 +2000,7 @@ define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
 ; AVX512VL-NEXT: vmovq %rcx, %xmm1
 ; AVX512VL-NEXT: vmovq %rax, %xmm2
 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512DQ-LABEL: fptoui_8f32_to_4i64:
@@ -2409,125 +2409,29 @@ define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
 ; SSE-NEXT: popq %r14
 ; SSE-NEXT: retq
 ;
-; VEX-LABEL: fptosi_2f128_to_4i32:
-; VEX: # BB#0:
-; VEX-NEXT: pushq %r14
-; VEX-NEXT: pushq %rbx
-; VEX-NEXT: subq $24, %rsp
-; VEX-NEXT: movq %rsi, %r14
-; VEX-NEXT: movq %rdi, %rbx
-; VEX-NEXT: movq %rdx, %rdi
-; VEX-NEXT: movq %rcx, %rsi
-; VEX-NEXT: callq __fixtfdi
-; VEX-NEXT: vmovq %rax, %xmm0
-; VEX-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
-; VEX-NEXT: movq %rbx, %rdi
-; VEX-NEXT: movq %r14, %rsi
-; VEX-NEXT: callq __fixtfdi
-; VEX-NEXT: vmovq %rax, %xmm0
-; VEX-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
-; VEX-NEXT: # xmm0 = xmm0[0],mem[0]
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
-; VEX-NEXT: addq $24, %rsp
-; VEX-NEXT: popq %rbx
-; VEX-NEXT: popq %r14
-; VEX-NEXT: retq
-;
-; AVX512F-LABEL: fptosi_2f128_to_4i32:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: pushq %r14
-; AVX512F-NEXT: pushq %rbx
-; AVX512F-NEXT: subq $24, %rsp
-; AVX512F-NEXT: movq %rsi, %r14
-; AVX512F-NEXT: movq %rdi, %rbx
-; AVX512F-NEXT: movq %rdx, %rdi
-; AVX512F-NEXT: movq %rcx, %rsi
-; AVX512F-NEXT: callq __fixtfdi
-; AVX512F-NEXT: vmovq %rax, %xmm0
-; AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
-; AVX512F-NEXT: movq %rbx, %rdi
-; AVX512F-NEXT: movq %r14, %rsi
-; AVX512F-NEXT: callq __fixtfdi
-; AVX512F-NEXT: vmovq %rax, %xmm0
-; AVX512F-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
-; AVX512F-NEXT: # xmm0 = xmm0[0],mem[0]
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
-; AVX512F-NEXT: addq $24, %rsp
-; AVX512F-NEXT: popq %rbx
-; AVX512F-NEXT: popq %r14
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: fptosi_2f128_to_4i32:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: pushq %r14
-; AVX512VL-NEXT: pushq %rbx
-; AVX512VL-NEXT: subq $24, %rsp
-; AVX512VL-NEXT: movq %rsi, %r14
-; AVX512VL-NEXT: movq %rdi, %rbx
-; AVX512VL-NEXT: movq %rdx, %rdi
-; AVX512VL-NEXT: movq %rcx, %rsi
-; AVX512VL-NEXT: callq __fixtfdi
-; AVX512VL-NEXT: vmovq %rax, %xmm0
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
-; AVX512VL-NEXT: movq %rbx, %rdi
-; AVX512VL-NEXT: movq %r14, %rsi
-; AVX512VL-NEXT: callq __fixtfdi
-; AVX512VL-NEXT: vmovq %rax, %xmm0
-; AVX512VL-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
-; AVX512VL-NEXT: # xmm0 = xmm0[0],mem[0]
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
-; AVX512VL-NEXT: addq $24, %rsp
-; AVX512VL-NEXT: popq %rbx
-; AVX512VL-NEXT: popq %r14
-; AVX512VL-NEXT: retq
-;
-; AVX512DQ-LABEL: fptosi_2f128_to_4i32:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: pushq %r14
-; AVX512DQ-NEXT: pushq %rbx
-; AVX512DQ-NEXT: subq $24, %rsp
-; AVX512DQ-NEXT: movq %rsi, %r14
-; AVX512DQ-NEXT: movq %rdi, %rbx
-; AVX512DQ-NEXT: movq %rdx, %rdi
-; AVX512DQ-NEXT: movq %rcx, %rsi
-; AVX512DQ-NEXT: callq __fixtfdi
-; AVX512DQ-NEXT: vmovq %rax, %xmm0
-; AVX512DQ-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
-; AVX512DQ-NEXT: movq %rbx, %rdi
-; AVX512DQ-NEXT: movq %r14, %rsi
-; AVX512DQ-NEXT: callq __fixtfdi
-; AVX512DQ-NEXT: vmovq %rax, %xmm0
-; AVX512DQ-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
-; AVX512DQ-NEXT: # xmm0 = xmm0[0],mem[0]
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
-; AVX512DQ-NEXT: addq $24, %rsp
-; AVX512DQ-NEXT: popq %rbx
-; AVX512DQ-NEXT: popq %r14
-; AVX512DQ-NEXT: retq
-;
-; AVX512VLDQ-LABEL: fptosi_2f128_to_4i32:
-; AVX512VLDQ: # BB#0:
-; AVX512VLDQ-NEXT: pushq %r14
-; AVX512VLDQ-NEXT: pushq %rbx
-; AVX512VLDQ-NEXT: subq $24, %rsp
-; AVX512VLDQ-NEXT: movq %rsi, %r14
-; AVX512VLDQ-NEXT: movq %rdi, %rbx
-; AVX512VLDQ-NEXT: movq %rdx, %rdi
-; AVX512VLDQ-NEXT: movq %rcx, %rsi
-; AVX512VLDQ-NEXT: callq __fixtfdi
-; AVX512VLDQ-NEXT: vmovq %rax, %xmm0
-; AVX512VLDQ-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
-; AVX512VLDQ-NEXT: movq %rbx, %rdi
-; AVX512VLDQ-NEXT: movq %r14, %rsi
-; AVX512VLDQ-NEXT: callq __fixtfdi
-; AVX512VLDQ-NEXT: vmovq %rax, %xmm0
-; AVX512VLDQ-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
-; AVX512VLDQ-NEXT: # xmm0 = xmm0[0],mem[0]
-; AVX512VLDQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
-; AVX512VLDQ-NEXT: addq $24, %rsp
-; AVX512VLDQ-NEXT: popq %rbx
-; AVX512VLDQ-NEXT: popq %r14
-; AVX512VLDQ-NEXT: retq
+; AVX-LABEL: fptosi_2f128_to_4i32:
+; AVX: # BB#0:
+; AVX-NEXT: pushq %r14
+; AVX-NEXT: pushq %rbx
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: movq %rsi, %r14
+; AVX-NEXT: movq %rdi, %rbx
+; AVX-NEXT: movq %rdx, %rdi
+; AVX-NEXT: movq %rcx, %rsi
+; AVX-NEXT: callq __fixtfdi
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: movq %rbx, %rdi
+; AVX-NEXT: movq %r14, %rsi
+; AVX-NEXT: callq __fixtfdi
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: popq %rbx
+; AVX-NEXT: popq %r14
+; AVX-NEXT: retq
 %cvt = fptosi <2 x fp128> %a to <2 x i32>
 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ret <4 x i32> %ext
diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
index 58d7f7bf3d8..6a81cdc490f 100644
--- a/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -288,7 +288,7 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
 ;
 ; AVX512VL-LABEL: sitofp_4i64_to_4f64:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
 ; AVX512VL-NEXT: vmovq %xmm1, %rax
@@ -299,7 +299,7 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
 ; AVX512VL-NEXT: vmovq %xmm0, %rax
 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512DQ-LABEL: sitofp_4i64_to_4f64:
@@ -821,7 +821,7 @@ define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
 ;
 ; AVX512VL-LABEL: uitofp_4i64_to_4f64:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
 ; AVX512VL-NEXT: vmovq %xmm1, %rax
@@ -832,7 +832,7 @@ define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
 ; AVX512VL-NEXT: vmovq %xmm0, %rax
 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
xmm0[0],xmm2[0] -; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: uitofp_4i64_to_4f64: @@ -1430,7 +1430,7 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) { ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0 +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] @@ -2344,7 +2344,7 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0 +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] @@ -2775,7 +2775,7 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; AVX512VL-LABEL: sitofp_load_4i64_to_4f64: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1 +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 ; AVX512VL-NEXT: vmovq %xmm1, %rax @@ -2786,7 +2786,7 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_load_4i64_to_4f64: @@ -3190,7 +3190,7 @@ define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; AVX512VL-LABEL: uitofp_load_4i64_to_4f64: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1 +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2 ; AVX512VL-NEXT: vmovq %xmm1, %rax @@ -3201,7 +3201,7 @@ define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: uitofp_load_4i64_to_4f64: @@ -3426,7 +3426,7 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0 +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] @@ -3667,7 +3667,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm0 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0] -; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 +; 
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512DQ-LABEL: sitofp_load_8i64_to_8f32:
@@ -4013,7 +4013,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; AVX512VL-NEXT: vmovq %xmm0, %rax
 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; AVX512VL-NEXT: vmovq %xmm0, %rax
 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
@@ -4593,7 +4593,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm0
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512DQ-LABEL: uitofp_load_8i64_to_8f32:
diff --git a/test/CodeGen/X86/vector-half-conversions.ll b/test/CodeGen/X86/vector-half-conversions.ll
index 31eb2202a05..5bf6fbeb623 100644
--- a/test/CodeGen/X86/vector-half-conversions.ll
+++ b/test/CodeGen/X86/vector-half-conversions.ll
@@ -461,7 +461,7 @@ define <8 x float> @cvt_8i16_to_8f32(<8 x i16> %a0) nounwind {
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm4, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 %1 = bitcast <8 x i16> %a0 to <8 x half>
 %2 = fpext <8 x half> %1 to <8 x float>
@@ -757,7 +757,7 @@ define <16 x float> @cvt_16i16_to_16f32(<16 x i16> %a0) nounwind {
 ;
 ; AVX512VL-LABEL: cvt_16i16_to_16f32:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm10
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm10
 ; AVX512VL-NEXT: vmovq %xmm0, %rax
 ; AVX512VL-NEXT: movq %rax, %rcx
 ; AVX512VL-NEXT: shrq $48, %rcx
@@ -840,14 +840,14 @@ define <16 x float> @cvt_16i16_to_16f32(<16 x i16> %a0) nounwind {
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm7[0],xmm5[0],xmm7[2,3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1],xmm0[0],xmm2[3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm16[0],xmm15[0],xmm16[2,3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm14[0],xmm1[3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm13[0]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm12[0],xmm11[0],xmm12[2,3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm9[0],xmm2[3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm8[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm2, %ymm1
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; AVX512VL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512VL-NEXT: retq
 %1 = bitcast <16 x i16> %a0 to <16 x half>
@@ -1227,7 +1227,7 @@ define <8 x float> @load_cvt_8i16_to_8f32(<8 x i16>* %a0) nounwind {
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm4, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 %1 = load <8 x i16>, <8 x i16>* %a0
 %2 = bitcast <8 x i16> %1 to <8 x half>
@@ -1491,14 +1491,14 @@ define <16 x float> @load_cvt_16i16_to_16f32(<16 x i16>* %a0) nounwind {
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm4, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm14[0],xmm15[0],xmm14[2,3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm13[0],xmm1[3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm12[0]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm10[0],xmm11[0],xmm10[2,3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm9[0],xmm2[3]
 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm8[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm2, %ymm1
+; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; AVX512VL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512VL-NEXT: retq
 %1 = load <16 x i16>, <16 x i16>* %a0
@@ -1738,7 +1738,7 @@ define <4 x double> @cvt_4i16_to_4f64(<4 x i16> %a0) nounwind {
 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 %1 = bitcast <4 x i16> %a0 to <4 x half>
 %2 = fpext <4 x half> %1 to <4 x double>
@@ -1929,7 +1929,7 @@ define <4 x double> @cvt_8i16_to_4f64(<8 x i16> %a0) nounwind {
 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 %2 = bitcast <4 x i16> %1 to <4 x half>
@@ -2145,14 +2145,14 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
 ; AVX512VL-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
 ; AVX512VL-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm5[0],xmm4[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm6, %ymm4, %ymm4
+; AVX512VL-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
 ; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512VL-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
 ; AVX512VL-NEXT: retq
 %1 = bitcast <8 x i16> %a0 to <8 x half>
@@ -2350,7 +2350,7 @@ define <4 x double> @load_cvt_4i16_to_4f64(<4 x i16>* %a0) nounwind {
 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 %1 = load <4 x i16>, <4 x i16>* %a0
 %2 = bitcast <4 x i16> %1 to <4 x half>
@@ -2474,7 +2474,7 @@ define <4 x double> @load_cvt_8i16_to_4f64(<8 x i16>* %a0) nounwind {
 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 %1 = load <8 x i16>, <8 x i16>* %a0
 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -2643,14 +2643,14 @@ define <8 x double> @load_cvt_8i16_to_8f64(<8 x i16>* %a0) nounwind {
 ; AVX512VL-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5
 ; AVX512VL-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4
 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm6, %ymm4, %ymm4
+; AVX512VL-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
 ; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3
 ; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512VL-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0
 ; AVX512VL-NEXT: retq
 %1 = load <8 x i16>, <8 x i16>* %a0
@@ -3182,7 +3182,7 @@ define <8 x i16> @cvt_8f32_to_8i16(<8 x float> %a0) nounwind {
 ; AVX512VL-NEXT: orl %edx, %eax
 ; AVX512VL-NEXT: shlq $32, %rax
 ; AVX512VL-NEXT: orq %rcx, %rax
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
 ; AVX512VL-NEXT: vmovd %xmm1, %ecx
@@ -3427,7 +3427,7 @@ define <16 x i16> @cvt_16f32_to_16i16(<16 x float> %a0) nounwind {
 ; AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2
 ; AVX512VL-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
 ; AVX512VL-NEXT: vmovd %xmm2, %eax
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm1, %xmm2
+; AVX512VL-NEXT: vextractf128 $1, %ymm1, %xmm2
 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
 ; AVX512VL-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
@@ -3458,7 +3458,7 @@ define <16 x i16> @cvt_16f32_to_16i16(<16 x float> %a0) nounwind {
 ; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
 ; AVX512VL-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
 ; AVX512VL-NEXT: vmovd %xmm1, %eax
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
@@ -3479,7 +3479,7 @@ define <16 x i16> @cvt_16f32_to_16i16(<16 x float> %a0) nounwind {
 ; AVX512VL-NEXT: vpinsrw $6, %eax, %xmm3, %xmm1
 ; AVX512VL-NEXT: vmovd %xmm0, %eax
 ; AVX512VL-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 %1 = fptrunc <16 x float> %a0 to <16 x half>
 %2 = bitcast <16 x half> %1 to <16 x i16>
@@ -3958,7 +3958,7 @@ define void @store_cvt_8f32_to_8i16(<8 x float> %a0, <8 x i16>* %a1) nounwind {
 ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
 ; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
 ; AVX512VL-NEXT: vmovd %xmm1, %r10d
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
 ; AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2
 ; AVX512VL-NEXT: vmovd %xmm2, %r11d
@@ -4191,9 +4191,9 @@ define void @store_cvt_16f32_to_16i16(<16 x float> %a0, <16 x i16>* %a1) nounwin
 ;
 ; AVX512VL-LABEL: store_cvt_16f32_to_16i16:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm2, %xmm3
+; AVX512VL-NEXT: vextractf128 $1, %ymm2, %xmm3
 ; AVX512VL-NEXT: vcvtps2ph $4, %xmm3, %xmm4
 ; AVX512VL-NEXT: vmovd %xmm4, %eax
 ; AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm4
@@ -4422,7 +4422,7 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x double> %a0) nounwind {
 ; AVX512VL-NEXT: movzwl %ax, %r14d
 ; AVX512VL-NEXT: orl %ebx, %r14d
 ; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512VL-NEXT: callq __truncdfhf2
@@ -4572,7 +4572,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind {
 ; AVX512VL-NEXT: movzwl %ax, %r14d
 ; AVX512VL-NEXT: orl %ebx, %r14d
 ; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512VL-NEXT: callq __truncdfhf2
@@ -4726,7 +4726,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
 ; AVX512VL-NEXT: movzwl %ax, %r14d
 ; AVX512VL-NEXT: orl %ebx, %r14d
 ; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512VL-NEXT: callq __truncdfhf2
@@ -4969,7 +4969,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) nounwind {
 ; AVX512VL-NEXT: movzwl %ax, %r15d
 ; AVX512VL-NEXT: orl %ebx, %r15d
 ; AVX512VL-NEXT: vmovups (%rsp), %zmm0 # 64-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512VL-NEXT: callq __truncdfhf2
@@ -4994,7 +4994,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) nounwind {
 ; AVX512VL-NEXT: movzwl %ax, %r15d
 ; AVX512VL-NEXT: orl %ebx, %r15d
 ; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512VL-NEXT: callq __truncdfhf2
@@ -5188,7 +5188,7 @@ define void @store_cvt_4f64_to_4i16(<4 x double> %a0, <4 x i16>* %a1) nounwind {
 ; AVX512VL-NEXT: callq __truncdfhf2
 ; AVX512VL-NEXT: movl %eax, %r14d
 ; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512VL-NEXT: callq __truncdfhf2
@@ -5357,7 +5357,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) noun
 ; AVX512VL-NEXT: movzwl %ax, %ebx
 ; AVX512VL-NEXT: orl %ebp, %ebx
 ; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512VL-NEXT: callq __truncdfhf2
@@ -5528,7 +5528,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw
 ; AVX512VL-NEXT: movzwl %ax, %ebx
 ; AVX512VL-NEXT: orl %ebp, %ebx
 ; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512VL-NEXT: callq __truncdfhf2
@@ -5775,7 +5775,7 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) nounwind {
 ; AVX512VL-NEXT: callq __truncdfhf2
 ; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
 ; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512VL-NEXT: callq __truncdfhf2
@@ -5787,7 +5787,7 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) nounwind {
 ; AVX512VL-NEXT: callq __truncdfhf2
 ; AVX512VL-NEXT: movl %eax, %r12d
 ; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512VL-NEXT: callq __truncdfhf2
diff --git a/test/CodeGen/X86/vector-lzcnt-256.ll b/test/CodeGen/X86/vector-lzcnt-256.ll
index 3ad13e03dbd..c6839549302 100644
--- a/test/CodeGen/X86/vector-lzcnt-256.ll
+++ b/test/CodeGen/X86/vector-lzcnt-256.ll
@@ -710,35 +710,20 @@ define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 ;
-; AVX512VLCD-LABEL: testv32i8:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512VLCD-NEXT: vplzcntd %zmm1, %zmm1
-; AVX512VLCD-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512VLCD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
-; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
-; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
-; AVX512VLCD-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
-; AVX512VLCD-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: testv32i8:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1
-; AVX512CD-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
-; AVX512CD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
-; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
-; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512CD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
-; AVX512CD-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: testv32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512-NEXT: vplzcntd %zmm1, %zmm1
+; AVX512-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
+; AVX512-NEXT: vpsubb %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vpsubb %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
 ;
 ; X32-AVX-LABEL: testv32i8:
 ; X32-AVX: # BB#0:
@@ -799,35 +784,20 @@ define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
 ; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 ;
-; AVX512VLCD-LABEL: testv32i8u:
-; AVX512VLCD: ## BB#0:
-; AVX512VLCD-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512VLCD-NEXT: vplzcntd %zmm1, %zmm1
-; AVX512VLCD-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512VLCD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
-; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
-; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
-; AVX512VLCD-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
-; AVX512VLCD-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VLCD-NEXT: retq
-;
-; AVX512CD-LABEL: testv32i8u:
-; AVX512CD: ## BB#0:
-; AVX512CD-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1
-; AVX512CD-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
-; AVX512CD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
-; AVX512CD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
-; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512CD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
-; AVX512CD-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512CD-NEXT: retq
+; AVX512-LABEL: testv32i8u:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
+; AVX512-NEXT: vplzcntd %zmm1, %zmm1
+; AVX512-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
+; AVX512-NEXT: vpsubb %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vpsubb %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
 ;
 ; X32-AVX-LABEL: testv32i8u:
 ; X32-AVX: # BB#0:
diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll
index ba47740cbaf..3c7fd8b51a0 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -1789,25 +1789,15 @@ define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11>
 ret <16 x i16> %shuffle
 }
@@ -1822,23 +1812,14 @@ define <16 x i16> @shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 9, i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9>
 ret <16 x i16> %shuffle
 }
@@ -1885,23 +1866,14 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
+; AVX2OR512VL-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
 ret <16 x i16> %shuffle
 }
@@ -1919,29 +1891,17 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
 ret <16 x i16> %shuffle
 }
@@ -1957,25 +1917,15 @@ define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11>
 ret <16 x i16> %shuffle
 }
@@ -1991,25 +1941,15 @@ define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX512VL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2OR512VL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15>
 ret <16 x i16> %shuffle
 }
@@ -2026,27 +1966,16 @@ define <16 x i16> @shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 13, i32 11, i32 9, i32 10, i32 8, i32 14, i32 15, i32 12, i32 13>
 ret <16 x i16> %shuffle
 }
@@ -2062,25 +1991,15 @@ define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 8, i32 12, i32 12, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8>
 ret <16 x i16> %shuffle
 }
@@ -2095,23 +2014,14 @@ define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
 ret <16 x i16> %shuffle
 }
@@ -2128,27 +2038,16 @@ define <16 x i16> @shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 10, i32 14, i32 15, i32 12, i32 13>
 ret <16 x i16> %shuffle
 }
@@ -2164,25 +2063,15 @@ define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
-; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
+; AVX2OR512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 15, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 15>
 ret <16 x i16> %shuffle
 }
@@ -2210,12 +2099,12 @@ define <16 x i16> @shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_0
 ;
 ; AVX512VL-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 8, i32 15, i32 13, i32 14, i32 12, i32 11, i32 9, i32 10, i32 8>
 ret <16 x i16> %shuffle
@@ -2232,25 +2121,15 @@ define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 8, i32 9, i32 8, i32 13, i32 12, i32 13, i32 12, i32 9, i32 8>
 ret <16 x i16> %shuffle
 }
@@ -2266,25 +2145,15 @@ define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 8, i32 13, i32 12, i32 9, i32 8, i32 13, i32 12, i32 9, i32 8>
 ret <16 x i16> %shuffle
 }
@@ -2300,25 +2169,15 @@ define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 12, i32 13, i32 12, i32 9, i32 8, i32 9, i32 8, i32 13, i32 12>
 ret <16 x i16> %shuffle
 }
@@ -2334,25 +2193,15 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8>
 ret <16 x i16> %shuffle
 }
@@ -2368,25 +2217,15 @@ define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12>
 ret <16 x i16> %shuffle
 }
@@ -2414,12 +2253,12 @@ define <16 x i16> @shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_1
 ;
 ; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 11, i32 10, i32 14, i32 12, i32 8, i32 13, i32 9, i32 15, i32 11>
 ret <16 x i16> %shuffle
@@ -2448,12 +2287,12 @@ define <16 x i16> @shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_1
 ;
 ; AVX512VL-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 11, i32 10, i32 8, i32 14, i32 12, i32 13, i32 9, i32 15, i32 11>
 ret <16 x i16> %shuffle
@@ -2482,12 +2321,12 @@ define <16 x i16> @shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_1
 ;
 ; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 13, i32 10, i32 14, i32 12, i32 8, i32 9, i32 11, i32 15, i32 13>
 ret <16 x i16> %shuffle
@@ -2516,12 +2355,12 @@ define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_1
 ;
 ; AVX512VL-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 11, i32 14, i32 14, i32 15, i32 13, i32 9, i32 14, i32 12, i32 11>
 ret <16 x i16> %shuffle
@@ -2538,25 +2377,15 @@ define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
 ret <16 x i16> %shuffle
 }
@@ -2572,25 +2401,15 @@ define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
xmm0[0,1,2,3,4,5,6],xmm2[7] -; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] -; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: retq +; AVX2OR512VL-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12: +; AVX2OR512VL: # BB#0: +; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2 +; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15] +; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] +; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9] +; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX2OR512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle } @@ -2606,25 +2425,15 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: -; AVX2: # BB#0: -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2 -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15] -; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] -; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] -; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1 -; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2 -; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15] -; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] -; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] -; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: retq +; AVX2OR512VL-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12: +; AVX2OR512VL: # BB#0: +; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2 +; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15] +; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] +; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9] +; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX2OR512VL-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle } @@ -2640,25 +2449,15 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: -; AVX2: # BB#0: -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2 -; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15] -; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] -; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1] -; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1 -; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm2 -; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15] -; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8, i32 8>
   ret <16 x i16> %shuffle
 }
@@ -2675,27 +2474,16 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 15, i32 8, i32 12, i32 12, i32 8, i32 12, i32 13, i32 14, i32 15>
   ret <16 x i16> %shuffle
 }
@@ -2711,25 +2499,15 @@ define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
   ret <16 x i16> %shuffle
 }
@@ -2745,25 +2523,15 @@ define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 undef, i32 8, i32 12, i32 12, i32 12, i32 12>
   ret <16 x i16> %shuffle
 }
@@ -2779,25 +2547,15 @@ define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpsllq $48, %xmm1, %xmm2
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 undef, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
   ret <16 x i16> %shuffle
 }
@@ -2848,13 +2606,13 @@ define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_1
 ;
 ; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm2
 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
 ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,3,2,3,4,5,6,7]
 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11>
   ret <16 x i16> %shuffle
@@ -2926,7 +2684,7 @@ define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_1
 ;
 ; AVX512VL-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
 ; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
@@ -2934,7 +2692,7 @@ define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_1
 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,1,3]
 ; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,5,4,7]
 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,1,2]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 11>
   ret <16 x i16> %shuffle
@@ -2961,13 +2719,13 @@ define <16 x i16> @shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_1
 ;
 ; AVX512VL-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
 ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,3,2,1,4,5,6,7]
 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,3,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 15, i32 12, i32 13, i32 14, i32 11, i32 8, i32 9, i32 10, i32 15>
   ret <16 x i16> %shuffle
@@ -2996,12 +2754,12 @@ define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_1
 ;
 ; AVX512VL-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 13, i32 11, i32 15, i32 9, i32 8, i32 10, i32 15, i32 11, i32 13>
   ret <16 x i16> %shuffle
@@ -3693,23 +3451,14 @@ define <16 x i16> @shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
-; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
-; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
-; AVX512VL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
-; AVX512VL-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
+; AVX2OR512VL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX2OR512VL-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12>
   ret <16 x i16> %shuffle
 }
@@ -3809,23 +3558,14 @@ define <16 x i16> @shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
-; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
-; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
-; AVX512VL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
-; AVX512VL-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
+; AVX2OR512VL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX2OR512VL-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10>
   ret <16 x i16> %shuffle
 }
@@ -4000,17 +3740,11 @@ define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19(<16 x i16>
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19>
   ret <16 x i16> %shuffle
 }
@@ -4023,19 +3757,12 @@ define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a,
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   ret <16 x i16> %shuffle
 }
@@ -4049,17 +3776,11 @@ define <16 x i16> @shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8(<16 x i16> %a,
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %ymm0
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   ret <16 x i16> %shuffle
 }
@@ -4091,19 +3812,12 @@ define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a,
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2OR512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <16 x i16> %shuffle
 }
diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll
index a71e4babd34..301e8079a5d 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -1693,17 +1693,11 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
-; AVX2: # BB#0:
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40>
   ret <32 x i8> %shuffle
 }
@@ -1774,19 +1768,12 @@ define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47>
   ret <32 x i8> %shuffle
 }
@@ -2175,7 +2162,7 @@ define <32 x i8> @shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_
 ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %shuffle
@@ -2190,17 +2177,11 @@ define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2OR512VL-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
   ret <32 x i8> %shuffle
 }
@@ -2264,7 +2245,7 @@ define <32 x i8> @shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_
 ;
 ; AVX512VL-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 3ecfc29d0f0..7f978138719 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -320,39 +320,19 @@ define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
 }
 
 define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_0145:
-; AVX1: # BB#0:
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_0145:
-; AVX2: # BB#0:
-; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_0145:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_0145:
+; ALL: # BB#0:
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: retq
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   ret <4 x double> %shuffle
 }
 
 define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_4501:
-; AVX1: # BB#0:
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_4501:
-; AVX2: # BB#0:
-; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_4501:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm1, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_4501:
+; ALL: # BB#0:
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; ALL-NEXT: retq
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   ret <4 x double> %shuffle
 }
@@ -367,23 +347,11 @@ define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
 }
 
 define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_1054:
-; AVX1: # BB#0:
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_1054:
-; AVX2: # BB#0:
-; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v4f64_1054:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_1054:
+; ALL: # BB#0:
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT: retq
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
   ret <4 x double> %shuffle
 }
@@ -735,7 +703,7 @@ define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512VL-LABEL: shuffle_v4i64_0142:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm1
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
 ; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
 ; AVX512VL-NEXT: retq
@@ -808,7 +776,7 @@ define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512VL-LABEL: shuffle_v4i64_0145:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   ret <4 x i64> %shuffle
@@ -852,7 +820,7 @@ define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512VL-LABEL: shuffle_v4i64_4501:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   ret <4 x i64> %shuffle
@@ -948,7 +916,7 @@ define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; AVX512VL-LABEL: shuffle_v4i64_1054:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
@@ -1424,7 +1392,7 @@ define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) {
 ;
 ; AVX512VL-LABEL: concat_v4i64_0145_bc:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
   %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
   %a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 4, i32 5>
diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll
index d0e85937909..cba15827d32 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -753,17 +753,11 @@ define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
 }
 
 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_3210ba98:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_3210ba98:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_3210ba98:
+; ALL: # BB#0:
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
   ret <8 x float> %shuffle
 }
@@ -829,17 +823,11 @@ define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
 }
 
 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_ba983210:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_ba983210:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinsertf64x2 $1, %xmm0, %ymm1, %ymm0
-; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_ba983210:
+; ALL: # BB#0:
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT: retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
   ret <8 x float> %shuffle
 }
@@ -863,17 +851,11 @@ define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
 }
 
 define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_uuuu1111:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_uuuu1111:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_uuuu1111:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT: retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
   ret <8 x float> %shuffle
 }
@@ -885,17 +867,11 @@ define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) {
 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v8f32_44444444:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_44444444:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vbroadcastss %xmm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v8f32_44444444:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   ret <8 x float> %shuffle
 }
@@ -910,33 +886,21 @@ define <8 x float> @shuffle_v8f32_1188uuuu(<8 x float> %a, <8 x float> %b) {
 }
 
 define <8 x float> @shuffle_v8f32_uuuu3210(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_uuuu3210:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_uuuu3210:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_uuuu3210:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT: retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 2, i32 1, i32 0>
   ret <8 x float> %shuffle
 }
 
 define <8 x float> @shuffle_v8f32_uuuu1188(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_uuuu1188:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_uuuu1188:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
-; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_uuuu1188:
+; ALL: # BB#0:
+; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT: retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 8, i32 8>
   ret <8 x float> %shuffle
 }
@@ -951,17 +915,11 @@ define <8 x float> @shuffle_v8f32_1111uuuu(<8 x float> %a, <8 x float> %b) {
 }
 
 define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) {
-; AVX1OR2-LABEL: shuffle_v8f32_5555uuuu:
-; AVX1OR2: # BB#0:
-; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1OR2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_5555uuuu:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextractf32x4 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: shuffle_v8f32_5555uuuu:
+; ALL: # BB#0:
+; ALL-NEXT: vextractf128 $1, %ymm0, %xmm0
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; ALL-NEXT: retq
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x float> %shuffle
 }
@@ -1900,17 +1858,11 @@ define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v8i32_3210ba98:
-; AVX2: # BB#0:
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i32_3210ba98:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v8i32_3210ba98:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
   ret <8 x i32> %shuffle
 }
@@ -2047,17 +1999,11 @@ define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v8i32_uuuu1111:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i32_uuuu1111:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v8i32_uuuu1111:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; AVX2OR512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
   ret <8 x i32> %shuffle
 }
@@ -2095,7 +2041,7 @@ define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX512VL-LABEL: shuffle_v8i32_44444444:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; AVX512VL-NEXT: vpbroadcastd %xmm0, %ymm0
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
@@ -2109,17 +2055,11 @@ define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v8i32_5555uuuu:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i32_5555uuuu:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v8i32_5555uuuu:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; AVX2OR512VL-NEXT: retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x i32> %shuffle
 }
diff --git a/test/CodeGen/X86/vector-trunc.ll b/test/CodeGen/X86/vector-trunc.ll
index 8a826e025a3..2571a21ce21 100644
--- a/test/CodeGen/X86/vector-trunc.ll
+++ b/test/CodeGen/X86/vector-trunc.ll
@@ -643,7 +643,7 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
 ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
 ; AVX512VL-NEXT: vpmovsxwd %ymm1, %zmm1
 ; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: vmovdqu %ymm0, (%rax)
 ; AVX512VL-NEXT: retq
 ;
@@ -701,7 +701,7 @@ define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
 ; AVX512VL: # BB#0: # %entry
 ; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
 ; AVX512VL-NEXT: vpmovqd %ymm1, %xmm1
-; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: trunc2x4i64_8i32:
@@ -717,7 +717,7 @@ define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
 ; AVX512BWVL: # BB#0: # %entry
 ; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
 ; AVX512BWVL-NEXT: vpmovqd %ymm1, %xmm1
-; AVX512BWVL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512BWVL-NEXT: retq
 entry:
   %0 = trunc <4 x i64> %a to <4 x i32>