diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9e06f0fd6f9..ef191580568 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14554,6 +14554,12 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N1.isUndef()) return N0; + // If this is an insert of an extracted vector into an undef vector, we can + // just use the input to the extract. + if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT) + return N1.getOperand(0); + // Combine INSERT_SUBVECTORs where we are inserting to the same index. // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx ) // --> INSERT_SUBVECTOR( Vec, SubNew, Idx ) diff --git a/test/CodeGen/X86/avx512-skx-insert-subvec.ll b/test/CodeGen/X86/avx512-skx-insert-subvec.ll index 3a93b544b95..c95b8ef33ba 100644 --- a/test/CodeGen/X86/avx512-skx-insert-subvec.ll +++ b/test/CodeGen/X86/avx512-skx-insert-subvec.ll @@ -30,9 +30,9 @@ define <8 x i1> @test2(<2 x i1> %a) { ; CHECK: # BB#0: ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 ; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0 -; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0 -; CHECK-NEXT: vpmovm2q %k0, %zmm1 -; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; CHECK-NEXT: vpmovm2q %k0, %zmm0 +; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1 +; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; CHECK-NEXT: vpmovq2m %zmm0, %k0 ; CHECK-NEXT: vpmovm2w %k0, %xmm0 ; CHECK-NEXT: retq diff --git a/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/test/CodeGen/X86/clear_upper_vector_element_bits.ll index 14ef67884a4..3309855bab0 100644 --- a/test/CodeGen/X86/clear_upper_vector_element_bits.ll +++ b/test/CodeGen/X86/clear_upper_vector_element_bits.ll @@ -35,7 +35,6 @@ define <2 x i64> @_clearupper2xi64a(<2 x i64>) nounwind { ret <2 x i64> %v1 } -; FIXME: Unnecessary vblendps/vpblendd on AVX targets define <4 x i64> @_clearupper4xi64a(<4 x i64>) nounwind { ; SSE-LABEL: _clearupper4xi64a: ; SSE: # BB#0: @@ -46,14 +45,12 @@ define <4 x i64> @_clearupper4xi64a(<4 x i64>) nounwind { ; ; AVX1-LABEL: _clearupper4xi64a: ; AVX1: # BB#0: -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2,3] ; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] ; AVX1-NEXT: retq ; ; AVX2-LABEL: _clearupper4xi64a: ; AVX2: # BB#0: -; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7] ; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] ; AVX2-NEXT: retq @@ -106,7 +103,6 @@ define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind { ret <4 x i32> %v3 } -; FIXME: Unnecessary vblendps on AVX1 target ; FIXME: Missed vpblendw on AVX2 target define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind { ; SSE-LABEL: _clearupper8xi32a: @@ -118,15 +114,13 @@ define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind { ; ; AVX1-LABEL: _clearupper8xi32a: ; AVX1: # BB#0: -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2,3] -; AVX1-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: _clearupper8xi32a: ; AVX2: # BB#0: -; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7] -; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 +; AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq %x0 = extractelement <8 x i32> %0, i32 0 %x1 = extractelement <8 x i32> %0, i32 1 @@ -229,7 +223,6 @@ define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind { ret <8 x i16> %v7 } -; FIXME: Unnecessary vblendps/vpblendd on AVX targets define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind { ; SSE-LABEL: _clearupper16xi16a: ; SSE: # BB#0: @@ -290,17 +283,10 @@ define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind { ; SSE-NEXT: popq %rbp ; SSE-NEXT: retq ; -; AVX1-LABEL: _clearupper16xi16a: -; AVX1: # BB#0: -; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2,3] -; AVX1-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: _clearupper16xi16a: -; AVX2: # BB#0: -; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7] -; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: retq +; AVX-LABEL: _clearupper16xi16a: +; AVX: # BB#0: +; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; AVX-NEXT: retq %x0 = extractelement <16 x i16> %0, i32 0 %x1 = extractelement <16 x i16> %0, i32 1 %x2 = extractelement <16 x i16> %0, i32 2