mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-14 15:39:00 +00:00
[DAGCombiner] Teach DAG combine that inserting an extract_subvector result into the same location of an undef vector can just use the original input to the extract.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294932 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d46db47633
commit
5b7ece9f05
@ -14554,6 +14554,12 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
|
||||
if (N1.isUndef())
|
||||
return N0;
|
||||
|
||||
// If this is an insert of an extracted vector into an undef vector, we can
|
||||
// just use the input to the extract.
|
||||
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
|
||||
N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
|
||||
return N1.getOperand(0);
|
||||
|
||||
// Combine INSERT_SUBVECTORs where we are inserting to the same index.
|
||||
// INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
|
||||
// --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
|
||||
|
@ -30,9 +30,9 @@ define <8 x i1> @test2(<2 x i1> %a) {
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0
|
||||
; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpmovm2q %k0, %zmm1
|
||||
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpmovm2q %k0, %zmm0
|
||||
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1
|
||||
; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vpmovq2m %zmm0, %k0
|
||||
; CHECK-NEXT: vpmovm2w %k0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
|
@ -35,7 +35,6 @@ define <2 x i64> @_clearupper2xi64a(<2 x i64>) nounwind {
|
||||
ret <2 x i64> %v1
|
||||
}
|
||||
|
||||
; FIXME: Unnecessary vblendps/vpblendd on AVX targets
|
||||
define <4 x i64> @_clearupper4xi64a(<4 x i64>) nounwind {
|
||||
; SSE-LABEL: _clearupper4xi64a:
|
||||
; SSE: # BB#0:
|
||||
@ -46,14 +45,12 @@ define <4 x i64> @_clearupper4xi64a(<4 x i64>) nounwind {
|
||||
;
|
||||
; AVX1-LABEL: _clearupper4xi64a:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2,3]
|
||||
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: _clearupper4xi64a:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7]
|
||||
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX2-NEXT: retq
|
||||
@ -106,7 +103,6 @@ define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind {
|
||||
ret <4 x i32> %v3
|
||||
}
|
||||
|
||||
; FIXME: Unnecessary vblendps on AVX1 target
|
||||
; FIXME: Missed vpblendw on AVX2 target
|
||||
define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind {
|
||||
; SSE-LABEL: _clearupper8xi32a:
|
||||
@ -118,15 +114,13 @@ define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind {
|
||||
;
|
||||
; AVX1-LABEL: _clearupper8xi32a:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2,3]
|
||||
; AVX1-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: _clearupper8xi32a:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7]
|
||||
; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
|
||||
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
|
||||
; AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
%x0 = extractelement <8 x i32> %0, i32 0
|
||||
%x1 = extractelement <8 x i32> %0, i32 1
|
||||
@ -229,7 +223,6 @@ define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
|
||||
ret <8 x i16> %v7
|
||||
}
|
||||
|
||||
; FIXME: Unnecessary vblendps/vpblendd on AVX targets
|
||||
define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind {
|
||||
; SSE-LABEL: _clearupper16xi16a:
|
||||
; SSE: # BB#0:
|
||||
@ -290,17 +283,10 @@ define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind {
|
||||
; SSE-NEXT: popq %rbp
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: _clearupper16xi16a:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2,3]
|
||||
; AVX1-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: _clearupper16xi16a:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7]
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
; AVX-LABEL: _clearupper16xi16a:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX-NEXT: retq
|
||||
%x0 = extractelement <16 x i16> %0, i32 0
|
||||
%x1 = extractelement <16 x i16> %0, i32 1
|
||||
%x2 = extractelement <16 x i16> %0, i32 2
|
||||
|
Loading…
Reference in New Issue
Block a user