mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-11 13:37:07 +00:00
[X86][SSE] Avoid vector byte shuffles with zero by using pshufb to create zeros
pshufb can shuffle in zero bytes as well as bytes from a source vector - we can use this to avoid having to shuffle 2 vectors and ORing the result when the used inputs from a vector are all zeroable. Differential Revision: http://reviews.llvm.org/D6878 llvm-svn: 225551
This commit is contained in:
parent
afb7885642
commit
02b7bb4769
@ -9599,23 +9599,41 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
if (Subtarget->hasSSSE3()) {
|
||||
SDValue V1Mask[16];
|
||||
SDValue V2Mask[16];
|
||||
for (int i = 0; i < 16; ++i)
|
||||
bool V1InUse = false;
|
||||
bool V2InUse = false;
|
||||
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
|
||||
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
if (Mask[i] == -1) {
|
||||
V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
|
||||
} else {
|
||||
V1Mask[i] = DAG.getConstant(Mask[i] < 16 ? Mask[i] : 0x80, MVT::i8);
|
||||
V2Mask[i] =
|
||||
DAG.getConstant(Mask[i] < 16 ? 0x80 : Mask[i] - 16, MVT::i8);
|
||||
const int ZeroMask = 0x80;
|
||||
int V1Idx = (Mask[i] < 16 ? Mask[i] : ZeroMask);
|
||||
int V2Idx = (Mask[i] < 16 ? ZeroMask : Mask[i] - 16);
|
||||
if (Zeroable[i])
|
||||
V1Idx = V2Idx = ZeroMask;
|
||||
V1Mask[i] = DAG.getConstant(V1Idx, MVT::i8);
|
||||
V2Mask[i] = DAG.getConstant(V2Idx, MVT::i8);
|
||||
V1InUse |= (ZeroMask != V1Idx);
|
||||
V2InUse |= (ZeroMask != V2Idx);
|
||||
}
|
||||
V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V1,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask));
|
||||
if (isSingleInputShuffleMask(Mask))
|
||||
return V1; // Single inputs are easy.
|
||||
}
|
||||
assert((V1InUse || V2InUse) && "Shuffling to a zeroable vector");
|
||||
|
||||
// Otherwise, blend the two.
|
||||
V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V2,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask));
|
||||
return DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
|
||||
if (V1InUse)
|
||||
V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V1,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask));
|
||||
if (V2InUse)
|
||||
V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V2,
|
||||
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask));
|
||||
|
||||
// If we need shuffled inputs from both, blend the two.
|
||||
if (V1InUse && V2InUse)
|
||||
return DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
|
||||
if (V1InUse)
|
||||
return V1; // Single inputs are easy.
|
||||
if (V2InUse)
|
||||
return V2; // Single inputs are easy.
|
||||
}
|
||||
|
||||
// There are special ways we can lower some single-element blends.
|
||||
|
@ -470,26 +470,17 @@ define <16 x i8> @PR20540(<8 x i8> %a) {
|
||||
;
|
||||
; SSSE3-LABEL: PR20540:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm1
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0]
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSSE3-NEXT: por %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: PR20540:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0]
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: por %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: PR20540:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0]
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
|
||||
ret <16 x i8> %shuffle
|
||||
@ -505,28 +496,19 @@ define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
|
||||
; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movd %edi, %xmm0
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm1
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSSE3-NEXT: por %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movd %edi, %xmm0
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: por %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovd %edi, %xmm0
|
||||
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <16 x i8> undef, i8 %i, i32 0
|
||||
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
@ -544,28 +526,19 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
|
||||
; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movd %edi, %xmm0
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm1
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,0],zero,xmm1[0,0,0,0,0,0,0,0,0,0]
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSSE3-NEXT: por %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movd %edi, %xmm0
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,0],zero,xmm1[0,0,0,0,0,0,0,0,0,0]
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: por %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovd %edi, %xmm0
|
||||
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,0],zero,xmm1[0,0,0,0,0,0,0,0,0,0]
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <16 x i8> undef, i8 %i, i32 0
|
||||
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -601,30 +574,21 @@ define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
|
||||
; SSSE3: # BB#0:
|
||||
; SSSE3-NEXT: movd %edi, %xmm0
|
||||
; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm1
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15]
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSSE3-NEXT: por %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; SSE41: # BB#0:
|
||||
; SSE41-NEXT: movd %edi, %xmm0
|
||||
; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15]
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE41-NEXT: por %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovd %edi, %xmm0
|
||||
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
|
||||
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15]
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%a = insertelement <16 x i8> undef, i8 %i, i32 3
|
||||
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
|
Loading…
Reference in New Issue
Block a user