mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-10 14:36:12 +00:00
[X86][SSSE3] Bail out of lowerVectorShuffleAsPermuteAndUnpack for shuffle-with-zero (PR40306)
If we have PSHUFB and we're shuffling with a zero vector, then we are better off not doing VECTOR_SHUFFLE(UNPCK()), as we lose track of those zero elements.
llvm-svn: 351103
This commit is contained in:
parent
6cc44ba56f
commit
459fed92ba
@ -11964,10 +11964,9 @@ static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
|
||||
/// because for floating point vectors we have a generalized SHUFPS lowering
|
||||
/// strategy that handles everything that doesn't *exactly* match an unpack,
|
||||
/// making this clever lowering unnecessary.
|
||||
static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
|
||||
SDValue V1, SDValue V2,
|
||||
ArrayRef<int> Mask,
|
||||
SelectionDAG &DAG) {
|
||||
static SDValue lowerVectorShuffleAsPermuteAndUnpack(
|
||||
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
|
||||
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
|
||||
assert(!VT.isFloatingPoint() &&
|
||||
"This routine only supports integer vectors.");
|
||||
assert(VT.is128BitVector() &&
|
||||
@ -12036,6 +12035,13 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
|
||||
if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize))
|
||||
return Unpack;
|
||||
|
||||
// If we have PSHUFB, and we're shuffling with a zero vector then we're
|
||||
// better off not doing VECTOR_SHUFFLE(UNPCK()) as we lose track of those
|
||||
// zero elements.
|
||||
if (Subtarget.hasSSSE3() && (ISD::isBuildVectorAllZeros(V1.getNode()) ||
|
||||
ISD::isBuildVectorAllZeros(V2.getNode())))
|
||||
return SDValue();
|
||||
|
||||
// If none of the unpack-rooted lowerings worked (or were profitable) try an
|
||||
// initial unpack.
|
||||
if (NumLoInputs == 0 || NumHiInputs == 0) {
|
||||
@ -12549,7 +12555,7 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
|
||||
// Try to lower by permuting the inputs into an unpack instruction.
|
||||
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
|
||||
DL, MVT::v4i32, V1, V2, Mask, DAG))
|
||||
DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
|
||||
return Unpack;
|
||||
}
|
||||
|
||||
@ -13245,8 +13251,8 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
return BitBlend;
|
||||
|
||||
// Try to lower by permuting the inputs into an unpack instruction.
|
||||
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1,
|
||||
V2, Mask, DAG))
|
||||
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
|
||||
DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
|
||||
return Unpack;
|
||||
|
||||
// If we can't directly blend but can use PSHUFB, that will be better as it
|
||||
@ -13534,7 +13540,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
// shuffles will both be pshufb, in which case we shouldn't bother with
|
||||
// this.
|
||||
if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
|
||||
DL, MVT::v16i8, V1, V2, Mask, DAG))
|
||||
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
|
||||
return Unpack;
|
||||
|
||||
// If we have VBMI we can use one VPERM instead of multiple PSHUFBs.
|
||||
|
@ -2488,17 +2488,19 @@ define <8 x i16> @shuffle_v8i16_9zzzuuuu(<8 x i16> %x) {
|
||||
;
|
||||
; SSSE3-LABEL: shuffle_v8i16_9zzzuuuu:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
|
||||
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: shuffle_v8i16_9zzzuuuu:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
|
||||
; SSE41-NEXT: psrld $16, %xmm0
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v8i16_9zzzuuuu:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
|
||||
; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
|
||||
; AVX-NEXT: retq
|
||||
%r = shufflevector <8 x i16> zeroinitializer, <8 x i16> %x, <8 x i32> <i32 9, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
ret <8 x i16> %r
|
||||
|
Loading…
x
Reference in New Issue
Block a user