mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-28 07:05:03 +00:00
[x86] Fix a miscompile in the new shuffle lowering found through the new
fuzz testing. The function which tested for adjacency did what it said on the tin, but when I called it, I wanted it to do something more thorough: I wanted to know if the *pairs* of shuffle elements were adjacent and started at 0 mod 2. In one place I had the decency to try to test for this, but in the other it was completely skipped, miscompiling this test case. Fix this by making the helper actually do what I wanted it to do everywhere I called it (and removing the now redundant code in one place). I *really* dislike the name "canWidenShuffleElements" for this predicate. If anyone can come up with a better name, please let me know. The other name I thought about was "canWidenShuffleMask" but is it really widening the mask to reduce the number of lanes shuffled? I don't know. Naming things is hard. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215089 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e216468fef
commit
15d82b7d33
@ -8234,10 +8234,11 @@ static SDValue lower128BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Tiny helper function to test whether adjacent masks are sequential.
|
||||
static bool areAdjacentMasksSequential(ArrayRef<int> Mask) {
|
||||
/// \brief Tiny helper function to test whether a shuffle mask could be
|
||||
/// simplified by widening the elements being shuffled.
|
||||
static bool canWidenShuffleElements(ArrayRef<int> Mask) {
|
||||
for (int i = 0, Size = Mask.size(); i < Size; i += 2)
|
||||
if (Mask[i] + 1 != Mask[i+1])
|
||||
if (Mask[i] % 2 != 0 || Mask[i] + 1 != Mask[i+1])
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@ -8291,7 +8292,7 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
|
||||
// but it might be interesting to form i128 integers to handle flipping the
|
||||
// low and high halves of AVX 256-bit vectors.
|
||||
if (VT.isInteger() && VT.getScalarSizeInBits() < 64 &&
|
||||
areAdjacentMasksSequential(Mask)) {
|
||||
canWidenShuffleElements(Mask)) {
|
||||
SmallVector<int, 8> NewMask;
|
||||
for (int i = 0, Size = Mask.size(); i < Size; i += 2)
|
||||
NewMask.push_back(Mask[i] / 2);
|
||||
@ -19517,8 +19518,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
|
||||
|
||||
// See if this reduces to a PSHUFD which is no more expensive and can
|
||||
// combine with more operations.
|
||||
if (Mask[0] % 2 == 0 && Mask[2] % 2 == 0 &&
|
||||
areAdjacentMasksSequential(Mask)) {
|
||||
if (canWidenShuffleElements(Mask)) {
|
||||
int DMask[] = {-1, -1, -1, -1};
|
||||
int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
|
||||
DMask[DOffset + 0] = DOffset + Mask[0] / 2;
|
||||
|
@ -17,6 +17,13 @@ define <4 x i32> @shuffle_v4i32_0020(<4 x i32> %a, <4 x i32> %b) {
|
||||
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
|
||||
ret <4 x i32> %shuffle
|
||||
}
|
||||
define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-SSE2-LABEL: @shuffle_v4i32_0112
|
||||
; CHECK-SSE2: pshufd {{.*}} # xmm0 = xmm0[0,1,1,2]
|
||||
; CHECK-SSE2-NEXT: retq
|
||||
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
|
||||
ret <4 x i32> %shuffle
|
||||
}
|
||||
define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-SSE2-LABEL: @shuffle_v4i32_0300
|
||||
; CHECK-SSE2: pshufd {{.*}} # xmm0 = xmm0[0,3,0,0]
|
||||
|
Loading…
Reference in New Issue
Block a user