Optimizing swizzles of complex shuffles may generate additional complex shuffles.

Do not try to optimize swizzles of shuffles if the source shuffle has more than
a single user, except when the source shuffle is also a swizzle.

llvm-svn: 153864
This commit is contained in:
Nadav Rotem 2012-04-02 07:11:12 +00:00
parent fe02cb5e8b
commit a9ec0e024f
2 changed files with 26 additions and 1 deletions

View File

@ -7792,6 +7792,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
SmallVector<int, 8> NewMask;
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
// If the source shuffle has more than one user then do not try to optimize
// it because it may generate a more complex shuffle node. However, if the
// source shuffle is also a swizzle (a single source shuffle), our
// transformation is still likely to reduce the number of shuffles and only
// generate a simple shuffle node.
if (N0.getOperand(1).getOpcode() != ISD::UNDEF && !N0.hasOneUse())
return SDValue();
EVT InVT = N0.getValueType();
int InNumElts = InVT.getVectorNumElements();
@ -7808,7 +7816,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
NewMask.push_back(Idx);
}
assert(NewMask.size() == VT.getVectorNumElements() && "Invalid mask size");
return DAG.getVectorShuffle(VT, N->getDebugLoc(), OtherSV->getOperand(0),
OtherSV->getOperand(1), &NewMask[0]);
}

View File

@ -12,3 +12,20 @@ define void @pull_bitcast (<4 x i8>* %pA, <4 x i8>* %pB) {
store <4 x i8> %C, <4 x i8>* %pA
ret void
}
; CHECK: multi_use_swizzle
; CHECK: mov
; CHECK-NEXT: shuf
; CHECK-NEXT: shuf
; CHECK-NEXT: shuf
; CHECK-NEXT: xor
; CHECK-NEXT: ret
; Regression test for a shuffle that has more than one user.
; %S reads from two distinct inputs (%A and %B) and is consumed by two
; single-input swizzles (%S1 and %S2). The directives above expect three
; consecutive lines matching "shuf" followed by the xor and the return,
; presumably one shuffle per IR shufflevector, i.e. %S is not merged into
; its users.
define <4 x i32> @multi_use_swizzle (<4 x i32>* %pA, <4 x i32>* %pB) {
%A = load <4 x i32>* %pA
%B = load <4 x i32>* %pB
; Two-input shuffle: mask indices 0-3 select lanes of %A and 4-7 select
; lanes of %B, so %S = <A[1], A[1], B[1], B[2]>.
%S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 6>
; Both users below permute %S alone (second operand is undef), which gives
; %S two uses.
%S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 2>
%S2 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 2>
; Combine the two swizzles so that both stay live up to the return value.
%R = xor <4 x i32> %S1, %S2
ret <4 x i32> %R
}