[X86] Generalize logic blend of (x, -x) combine to match (-x, x).

I suspect this is what let PR26110 lie dormant for so long.

llvm-svn: 261024
This commit is contained in:
Ahmed Bougacha 2016-02-16 22:14:07 +00:00
parent c6b1c28e14
commit 0b74af0c16
2 changed files with 21 additions and 17 deletions

View File

@@ -26470,13 +26470,23 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
// (add (xor X, M), (and M, 1))
// And further to:
// (sub (xor X, M), M)
if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
assert(EltBits == 8 || EltBits == 16 || EltBits == 32);
return DAG.getBitcast(
VT, DAG.getNode(ISD::SUB, DL, MaskVT,
DAG.getNode(ISD::XOR, DL, MaskVT, X, Mask), Mask));
if (X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
auto IsNegV = [](SDNode *N, SDValue V) {
return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
};
SDValue V;
if (IsNegV(Y.getNode(), X))
V = X;
else if (IsNegV(X.getNode(), Y))
V = Y;
if (V) {
assert(EltBits == 8 || EltBits == 16 || EltBits == 32);
return DAG.getBitcast(
VT, DAG.getNode(ISD::SUB, DL, MaskVT,
DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask), Mask));
}
}
// PBLENDVB is only available on SSE 4.1.

View File

@@ -1011,11 +1011,8 @@ define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
; SSE2-NEXT: psrld $31, %xmm1
; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: psubd %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm1
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: psubd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_neg_logic_v4i32_2:
@@ -1023,11 +1020,8 @@ define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
; SSSE3-NEXT: psrld $31, %xmm1
; SSSE3-NEXT: pslld $31, %xmm1
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: psubd %xmm0, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pandn %xmm2, %xmm1
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: pxor %xmm1, %xmm0
; SSSE3-NEXT: psubd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_neg_logic_v4i32_2: