Add support for matching psign & pblendvb to the x86 target
Remove unnecessary pandn patterns; the 'vnot' patfrag looks through bitcasts.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122098 91177308-0d34-0410-b5e6-96231b3b80d8
Parent: d715e07efe
Commit: b65c175d32
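At the IR level, the new combines target the branchless vector-select idiom: a mask of all sign bits selects between two values. A minimal illustrative sketch (not taken from the commit; value names are hypothetical):

    ; %m is all-ones where %c is negative, all-zeros otherwise
    %m    = ashr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
    %notm = xor <4 x i32> %m, <i32 -1, i32 -1, i32 -1, i32 -1>
    %t0   = and <4 x i32> %x, %notm
    %t1   = and <4 x i32> %y, %m
    %r    = or <4 x i32> %t0, %t1

With SSSE3 available, the OR combine added below rewrites this select into a pblendvb intrinsic (when SSE4.1 is present), and into a single psignb/w/d when %y is the negation of %x.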
lib/Target/X86/X86ISelLowering.cpp
@@ -981,6 +981,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setTargetDAGCombine(ISD::SRA);
   setTargetDAGCombine(ISD::SRL);
   setTargetDAGCombine(ISD::OR);
+  setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::STORE);
   setTargetDAGCombine(ISD::ZERO_EXTEND);
   if (Subtarget->is64Bit())
@@ -8870,6 +8871,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::PINSRB: return "X86ISD::PINSRB";
   case X86ISD::PINSRW: return "X86ISD::PINSRW";
   case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
+  case X86ISD::PANDN: return "X86ISD::PANDN";
+  case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
+  case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
+  case X86ISD::PSIGND: return "X86ISD::PSIGND";
   case X86ISD::FMAX: return "X86ISD::FMAX";
   case X86ISD::FMIN: return "X86ISD::FMIN";
   case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
@@ -11053,6 +11058,36 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
   return SDValue();
 }
 
+
+static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const X86Subtarget *Subtarget) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  // Want to form PANDN nodes, in the hopes of then easily combining them with
+  // OR and AND nodes to form PBLEND/PSIGN.
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::v2i64)
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  DebugLoc DL = N->getDebugLoc();
+
+  // Check LHS for vnot
+  if (N0.getOpcode() == ISD::XOR &&
+      ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+    return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1);
+
+  // Check RHS for vnot
+  if (N1.getOpcode() == ISD::XOR &&
+      ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+    return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0);
+
+  return SDValue();
+}
+
 static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget *Subtarget) {
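PerformAndCombine only needs to recognize the canonical 'vnot' form, since the generic DAG combiner rewrites a vector not as an xor with an all-ones build_vector. A hedged sketch of IR that should reach this combine after legalization (illustrative; names are hypothetical):

    %notm = xor <2 x i64> %m, <i64 -1, i64 -1>
    %r    = and <2 x i64> %notm, %y   ; folded to (X86ISD::PANDN %m, %y), i.e. ~%m & %y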
@@ -11060,12 +11095,101 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   EVT VT = N->getValueType(0);
-  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
     return SDValue();
 
-  // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
+
+  // look for psign/blend
+  if (Subtarget->hasSSSE3()) {
+    if (VT == MVT::v2i64) {
+      // Canonicalize pandn to RHS
+      if (N0.getOpcode() == X86ISD::PANDN)
+        std::swap(N0, N1);
+      // or (and (m, x), (pandn m, y))
+      if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) {
+        SDValue Mask = N1.getOperand(0);
+        SDValue X = N1.getOperand(1);
+        SDValue Y;
+        if (N0.getOperand(0) == Mask)
+          Y = N0.getOperand(1);
+        if (N0.getOperand(1) == Mask)
+          Y = N0.getOperand(0);
+
+        // Check to see if the mask appeared in both the AND and PANDN.
+        if (!Y.getNode())
+          return SDValue();
+
+        // Validate that X, Y, and Mask are bitcasts, and see through them.
+        if (Mask.getOpcode() != ISD::BITCAST ||
+            X.getOpcode() != ISD::BITCAST ||
+            Y.getOpcode() != ISD::BITCAST)
+          return SDValue();
+
+        // Look through mask bitcast.
+        Mask = Mask.getOperand(0);
+        EVT MaskVT = Mask.getValueType();
+
+        // Validate that the Mask operand is a vector sra node. The sra node
+        // will be an intrinsic.
+        if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+          return SDValue();
+
+        // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
+        // there is no psrai.b
+        switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
+        case Intrinsic::x86_sse2_psrai_w:
+        case Intrinsic::x86_sse2_psrai_d:
+          break;
+        default: return SDValue();
+        }
+
+        // Check that the SRA is all signbits.
+        SDValue SraC = Mask.getOperand(2);
+        unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+        unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+        if ((SraAmt + 1) != EltBits)
+          return SDValue();
+
+        DebugLoc DL = N->getDebugLoc();
+
+        // Now we know we at least have a pblendvb with the mask val. See if
+        // we can form a psignb/w/d.
+        // psign = x.type == y.type == mask.type && y = sub(0, x);
+        X = X.getOperand(0);
+        Y = Y.getOperand(0);
+        if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
+            ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
+            X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){
+          unsigned Opc = 0;
+          switch (EltBits) {
+          case 8: Opc = X86ISD::PSIGNB; break;
+          case 16: Opc = X86ISD::PSIGNW; break;
+          case 32: Opc = X86ISD::PSIGND; break;
+          default: break;
+          }
+          if (Opc) {
+            SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
+            return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign);
+          }
+        }
+        // PBLENDVB only available on SSE 4.1
+        if (!Subtarget->hasSSE41())
+          return SDValue();
+
+        unsigned IID = Intrinsic::x86_sse41_pblendvb;
+        X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
+        Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
+        Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
+        Mask = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::v16i8,
+                           DAG.getConstant(IID, MVT::i32), X, Y, Mask);
+        return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
+      }
+    }
+  }
+
+  // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
     std::swap(N0, N1);
   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
@@ -11116,7 +11240,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
                                     DAG.getNode(ISD::TRUNCATE, DL,
                                                 MVT::i8, ShAmt0));
   }
-
+
   return SDValue();
 }
@@ -11334,6 +11458,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SHL:
   case ISD::SRA:
   case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+  case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
   case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
   case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
   case X86ISD::FXOR:
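The vec-sign.ll test added below covers the 32-bit psignd case and the pblendvb fallback. The 16-bit path takes the same shape at <8 x i16>, where the ashr is legalized to the x86_sse2_psrai_w intrinsic the combine checks for. An analogous sketch (assumed, not part of the commit):

    define <8 x i16> @psignw(<8 x i16> %a, <8 x i16> %b) nounwind {
    entry:
      %b.lobit = ashr <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
      %sub = sub nsw <8 x i16> zeroinitializer, %a
      %not = xor <8 x i16> %b.lobit, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
      %t0 = and <8 x i16> %a, %not
      %t1 = and <8 x i16> %b.lobit, %sub
      %cond = or <8 x i16> %t0, %t1     ; expected to match as psignw %a, %b
      ret <8 x i16> %cond
    }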
lib/Target/X86/X86ISelLowering.h
@@ -159,7 +159,13 @@ namespace llvm {
 
       /// PSHUFB - Shuffle 16 8-bit values within a vector.
       PSHUFB,
 
+      /// PANDN - and with not'd value.
+      PANDN,
+
+      /// PSIGNB/W/D - Copy integer sign.
+      PSIGNB, PSIGNW, PSIGND,
+
       /// FMAX, FMIN - Floating point max and min.
       ///
       FMAX, FMIN,
lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -43,6 +43,18 @@ def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
 def X86pshufb : SDNode<"X86ISD::PSHUFB",
                  SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
                                       SDTCisSameAs<0,2>]>>;
+def X86pandn : SDNode<"X86ISD::PANDN",
+                 SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+                                      SDTCisSameAs<0,2>]>>;
+def X86psignb : SDNode<"X86ISD::PSIGNB",
+                 SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+                                      SDTCisSameAs<0,2>]>>;
+def X86psignw : SDNode<"X86ISD::PSIGNW",
+                 SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+                                      SDTCisSameAs<0,2>]>>;
+def X86psignd : SDNode<"X86ISD::PSIGND",
+                 SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
+                                      SDTCisSameAs<0,2>]>>;
 def X86pextrb : SDNode<"X86ISD::PEXTRB",
                  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
 def X86pextrw : SDNode<"X86ISD::PEXTRW",
lib/Target/X86/X86InstrSSE.td
@@ -1584,19 +1584,13 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
 let isCommutable = 0 in
   defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
     // single r+r
-    [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
-                                   (bc_v2i64 (v4i32 immAllOnesV))),
-                             VR128:$src2)))],
+    [(set VR128:$dst, (X86pandn VR128:$src1, VR128:$src2))],
     // double r+r
-    [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
-                 (bc_v2i64 (v2f64 VR128:$src2))))],
+    [],
     // single r+m
-    [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
-                                  (bc_v2i64 (v4i32 immAllOnesV))),
-                             (memopv2i64 addr:$src2))))],
+    [(set VR128:$dst, (X86pandn VR128:$src1, (memopv2i64 addr:$src2)))],
     // double r+m
-    [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
-                 (memopv2i64 addr:$src2)))]]>;
+    []]>;
 
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Arithmetic Instructions
@@ -2536,15 +2530,11 @@ let ExeDomain = SSEPackedInt in {
 }
 def PANDNrr : PDI<0xDF, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                  "pandn\t{$src2, $dst|$dst, $src2}",
-                  [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
-                                                VR128:$src2)))]>;
+                  "pandn\t{$src2, $dst|$dst, $src2}", []>;
 
 def PANDNrm : PDI<0xDF, MRMSrcMem,
                   (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                  "pandn\t{$src2, $dst|$dst, $src2}",
-                  [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
-                                                (memopv2i64 addr:$src2))))]>;
+                  "pandn\t{$src2, $dst|$dst, $src2}", []>;
 }
 } // Constraints = "$src1 = $dst"
@@ -3608,6 +3598,13 @@ def : Pat<(X86pshufb VR128:$src, VR128:$mask),
 def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
           (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
 
+def : Pat<(X86psignb VR128:$src1, VR128:$src2),
+          (PSIGNBrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignw VR128:$src1, VR128:$src2),
+          (PSIGNWrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+def : Pat<(X86psignd VR128:$src1, VR128:$src2),
+          (PSIGNDrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>;
+
 //===---------------------------------------------------------------------===//
 // SSSE3 - Packed Align Instruction Patterns
 //===---------------------------------------------------------------------===//
@@ -3896,27 +3893,6 @@ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
           (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
 
-// Some special case pandn patterns.
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
-                  VR128:$src2)),
-          (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
-                  VR128:$src2)),
-          (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
-                  VR128:$src2)),
-          (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
-                  (memop addr:$src2))),
-          (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
-                  (memop addr:$src2))),
-          (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
-                  (memop addr:$src2))),
-          (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-
 // vector -> vector casts
 def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
           (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
test/CodeGen/X86/vec-sign.ll (new file, 26 lines)
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mcpu=nehalem | FileCheck %s
+
+define <4 x i32> @psignd(<4 x i32> %a, <4 x i32> %b) nounwind ssp {
+entry:
+; CHECK: psignd
+; CHECK-NOT: sub
+  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+  %sub = sub nsw <4 x i32> zeroinitializer, %a
+  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = and <4 x i32> %a, %0
+  %2 = and <4 x i32> %b.lobit, %sub
+  %cond = or <4 x i32> %1, %2
+  ret <4 x i32> %cond
+}
+
+define <4 x i32> @pblendvb(<4 x i32> %b, <4 x i32> %a, <4 x i32> %c) nounwind ssp {
+entry:
+; CHECK: pblendvb
+  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+  %sub = sub nsw <4 x i32> zeroinitializer, %a
+  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = and <4 x i32> %c, %0
+  %2 = and <4 x i32> %a, %b.lobit
+  %cond = or <4 x i32> %1, %2
+  ret <4 x i32> %cond
+}