mirror of
https://github.com/RPCS3/llvm.git
synced 2025-04-13 19:40:26 +00:00
Add X86-SSE4 codegen support for vector-select.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139285 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6d483c2b07
commit
ffe3e7da84
@ -917,6 +917,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
|||||||
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
|
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
|
||||||
setOperationAction(ISD::SHL, MVT::v16i8, Custom);
|
setOperationAction(ISD::SHL, MVT::v16i8, Custom);
|
||||||
|
|
||||||
|
setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
|
||||||
|
setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
|
||||||
|
setOperationAction(ISD::VSELECT, MVT::v16i8, Custom);
|
||||||
|
setOperationAction(ISD::VSELECT, MVT::v8i16, Custom);
|
||||||
|
setOperationAction(ISD::VSELECT, MVT::v4i32, Custom);
|
||||||
|
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
|
||||||
|
|
||||||
// i8 and i16 vectors are custom , because the source register and source
|
// i8 and i16 vectors are custom , because the source register and source
|
||||||
// source memory operand types are not the same width. f32 vectors are
|
// source memory operand types are not the same width. f32 vectors are
|
||||||
// custom since the immediate controlling the insert encodes additional
|
// custom since the immediate controlling the insert encodes additional
|
||||||
@ -8684,6 +8691,32 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
|
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||||
|
SDValue Cond = Op.getOperand(0);
|
||||||
|
SDValue Op1 = Op.getOperand(1);
|
||||||
|
SDValue Op2 = Op.getOperand(2);
|
||||||
|
DebugLoc DL = Op.getDebugLoc();
|
||||||
|
|
||||||
|
SDValue Ops[] = {Cond, Op1, Op2};
|
||||||
|
|
||||||
|
assert(Op1.getValueType().isVector() && "Op1 must be a vector");
|
||||||
|
assert(Op2.getValueType().isVector() && "Op2 must be a vector");
|
||||||
|
assert(Cond.getValueType().isVector() && "Cond must be a vector");
|
||||||
|
assert(Op1.getValueType() == Op2.getValueType() && "Type mismatch");
|
||||||
|
|
||||||
|
switch (Op1.getValueType().getSimpleVT().SimpleTy) {
|
||||||
|
default: break;
|
||||||
|
case MVT::v2i64: return DAG.getNode(X86ISD::BLENDVPD, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
|
||||||
|
case MVT::v2f64: return DAG.getNode(X86ISD::BLENDVPD, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
|
||||||
|
case MVT::v4i32: return DAG.getNode(X86ISD::BLENDVPS, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
|
||||||
|
case MVT::v4f32: return DAG.getNode(X86ISD::BLENDVPS, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
|
||||||
|
case MVT::v16i8: return DAG.getNode(X86ISD::PBLENDVB, DL, Op1.getValueType(), Ops, array_lengthof(Ops));
|
||||||
|
}
|
||||||
|
|
||||||
|
return SDValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
|
// isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or
|
||||||
// ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart
|
// ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart
|
||||||
// from the AND / OR.
|
// from the AND / OR.
|
||||||
@ -10350,6 +10383,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
|
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
|
||||||
case ISD::SETCC: return LowerSETCC(Op, DAG);
|
case ISD::SETCC: return LowerSETCC(Op, DAG);
|
||||||
case ISD::SELECT: return LowerSELECT(Op, DAG);
|
case ISD::SELECT: return LowerSELECT(Op, DAG);
|
||||||
|
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
|
||||||
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
||||||
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
|
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
|
||||||
case ISD::VASTART: return LowerVASTART(Op, DAG);
|
case ISD::VASTART: return LowerVASTART(Op, DAG);
|
||||||
|
@ -175,8 +175,10 @@ namespace llvm {
|
|||||||
/// PSIGNB/W/D - Copy integer sign.
|
/// PSIGNB/W/D - Copy integer sign.
|
||||||
PSIGNB, PSIGNW, PSIGND,
|
PSIGNB, PSIGNW, PSIGND,
|
||||||
|
|
||||||
/// PBLENDVB - Variable blend
|
/// BLENDVXX family of opcodes
|
||||||
PBLENDVB,
|
PBLENDVB,
|
||||||
|
BLENDVPD,
|
||||||
|
BLENDVPS,
|
||||||
|
|
||||||
/// FMAX, FMIN - Floating point max and min.
|
/// FMAX, FMIN - Floating point max and min.
|
||||||
///
|
///
|
||||||
@ -809,6 +811,7 @@ namespace llvm {
|
|||||||
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
@ -58,9 +58,15 @@ def X86psignw : SDNode<"X86ISD::PSIGNW",
|
|||||||
def X86psignd : SDNode<"X86ISD::PSIGND",
|
def X86psignd : SDNode<"X86ISD::PSIGND",
|
||||||
SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
|
SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
|
||||||
SDTCisSameAs<0,2>]>>;
|
SDTCisSameAs<0,2>]>>;
|
||||||
def X86pblendv : SDNode<"X86ISD::PBLENDVB",
|
def X86pblendvb : SDNode<"X86ISD::PBLENDVB",
|
||||||
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
||||||
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
|
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
|
||||||
|
def X86blendvpd : SDNode<"X86ISD::BLENDVPD",
|
||||||
|
SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
|
||||||
|
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
|
||||||
|
def X86blendvps : SDNode<"X86ISD::BLENDVPS",
|
||||||
|
SDTypeProfile<1, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
|
||||||
|
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
|
||||||
def X86pextrb : SDNode<"X86ISD::PEXTRB",
|
def X86pextrb : SDNode<"X86ISD::PEXTRB",
|
||||||
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
|
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
|
||||||
def X86pextrw : SDNode<"X86ISD::PEXTRW",
|
def X86pextrw : SDNode<"X86ISD::PEXTRW",
|
||||||
|
@ -5843,7 +5843,7 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
|
|||||||
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
|
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
|
||||||
memopv32i8, int_x86_avx_blendv_ps_256>;
|
memopv32i8, int_x86_avx_blendv_ps_256>;
|
||||||
|
|
||||||
def : Pat<(X86pblendv VR128:$src1, VR128:$src2, VR128:$src3),
|
def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$src3),
|
||||||
(VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$src3)>,
|
(VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$src3)>,
|
||||||
Requires<[HasAVX]>;
|
Requires<[HasAVX]>;
|
||||||
|
|
||||||
@ -5871,8 +5871,12 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
|
|||||||
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
|
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
|
||||||
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
|
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
|
||||||
|
|
||||||
def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
|
def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0),
|
||||||
(PBLENDVBrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
|
(PBLENDVBrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
|
||||||
|
def : Pat<(X86blendvpd XMM0, VR128:$src1, VR128:$src2),
|
||||||
|
(BLENDVPDrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
|
||||||
|
def : Pat<(X86blendvps XMM0, VR128:$src1, VR128:$src2),
|
||||||
|
(BLENDVPSrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
|
||||||
|
|
||||||
let Predicates = [HasAVX] in
|
let Predicates = [HasAVX] in
|
||||||
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||||
|
@ -3,8 +3,8 @@
|
|||||||
|
|
||||||
; This test is the poster-child for integer-element-promotion.
|
; This test is the poster-child for integer-element-promotion.
|
||||||
; Until this feature is complete, we mark this test as expected to fail.
|
; Until this feature is complete, we mark this test as expected to fail.
|
||||||
; XFAIL: *
|
|
||||||
; CHECK: vector_code
|
; CHECK: vector_code
|
||||||
|
; CHECK: blend
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define <4 x float> @vector_code(<4 x i64> %A, <4 x i64> %B, <4 x float> %R0, <4 x float> %R1 ) {
|
define <4 x float> @vector_code(<4 x i64> %A, <4 x i64> %B, <4 x float> %R0, <4 x float> %R1 ) {
|
||||||
%C = icmp eq <4 x i64> %A, %B
|
%C = icmp eq <4 x i64> %A, %B
|
||||||
|
Loading…
x
Reference in New Issue
Block a user