mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-14 15:19:33 +00:00
X86: deduplicate V[SZ]EXT_MOVL and V[SZ]EXT nodes
I believe VZEXT_MOVL means "zero all vector elements except the first" (and should have identical input & output types), whereas VZEXT means "zero extend each element of a vector (discarding higher elements if necessary)". For example, (v4i32 (vzext (v16i8 ...))) should zero extend the low 4 bytes of the incoming vector to 32 bits each, discarding the higher bytes.

However, at some point in the past these two concepts became confused, even leading to a nonsensical VSEXT_MOVL. This re-merges the nodes where appropriate (all VSEXT_MOVL -> VSEXT; VZEXT_MOVL -> VZEXT when it's an actual extension).

rdar://problem/15981990
llvm-svn: 200918
This commit is contained in:
parent
ae1537330d
commit
87cd67f0f1
@ -9032,7 +9032,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
|
||||
return SDValue();
|
||||
|
||||
if (Subtarget->hasInt256())
|
||||
return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, In);
|
||||
return DAG.getNode(X86ISD::VZEXT, dl, VT, In);
|
||||
|
||||
SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
|
||||
SDValue Undef = DAG.getUNDEF(InVT);
|
||||
@ -10617,7 +10617,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
|
||||
return SDValue();
|
||||
|
||||
if (Subtarget->hasInt256())
|
||||
return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, In);
|
||||
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
|
||||
|
||||
// Optimize vectors in AVX mode
|
||||
// Sign extend v8i16 to v8i32 and
|
||||
@ -10646,8 +10646,8 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
|
||||
MVT HalfVT = MVT::getVectorVT(VT.getScalarType(),
|
||||
VT.getVectorNumElements()/2);
|
||||
|
||||
OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
|
||||
OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
|
||||
OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo);
|
||||
OpHi = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpHi);
|
||||
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
|
||||
}
|
||||
@ -14017,7 +14017,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
|
||||
case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
|
||||
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
|
||||
case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
|
||||
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
|
||||
case X86ISD::VZEXT: return "X86ISD::VZEXT";
|
||||
case X86ISD::VSEXT: return "X86ISD::VSEXT";
|
||||
|
@ -245,12 +245,9 @@ namespace llvm {
|
||||
/// the list of operands.
|
||||
TC_RETURN,
|
||||
|
||||
// VZEXT_MOVL - Vector move low and zero extend.
|
||||
// VZEXT_MOVL - Vector move to low scalar and zero higher vector elements.
|
||||
VZEXT_MOVL,
|
||||
|
||||
// VSEXT_MOVL - Vector move low and sign extend.
|
||||
VSEXT_MOVL,
|
||||
|
||||
// VZEXT - Vector integer zero-extend.
|
||||
VZEXT,
|
||||
|
||||
|
@ -87,16 +87,6 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS",
|
||||
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
|
||||
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
|
||||
|
||||
def X86vzmovly : SDNode<"X86ISD::VZEXT_MOVL",
|
||||
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
|
||||
SDTCisInt<0>, SDTCisInt<1>,
|
||||
SDTCisOpSmallerThanOp<1, 0> ]>>;
|
||||
|
||||
def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
|
||||
SDTypeProfile<1, 1,
|
||||
[SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisInt<1>,
|
||||
SDTCisOpSmallerThanOp<1, 0>]>>;
|
||||
|
||||
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
|
||||
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
|
||||
|
||||
|
@ -5824,34 +5824,6 @@ let Predicates = [UseSSE41] in {
|
||||
(PMOVZXDQrm addr:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
let AddedComplexity = 15 in {
|
||||
def : Pat<(v4i64 (X86vzmovly (v4i32 VR128:$src))),
|
||||
(VPMOVZXDQYrr VR128:$src)>;
|
||||
def : Pat<(v8i32 (X86vzmovly (v8i16 VR128:$src))),
|
||||
(VPMOVZXWDYrr VR128:$src)>;
|
||||
def : Pat<(v16i16 (X86vzmovly (v16i8 VR128:$src))),
|
||||
(VPMOVZXBWYrr VR128:$src)>;
|
||||
}
|
||||
|
||||
def : Pat<(v4i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
|
||||
def : Pat<(v8i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
|
||||
def : Pat<(v16i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
|
||||
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
|
||||
def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [UseSSE41] in {
|
||||
def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
|
||||
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
|
||||
def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
|
||||
}
|
||||
|
||||
|
||||
multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId,
|
||||
OpndItins itins = DEFAULT_ITINS> {
|
||||
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
@ -6002,9 +5974,9 @@ let Predicates = [HasAVX2] in {
|
||||
def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))),
|
||||
(VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
|
||||
|
||||
def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
|
||||
def : Pat<(v8i32 (X86vsext (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
|
||||
(VPMOVSXWDYrm addr:$src)>;
|
||||
def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
|
||||
def : Pat<(v4i64 (X86vsext (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
|
||||
(VPMOVSXDQYrm addr:$src)>;
|
||||
|
||||
def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
|
||||
|
Loading…
Reference in New Issue
Block a user