ISD::VECTOR_SHUFFLE now stores an array of integers representing the shuffle
mask internal to the node, rather than taking a BUILD_VECTOR of ConstantSDNodes
as the shuffle mask.  A value of -1 represents UNDEF.

In addition to eliminating the creation of illegal BUILD_VECTORS just to 
represent shuffle masks, we are better about canonicalizing the shuffle mask,
resulting in substantially better code for some classes of shuffles.

A cleanup of the x86 shuffle code and some canonicalization in DAGCombiner are next.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@69952 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nate Begeman 2009-04-24 03:42:54 +00:00
parent 98d07102d6
commit b706d29f9c
25 changed files with 1637 additions and 2184 deletions

View File

@ -353,6 +353,13 @@ public:
SDValue getConvertRndSat(MVT VT, DebugLoc dl, SDValue Val, SDValue DTy, SDValue getConvertRndSat(MVT VT, DebugLoc dl, SDValue Val, SDValue DTy,
SDValue STy, SDValue STy,
SDValue Rnd, SDValue Sat, ISD::CvtCode Code); SDValue Rnd, SDValue Sat, ISD::CvtCode Code);
/// getVectorShuffle - Return an ISD::VECTOR_SHUFFLE node of type VT taking
/// N1 and N2 as its vector inputs. VT must be a vector type, and MaskElts
/// must point to an array holding one mask entry per element of VT (the
/// length is not passed separately; it is implied by VT). A negative
/// integer mask element is treated as undefined.
SDValue getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, SDValue N2,
const int *MaskElts);
/// getZeroExtendInReg - Return the expression required to zero extend the Op /// getZeroExtendInReg - Return the expression required to zero extend the Op
/// value assuming it was the smaller SrcTy value. /// value assuming it was the smaller SrcTy value.

View File

@ -1703,6 +1703,32 @@ public:
} }
}; };
/// ShuffleVectorSDNode - An ISD::VECTOR_SHUFFLE node. The two vector inputs
/// are the node's operands; the shuffle mask is carried on the node itself as
/// an array of ints instead of being a third operand. A negative mask entry
/// marks the corresponding result element as undefined.
class ShuffleVectorSDNode : public SDNode {
  SDUse Ops[2];

  /// Mask - The shuffle mask. The node only stores the pointer it is given.
  /// NOTE(review): the array is presumably allocated and owned by
  /// SelectionDAG (the only friend able to construct this node) and holds one
  /// entry per element of the result type -- confirm.
  int *Mask;
protected:
  friend class SelectionDAG;
  ShuffleVectorSDNode(MVT VT, DebugLoc dl, SDValue N1, SDValue N2, int *M)
    : SDNode(ISD::VECTOR_SHUFFLE, dl, getSDVTList(VT)), Mask(M) {
    InitOperands(Ops, N1, N2);
  }
public:
  /// getMask - Read-only access to the mask array.
  const int *getMask() const { return Mask; }

  /// isSplat - Return true if this node's mask is a splat according to
  /// isSplatMask for the node's result type. Const so it can be queried
  /// through a const node, like getMask().
  bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }

  /// getSplatIndex - Return the first mask entry of a splat shuffle. Asserts
  /// that the node is a splat.
  /// NOTE(review): Mask[0] is returned verbatim; if isSplatMask accepts a
  /// leading undef (-1) entry the result can be negative -- confirm before
  /// storing it in an unsigned.
  int getSplatIndex() const {
    assert(isSplat() && "Cannot get splat index for non-splat!");
    return Mask[0];
  }

  /// isSplatMask - Splat test on a raw mask for vector type VT (defined
  /// out of line).
  static bool isSplatMask(const int *Mask, MVT VT);

  // Support for isa<>/cast<>/dyn_cast<>.
  static bool classof(const ShuffleVectorSDNode *) { return true; }
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::VECTOR_SHUFFLE;
  }
};
class ConstantSDNode : public SDNode { class ConstantSDNode : public SDNode {
const ConstantInt *Value; const ConstantInt *Value;
friend class SelectionDAG; friend class SelectionDAG;
@ -2084,7 +2110,7 @@ public:
return N->getOpcode() == ISD::CONDCODE; return N->getOpcode() == ISD::CONDCODE;
} }
}; };
/// CvtRndSatSDNode - NOTE: avoid using this node as this may disappear in the /// CvtRndSatSDNode - NOTE: avoid using this node as this may disappear in the
/// future and most targets don't support it. /// future and most targets don't support it.
class CvtRndSatSDNode : public SDNode { class CvtRndSatSDNode : public SDNode {

View File

@ -328,7 +328,7 @@ public:
/// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal. /// are assumed to be legal.
virtual bool isShuffleMaskLegal(SDValue Mask, MVT VT) const { virtual bool isShuffleMaskLegal(const int *Mask, MVT VT) const {
return true; return true;
} }
@ -336,9 +336,7 @@ public:
/// used by Targets can use this to indicate if there is a suitable /// used by Targets can use this to indicate if there is a suitable
/// VECTOR_SHUFFLE that can be used to replace a VAND with a constant /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
/// pool entry. /// pool entry.
virtual bool isVectorClearMaskLegal(const std::vector<SDValue> &BVOps, virtual bool isVectorClearMaskLegal(const int *Mask, MVT VT) const {
MVT EVT,
SelectionDAG &DAG) const {
return false; return false;
} }

View File

@ -51,15 +51,6 @@ class SDTCisOpSmallerThanOp<int SmallOp, int BigOp> : SDTypeConstraint<SmallOp>{
int BigOperandNum = BigOp; int BigOperandNum = BigOp;
} }
/// SDTCisIntVectorOfSameSize - This indicates that ThisOp and OtherOp are
/// vector types, and that ThisOp is the result of
/// MVT::getIntVectorWithNumElements with the number of elements
/// that ThisOp has.
class SDTCisIntVectorOfSameSize<int ThisOp, int OtherOp>
: SDTypeConstraint<ThisOp> {
int OtherOpNum = OtherOp;
}
/// SDTCisEltOfVec - This indicates that ThisOp is a scalar type of the same /// SDTCisEltOfVec - This indicates that ThisOp is a scalar type of the same
/// type as the element type of OtherOp, which is a vector type. /// type as the element type of OtherOp, which is a vector type.
class SDTCisEltOfVec<int ThisOp, int OtherOp> class SDTCisEltOfVec<int ThisOp, int OtherOp>
@ -175,8 +166,8 @@ def SDTIStore : SDTypeProfile<1, 3, [ // indexed store
SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3> SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3>
]>; ]>;
def SDTVecShuffle : SDTypeProfile<1, 3, [ def SDTVecShuffle : SDTypeProfile<1, 2, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisIntVectorOfSameSize<3, 0> SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
]>; ]>;
def SDTVecExtract : SDTypeProfile<1, 2, [ // vector extract def SDTVecExtract : SDTypeProfile<1, 2, [ // vector extract
SDTCisEltOfVec<0, 1>, SDTCisPtrTy<2> SDTCisEltOfVec<0, 1>, SDTCisPtrTy<2>

View File

@ -5098,7 +5098,21 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
InVec.getValueType(), &Ops[0], Ops.size()); InVec.getValueType(), &Ops[0], Ops.size());
} }
// If the invec is an UNDEF and if EltNo is a constant, create a new
// BUILD_VECTOR with undef elements and the inserted element.
if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
isa<ConstantSDNode>(EltNo)) {
MVT VT = InVec.getValueType();
MVT EVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EVT));
unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
if (Elt < Ops.size())
Ops[Elt] = InVal;
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
InVec.getValueType(), &Ops[0], Ops.size());
}
return SDValue(); return SDValue();
} }
@ -5160,9 +5174,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// to examine the mask. // to examine the mask.
if (BCNumEltsChanged) if (BCNumEltsChanged)
return SDValue(); return SDValue();
unsigned Idx = cast<ConstantSDNode>(InVec.getOperand(2). int Idx = cast<ShuffleVectorSDNode>(InVec)->getMask()[Elt];
getOperand(Elt))->getZExtValue(); int NumElems = InVec.getValueType().getVectorNumElements();
unsigned NumElems = InVec.getOperand(2).getNumOperands();
InVec = (Idx < NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); InVec = (Idx < NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
if (InVec.getOpcode() == ISD::BIT_CONVERT) if (InVec.getOpcode() == ISD::BIT_CONVERT)
InVec = InVec.getOperand(0); InVec = InVec.getOperand(0);
@ -5209,7 +5222,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands(); unsigned NumInScalars = N->getNumOperands();
MVT VT = N->getValueType(0); MVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
MVT EltType = VT.getVectorElementType(); MVT EltType = VT.getVectorElementType();
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
@ -5252,56 +5264,36 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
} }
// If everything is good, we can make a shuffle operation. // If everything is good, we can make a shuffle operation.
MVT IndexVT = MVT::i32;
if (VecIn1.getNode()) { if (VecIn1.getNode()) {
SmallVector<SDValue, 8> BuildVecIndices; SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumInScalars; ++i) { for (unsigned i = 0; i != NumInScalars; ++i) {
if (N->getOperand(i).getOpcode() == ISD::UNDEF) { if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
BuildVecIndices.push_back(DAG.getUNDEF(IndexVT)); Mask.push_back(-1);
continue; continue;
} }
SDValue Extract = N->getOperand(i);
// If extracting from the first vector, just use the index directly. // If extracting from the first vector, just use the index directly.
SDValue Extract = N->getOperand(i);
SDValue ExtVal = Extract.getOperand(1); SDValue ExtVal = Extract.getOperand(1);
if (Extract.getOperand(0) == VecIn1) { if (Extract.getOperand(0) == VecIn1) {
if (ExtVal.getValueType() == IndexVT) Mask.push_back(cast<ConstantSDNode>(ExtVal)->getZExtValue());
BuildVecIndices.push_back(ExtVal);
else {
unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
BuildVecIndices.push_back(DAG.getConstant(Idx, IndexVT));
}
continue; continue;
} }
// Otherwise, use InIdx + VecSize // Otherwise, use InIdx + VecSize
unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue(); unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
BuildVecIndices.push_back(DAG.getConstant(Idx+NumInScalars, IndexVT)); Mask.push_back(Idx+NumInScalars);
} }
// Add count and size info. // Add count and size info.
MVT BuildVecVT = MVT::getVectorVT(IndexVT, NumElts); if (!TLI.isTypeLegal(VT) && LegalTypes)
if (!TLI.isTypeLegal(BuildVecVT) && LegalTypes)
return SDValue(); return SDValue();
// Return the new VECTOR_SHUFFLE node. // Return the new VECTOR_SHUFFLE node.
SDValue Ops[5]; SDValue Ops[2];
Ops[0] = VecIn1; Ops[0] = VecIn1;
if (VecIn2.getNode()) { Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
Ops[1] = VecIn2; return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
} else {
// Use an undef build_vector as input for the second operand.
std::vector<SDValue> UnOps(NumInScalars,
DAG.getUNDEF(EltType));
Ops[1] = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
&UnOps[0], UnOps.size());
AddToWorkList(Ops[1].getNode());
}
Ops[2] = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), BuildVecVT,
&BuildVecIndices[0], BuildVecIndices.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(), VT, Ops, 3);
} }
return SDValue(); return SDValue();
@ -5321,8 +5313,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
} }
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
SDValue ShufMask = N->getOperand(2); return SDValue();
unsigned NumElts = ShufMask.getNumOperands();
MVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0); SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1); SDValue N1 = N->getOperand(1);
@ -5330,60 +5324,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
assert(N0.getValueType().getVectorNumElements() == NumElts && assert(N0.getValueType().getVectorNumElements() == NumElts &&
"Vector shuffle must be normalized in DAG"); "Vector shuffle must be normalized in DAG");
// If the shuffle mask is an identity operation on the LHS, return the LHS. // FIXME: implement canonicalizations from DAG.getVectorShuffle()
bool isIdentity = true;
for (unsigned i = 0; i != NumElts; ++i) {
if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF &&
cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue() != i) {
isIdentity = false;
break;
}
}
if (isIdentity) return N->getOperand(0);
// If the shuffle mask is an identity operation on the RHS, return the RHS.
isIdentity = true;
for (unsigned i = 0; i != NumElts; ++i) {
if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF &&
cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue() !=
i+NumElts) {
isIdentity = false;
break;
}
}
if (isIdentity) return N->getOperand(1);
// Check if the shuffle is a unary shuffle, i.e. one of the vectors is not
// needed at all.
bool isUnary = true;
bool isSplat = true;
int VecNum = -1;
unsigned BaseIdx = 0;
for (unsigned i = 0; i != NumElts; ++i)
if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF) {
unsigned Idx=cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue();
int V = (Idx < NumElts) ? 0 : 1;
if (VecNum == -1) {
VecNum = V;
BaseIdx = Idx;
} else {
if (BaseIdx != Idx)
isSplat = false;
if (VecNum != V) {
isUnary = false;
break;
}
}
}
// Normalize unary shuffle so the RHS is undef.
if (isUnary && VecNum == 1)
std::swap(N0, N1);
// If it is a splat, check if the argument vector is a build_vector with // If it is a splat, check if the argument vector is a build_vector with
// all scalar elements the same. // all scalar elements the same.
if (isSplat) { if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
SDNode *V = N0.getNode(); SDNode *V = N0.getNode();
// If this is a bit convert that changes the element type of the vector but // If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to // not the number of vector elements, look through it. Be careful not to
@ -5397,6 +5344,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (V->getOpcode() == ISD::BUILD_VECTOR) { if (V->getOpcode() == ISD::BUILD_VECTOR) {
unsigned NumElems = V->getNumOperands(); unsigned NumElems = V->getNumOperands();
unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
if (NumElems > BaseIdx) { if (NumElems > BaseIdx) {
SDValue Base; SDValue Base;
bool AllSame = true; bool AllSame = true;
@ -5421,38 +5369,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
} }
} }
} }
// If it is a unary or the LHS and the RHS are the same node, turn the RHS
// into an undef.
if (isUnary || N0 == N1) {
// Check the SHUFFLE mask, mapping any inputs from the 2nd operand into the
// first operand.
SmallVector<SDValue, 8> MappedOps;
for (unsigned i = 0; i != NumElts; ++i) {
if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF ||
cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue() <
NumElts) {
MappedOps.push_back(ShufMask.getOperand(i));
} else {
unsigned NewIdx =
cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue() -
NumElts;
MappedOps.push_back(DAG.getConstant(NewIdx,
ShufMask.getOperand(i).getValueType()));
}
}
ShufMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
ShufMask.getValueType(),
&MappedOps[0], MappedOps.size());
AddToWorkList(ShufMask.getNode());
return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(),
N->getValueType(0), N0,
DAG.getUNDEF(N->getValueType(0)),
ShufMask);
}
return SDValue(); return SDValue();
} }
@ -5461,52 +5377,42 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
/// vector_shuffle V, Zero, <0, 4, 2, 4> /// vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
MVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
SDValue LHS = N->getOperand(0); SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1); SDValue RHS = N->getOperand(1);
if (N->getOpcode() == ISD::AND) { if (N->getOpcode() == ISD::AND) {
if (RHS.getOpcode() == ISD::BIT_CONVERT) if (RHS.getOpcode() == ISD::BIT_CONVERT)
RHS = RHS.getOperand(0); RHS = RHS.getOperand(0);
if (RHS.getOpcode() == ISD::BUILD_VECTOR) { if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
std::vector<SDValue> IdxOps; SmallVector<int, 8> Indices;
unsigned NumOps = RHS.getNumOperands(); unsigned NumElts = RHS.getNumOperands();
unsigned NumElts = NumOps;
for (unsigned i = 0; i != NumElts; ++i) { for (unsigned i = 0; i != NumElts; ++i) {
SDValue Elt = RHS.getOperand(i); SDValue Elt = RHS.getOperand(i);
if (!isa<ConstantSDNode>(Elt)) if (!isa<ConstantSDNode>(Elt))
return SDValue(); return SDValue();
else if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
IdxOps.push_back(DAG.getIntPtrConstant(i)); Indices.push_back(i);
else if (cast<ConstantSDNode>(Elt)->isNullValue()) else if (cast<ConstantSDNode>(Elt)->isNullValue())
IdxOps.push_back(DAG.getIntPtrConstant(NumElts)); Indices.push_back(NumElts);
else else
return SDValue(); return SDValue();
} }
// Let's see if the target supports this vector_shuffle. // Let's see if the target supports this vector_shuffle.
if (!TLI.isVectorClearMaskLegal(IdxOps, TLI.getPointerTy(), DAG)) MVT RVT = RHS.getValueType();
if (!TLI.isVectorClearMaskLegal(&Indices[0], RVT))
return SDValue(); return SDValue();
// Return the new VECTOR_SHUFFLE node. // Return the new VECTOR_SHUFFLE node.
MVT EVT = RHS.getValueType().getVectorElementType(); MVT EVT = RVT.getVectorElementType();
MVT VT = MVT::getVectorVT(EVT, NumElts); SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
MVT MaskVT = MVT::getVectorVT(TLI.getPointerTy(), NumElts); DAG.getConstant(0, EVT));
std::vector<SDValue> Ops; SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
LHS = DAG.getNode(ISD::BIT_CONVERT, LHS.getDebugLoc(), VT, LHS); RVT, &ZeroOps[0], ZeroOps.size());
Ops.push_back(LHS); LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
AddToWorkList(LHS.getNode()); SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
std::vector<SDValue> ZeroOps(NumElts, DAG.getConstant(0, EVT)); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
VT, &ZeroOps[0], ZeroOps.size()));
Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
MaskVT, &IdxOps[0], IdxOps.size()));
SDValue Result = DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(),
VT, &Ops[0], Ops.size());
if (VT != N->getValueType(0))
Result = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
N->getValueType(0), Result);
return Result;
} }
} }

View File

@ -267,16 +267,10 @@ private:
bool isVolatile, SDValue ValOp, bool isVolatile, SDValue ValOp,
unsigned StWidth, DebugLoc dl); unsigned StWidth, DebugLoc dl);
/// isShuffleLegal - Return non-null if a vector shuffle is legal with the /// promoteShuffle - Promote a shuffle mask of a vector VT to perform the
/// specified mask and type. Targets can specify exactly which masks they /// same shuffle on a vector of NVT. Must not create an illegal shuffle mask.
/// support and the code generator is tasked with not creating illegal masks. SDValue promoteShuffle(MVT NVT, MVT VT, DebugLoc dl, SDValue N1, SDValue N2,
/// const int *Mask) const;
/// Note that this will also return true for shuffles that are promoted to a
/// different type.
///
/// If this is a legal shuffle, this method returns the (possibly promoted)
/// build_vector Mask. If it's not a legal shuffle, it returns null.
SDNode *isShuffleLegal(MVT VT, SDValue Mask) const;
bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
SmallPtrSet<SDNode*, 32> &NodesLeadingTo); SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
@ -319,50 +313,35 @@ private:
}; };
} }
/// isVectorShuffleLegal - Return true if a vector shuffle is legal with the /// promoteShuffle - Promote a shuffle mask of a vector VT to perform the
/// specified mask and type. Targets can specify exactly which masks they /// same shuffle on a vector of NVT. Must not create an illegal shuffle mask.
/// support and the code generator is tasked with not creating illegal masks. /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
/// SDValue SelectionDAGLegalize::promoteShuffle(MVT NVT, MVT VT, DebugLoc dl,
/// Note that this will also return true for shuffles that are promoted to a SDValue N1, SDValue N2,
/// different type. const int *Mask) const {
SDNode *SelectionDAGLegalize::isShuffleLegal(MVT VT, SDValue Mask) const { MVT EltVT = NVT.getVectorElementType();
switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, VT)) { int NumMaskElts = VT.getVectorNumElements();
default: return 0; int NumDestElts = NVT.getVectorNumElements();
case TargetLowering::Legal: unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
case TargetLowering::Custom:
break;
case TargetLowering::Promote: {
// If this is promoted to a different type, convert the shuffle mask and
// ask if it is legal in the promoted type!
MVT NVT = TLI.getTypeToPromoteTo(ISD::VECTOR_SHUFFLE, VT);
MVT EltVT = NVT.getVectorElementType();
// If we changed # elements, change the shuffle mask. assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
unsigned NumEltsGrowth =
NVT.getVectorNumElements() / VT.getVectorNumElements(); if (NumEltsGrowth == 1)
assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!"); return DAG.getVectorShuffle(NVT, dl, N1, N2, Mask);
if (NumEltsGrowth > 1) {
// Renumber the elements. SmallVector<int, 8> NewMask;
SmallVector<SDValue, 8> Ops; for (int i = 0; i != NumMaskElts; ++i) {
for (unsigned i = 0, e = Mask.getNumOperands(); i != e; ++i) { int Idx = Mask[i];
SDValue InOp = Mask.getOperand(i); for (unsigned j = 0; j != NumEltsGrowth; ++j) {
for (unsigned j = 0; j != NumEltsGrowth; ++j) { if (Idx < 0)
if (InOp.getOpcode() == ISD::UNDEF) NewMask.push_back(-1);
Ops.push_back(DAG.getUNDEF(EltVT)); else
else { NewMask.push_back(Idx * NumEltsGrowth + j);
unsigned InEltNo = cast<ConstantSDNode>(InOp)->getZExtValue();
Ops.push_back(DAG.getConstant(InEltNo*NumEltsGrowth+j, EltVT));
}
}
}
Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getDebugLoc(),
NVT, &Ops[0], Ops.size());
} }
VT = NVT;
break;
} }
} assert((int)NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
return TLI.isShuffleMaskLegal(Mask, VT) ? Mask.getNode() : 0; assert(TLI.isShuffleMaskLegal(&Mask[0], NVT) && "Shuffle not legal?");
return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
} }
SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
@ -1652,25 +1631,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp1.getValueType(), Tmp2); Tmp1.getValueType(), Tmp2);
unsigned NumElts = Tmp1.getValueType().getVectorNumElements(); unsigned NumElts = Tmp1.getValueType().getVectorNumElements();
MVT ShufMaskVT =
MVT::getIntVectorWithNumElements(NumElts);
MVT ShufMaskEltVT = ShufMaskVT.getVectorElementType();
// We generate a shuffle of InVec and ScVec, so the shuffle mask // We generate a shuffle of InVec and ScVec, so the shuffle mask
// should be 0,1,2,3,4,5... with the appropriate element replaced with // should be 0,1,2,3,4,5... with the appropriate element replaced with
// elt 0 of the RHS. // elt 0 of the RHS.
SmallVector<SDValue, 8> ShufOps; SmallVector<int, 8> ShufOps;
for (unsigned i = 0; i != NumElts; ++i) { for (unsigned i = 0; i != NumElts; ++i)
if (i != InsertPos->getZExtValue()) ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
ShufOps.push_back(DAG.getConstant(i, ShufMaskEltVT));
else Result = DAG.getVectorShuffle(Tmp1.getValueType(), dl, Tmp1, ScVec,
ShufOps.push_back(DAG.getConstant(NumElts, ShufMaskEltVT)); &ShufOps[0]);
}
SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, ShufMaskVT,
&ShufOps[0], ShufOps.size());
Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Tmp1.getValueType(),
Tmp1, ScVec, ShufMask);
Result = LegalizeOp(Result); Result = LegalizeOp(Result);
break; break;
} }
@ -1708,13 +1677,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::VECTOR_SHUFFLE: case ISD::VECTOR_SHUFFLE:
Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the input vectors, Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the input vectors,
Tmp2 = LegalizeOp(Node->getOperand(1)); // but not the shuffle mask. Tmp2 = LegalizeOp(Node->getOperand(1)); // but not the shuffle mask.
Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2)); Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
// Allow targets to custom lower the SHUFFLEs they support. // Allow targets to custom lower the SHUFFLEs they support.
switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, Result.getValueType())){ switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, Result.getValueType())){
default: assert(0 && "Unknown operation action!"); default: assert(0 && "Unknown operation action!");
case TargetLowering::Legal: case TargetLowering::Legal:
assert(isShuffleLegal(Result.getValueType(), Node->getOperand(2)) && assert(TLI.isShuffleMaskLegal(cast<ShuffleVectorSDNode>(Node)->getMask(),
Result.getValueType()) &&
"vector shuffle should not be created if not legal!"); "vector shuffle should not be created if not legal!");
break; break;
case TargetLowering::Custom: case TargetLowering::Custom:
@ -1728,23 +1698,21 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
MVT VT = Node->getValueType(0); MVT VT = Node->getValueType(0);
MVT EltVT = VT.getVectorElementType(); MVT EltVT = VT.getVectorElementType();
MVT PtrVT = TLI.getPointerTy(); MVT PtrVT = TLI.getPointerTy();
SDValue Mask = Node->getOperand(2); const int *Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
unsigned NumElems = Mask.getNumOperands(); int NumElems = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops; SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElems; ++i) { for (int i = 0; i != NumElems; ++i) {
SDValue Arg = Mask.getOperand(i); if (Mask[i] < 0) {
if (Arg.getOpcode() == ISD::UNDEF) {
Ops.push_back(DAG.getUNDEF(EltVT)); Ops.push_back(DAG.getUNDEF(EltVT));
} else { continue;
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
if (Idx < NumElems)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp1,
DAG.getConstant(Idx, PtrVT)));
else
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp2,
DAG.getConstant(Idx - NumElems, PtrVT)));
} }
int Idx = Mask[i];
if (Idx < NumElems)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp1,
DAG.getConstant(Idx, PtrVT)));
else
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp2,
DAG.getConstant(Idx - NumElems, PtrVT)));
} }
Result = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); Result = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
break; break;
@ -1759,9 +1727,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Tmp2); Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Tmp2);
// Convert the shuffle mask to the right # elements. // Convert the shuffle mask to the right # elements.
Tmp3 = SDValue(isShuffleLegal(OVT, Node->getOperand(2)), 0); Result = promoteShuffle(NVT, OVT, dl, Tmp1, Tmp2,
assert(Tmp3.getNode() && "Shuffle not legal?"); cast<ShuffleVectorSDNode>(Node)->getMask());
Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NVT, Tmp1, Tmp2, Tmp3);
Result = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Result); Result = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Result);
break; break;
} }
@ -5490,6 +5457,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// FIXME: it would be far nicer to change this into map<SDValue,uint64_t> // FIXME: it would be far nicer to change this into map<SDValue,uint64_t>
// and use a bitmask instead of a list of elements. // and use a bitmask instead of a list of elements.
// FIXME: this doesn't treat <0, u, 0, u> for example, as a splat.
std::map<SDValue, std::vector<unsigned> > Values; std::map<SDValue, std::vector<unsigned> > Values;
Values[SplatValue].push_back(0); Values[SplatValue].push_back(0);
bool isConstant = true; bool isConstant = true;
@ -5546,21 +5514,17 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
if (SplatValue.getNode()) { // Splat of one value? if (SplatValue.getNode()) { // Splat of one value?
// Build the shuffle constant vector: <0, 0, 0, 0> // Build the shuffle constant vector: <0, 0, 0, 0>
MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); SmallVector<int, 8> ZeroVec(NumElems, 0);
SDValue Zero = DAG.getConstant(0, MaskVT.getVectorElementType());
std::vector<SDValue> ZeroVec(NumElems, Zero);
SDValue SplatMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
&ZeroVec[0], ZeroVec.size());
// If the target supports VECTOR_SHUFFLE and this shuffle mask, use it. // If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
if (isShuffleLegal(VT, SplatMask)) { if (TLI.isShuffleMaskLegal(&ZeroVec[0], Node->getValueType(0))) {
// Get the splatted value into the low element of a vector register. // Get the splatted value into the low element of a vector register.
SDValue LowValVec = SDValue LowValVec =
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SplatValue); DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SplatValue);
// Return shuffle(LowValVec, undef, <0,0,0,0>) // Return shuffle(LowValVec, undef, <0,0,0,0>)
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, LowValVec, return DAG.getVectorShuffle(VT, dl, LowValVec, DAG.getUNDEF(VT),
DAG.getUNDEF(VT), SplatMask); &ZeroVec[0]);
} }
} }
@ -5582,35 +5546,25 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
std::swap(Val1, Val2); std::swap(Val1, Val2);
// Build the shuffle constant vector: e.g. <0, 4, 0, 4> // Build the shuffle constant vector: e.g. <0, 4, 0, 4>
MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems); SmallVector<int, 8> ShuffleMask(NumElems, -1);
MVT MaskEltVT = MaskVT.getVectorElementType();
std::vector<SDValue> MaskVec(NumElems);
// Set elements of the shuffle mask for Val1. // Set elements of the shuffle mask for Val1.
std::vector<unsigned> &Val1Elts = Values[Val1]; std::vector<unsigned> &Val1Elts = Values[Val1];
for (unsigned i = 0, e = Val1Elts.size(); i != e; ++i) for (unsigned i = 0, e = Val1Elts.size(); i != e; ++i)
MaskVec[Val1Elts[i]] = DAG.getConstant(0, MaskEltVT); ShuffleMask[Val1Elts[i]] = 0;
// Set elements of the shuffle mask for Val2. // Set elements of the shuffle mask for Val2.
std::vector<unsigned> &Val2Elts = Values[Val2]; std::vector<unsigned> &Val2Elts = Values[Val2];
for (unsigned i = 0, e = Val2Elts.size(); i != e; ++i) for (unsigned i = 0, e = Val2Elts.size(); i != e; ++i)
if (Val2.getOpcode() != ISD::UNDEF) if (Val2.getOpcode() != ISD::UNDEF)
MaskVec[Val2Elts[i]] = DAG.getConstant(NumElems, MaskEltVT); ShuffleMask[Val2Elts[i]] = NumElems;
else
MaskVec[Val2Elts[i]] = DAG.getUNDEF(MaskEltVT);
SDValue ShuffleMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
&MaskVec[0], MaskVec.size());
// If the target supports SCALAR_TO_VECTOR and this shuffle mask, use it. // If the target supports SCALAR_TO_VECTOR and this shuffle mask, use it.
if (TLI.isOperationLegalOrCustom(ISD::SCALAR_TO_VECTOR, VT) && if (TLI.isOperationLegalOrCustom(ISD::SCALAR_TO_VECTOR, VT) &&
isShuffleLegal(VT, ShuffleMask)) { TLI.isShuffleMaskLegal(&ShuffleMask[0], VT)) {
Val1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val1); Val1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val1);
Val2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val2); Val2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val2);
SDValue Ops[] = { Val1, Val2, ShuffleMask }; return DAG.getVectorShuffle(VT, dl, Val1, Val2, &ShuffleMask[0]);
// Return shuffle(LoValVec, HiValVec, <0,1,0,1>)
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, Ops, 3);
} }
} }
@ -8066,36 +8020,19 @@ SDValue SelectionDAGLegalize::WidenVectorOp(SDValue Op, MVT WidenVT) {
case ISD::VECTOR_SHUFFLE: { case ISD::VECTOR_SHUFFLE: {
SDValue Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT); SDValue Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT);
SDValue Tmp2 = WidenVectorOp(Node->getOperand(1), WidenVT); SDValue Tmp2 = WidenVectorOp(Node->getOperand(1), WidenVT);
// VECTOR_SHUFFLE 3rd operand must be a constant build vector that is const int *Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
// used as permutation array. We build the vector here instead of widening SmallVector<int, 8> NewMask;
// because we don't want to legalize and have it turned to something else.
SDValue PermOp = Node->getOperand(2);
SDValueVector NewOps;
MVT PVT = PermOp.getValueType().getVectorElementType();
for (unsigned i = 0; i < NumElts; ++i) { for (unsigned i = 0; i < NumElts; ++i) {
if (PermOp.getOperand(i).getOpcode() == ISD::UNDEF) { int Idx = Mask[i];
NewOps.push_back(PermOp.getOperand(i)); if (Idx < (int)NumElts)
} else { NewMask.push_back(Idx);
unsigned Idx = else
cast<ConstantSDNode>(PermOp.getOperand(i))->getZExtValue(); NewMask.push_back(Idx + NewNumElts - NumElts);
if (Idx < NumElts) {
NewOps.push_back(PermOp.getOperand(i));
}
else {
NewOps.push_back(DAG.getConstant(Idx + NewNumElts - NumElts,
PermOp.getOperand(i).getValueType()));
}
}
} }
for (unsigned i = NumElts; i < NewNumElts; ++i) { for (unsigned i = NumElts; i < NewNumElts; ++i)
NewOps.push_back(DAG.getUNDEF(PVT)); NewMask.push_back(-1);
}
Result = DAG.getVectorShuffle(WidenVT, dl, Tmp1, Tmp2, &NewMask[0]);
SDValue Tmp3 = DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::getVectorVT(PVT, NewOps.size()),
&NewOps[0], NewOps.size());
Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT, Tmp1, Tmp2, Tmp3);
break; break;
} }
case ISD::LOAD: { case ISD::LOAD: {

View File

@ -772,10 +772,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
// If Lo or Hi uses elements from at most two of the four input vectors, then // If Lo or Hi uses elements from at most two of the four input vectors, then
// express it as a vector shuffle of those two inputs. Otherwise extract the // express it as a vector shuffle of those two inputs. Otherwise extract the
// input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
SDValue Mask = N->getOperand(2); const int *Mask = cast<ShuffleVectorSDNode>(N)->getMask();
MVT IdxVT = Mask.getValueType().getVectorElementType(); SmallVector<int, 16> Ops;
SmallVector<SDValue, 16> Ops;
Ops.reserve(NewElts);
for (unsigned High = 0; High < 2; ++High) { for (unsigned High = 0; High < 2; ++High) {
SDValue &Output = High ? Hi : Lo; SDValue &Output = High ? Hi : Lo;
@ -787,18 +785,15 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
unsigned FirstMaskIdx = High * NewElts; unsigned FirstMaskIdx = High * NewElts;
bool useBuildVector = false; bool useBuildVector = false;
for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset);
// The mask element. This indexes into the input. // The mask element. This indexes into the input.
unsigned Idx = Arg.getOpcode() == ISD::UNDEF ? int Idx = Mask[FirstMaskIdx + MaskOffset];
-1U : cast<ConstantSDNode>(Arg)->getZExtValue();
// The input vector this mask element indexes into. // The input vector this mask element indexes into.
unsigned Input = Idx / NewElts; unsigned Input = (unsigned)Idx / NewElts;
if (Input >= array_lengthof(Inputs)) { if (Input >= array_lengthof(Inputs)) {
// The mask element does not index into any input vector. // The mask element does not index into any input vector.
Ops.push_back(DAG.getUNDEF(IdxVT)); Ops.push_back(-1);
continue; continue;
} }
@ -826,27 +821,24 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
} }
// Add the mask index for the new shuffle vector. // Add the mask index for the new shuffle vector.
Ops.push_back(DAG.getConstant(Idx + OpNo * NewElts, IdxVT)); Ops.push_back(Idx + OpNo * NewElts);
} }
if (useBuildVector) { if (useBuildVector) {
MVT EltVT = NewVT.getVectorElementType(); MVT EltVT = NewVT.getVectorElementType();
Ops.clear(); SmallVector<SDValue, 16> SVOps;
// Extract the input elements by hand. // Extract the input elements by hand.
for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset);
// The mask element. This indexes into the input. // The mask element. This indexes into the input.
unsigned Idx = Arg.getOpcode() == ISD::UNDEF ? int Idx = Mask[FirstMaskIdx + MaskOffset];
-1U : cast<ConstantSDNode>(Arg)->getZExtValue();
// The input vector this mask element indexes into. // The input vector this mask element indexes into.
unsigned Input = Idx / NewElts; unsigned Input = (unsigned)Idx / NewElts;
if (Input >= array_lengthof(Inputs)) { if (Input >= array_lengthof(Inputs)) {
// The mask element is "undef" or indexes off the end of the input. // The mask element is "undef" or indexes off the end of the input.
Ops.push_back(DAG.getUNDEF(EltVT)); SVOps.push_back(DAG.getUNDEF(EltVT));
continue; continue;
} }
@ -854,25 +846,22 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
Idx -= Input * NewElts; Idx -= Input * NewElts;
// Extract the vector element by hand. // Extract the vector element by hand.
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
Inputs[Input], DAG.getIntPtrConstant(Idx))); Inputs[Input], DAG.getIntPtrConstant(Idx)));
} }
// Construct the Lo/Hi output using a BUILD_VECTOR. // Construct the Lo/Hi output using a BUILD_VECTOR.
Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, &Ops[0], Ops.size()); Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size());
} else if (InputUsed[0] == -1U) { } else if (InputUsed[0] == -1U) {
// No input vectors were used! The result is undefined. // No input vectors were used! The result is undefined.
Output = DAG.getUNDEF(NewVT); Output = DAG.getUNDEF(NewVT);
} else { } else {
// At least one input vector was used. Create a new shuffle vector.
SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::getVectorVT(IdxVT, Ops.size()),
&Ops[0], Ops.size());
SDValue Op0 = Inputs[InputUsed[0]]; SDValue Op0 = Inputs[InputUsed[0]];
// If only one input was used, use an undefined vector for the other. // If only one input was used, use an undefined vector for the other.
SDValue Op1 = InputUsed[1] == -1U ? SDValue Op1 = InputUsed[1] == -1U ?
DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
Output = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NewVT, Op0, Op1, NewMask); // At least one input vector was used. Create a new shuffle vector.
Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
} }
Ops.clear(); Ops.clear();
@ -1473,18 +1462,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
if (NumOperands == 2) { if (NumOperands == 2) {
// Replace concat of two operands with a shuffle. // Replace concat of two operands with a shuffle.
MVT PtrVT = TLI.getPointerTy(); SmallVector<int, 16> MaskOps(WidenNumElts);
SmallVector<SDValue, 16> MaskOps(WidenNumElts);
for (unsigned i=0; i < WidenNumElts/2; ++i) { for (unsigned i=0; i < WidenNumElts/2; ++i) {
MaskOps[i] = DAG.getConstant(i, PtrVT); MaskOps[i] = i;
MaskOps[i+WidenNumElts/2] = DAG.getConstant(i+WidenNumElts, PtrVT); MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
} }
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, return DAG.getVectorShuffle(WidenVT, dl,
MVT::getVectorVT(PtrVT, WidenNumElts), GetWidenedVector(N->getOperand(0)),
&MaskOps[0], WidenNumElts); GetWidenedVector(N->getOperand(1)),
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT, &MaskOps[0]);
GetWidenedVector(N->getOperand(0)),
GetWidenedVector(N->getOperand(1)), Mask);
} }
} }
} }
@ -1762,7 +1748,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(SDNode *N) {
MVT VT = N->getValueType(0); MVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements(); int NumElts = VT.getVectorNumElements();
DebugLoc dl = N->getDebugLoc(); DebugLoc dl = N->getDebugLoc();
MVT WidenVT = TLI.getTypeToTransformTo(VT); MVT WidenVT = TLI.getTypeToTransformTo(VT);
@ -1772,28 +1758,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(SDNode *N) {
SDValue InOp2 = GetWidenedVector(N->getOperand(1)); SDValue InOp2 = GetWidenedVector(N->getOperand(1));
// Adjust mask based on new input vector length. // Adjust mask based on new input vector length.
SDValue Mask = N->getOperand(2); const int *Mask = cast<ShuffleVectorSDNode>(N)->getMask();
SmallVector<SDValue, 16> MaskOps(WidenNumElts); SmallVector<int, 16> NewMask;
MVT IdxVT = Mask.getValueType().getVectorElementType(); for (int i = 0; i < NumElts; ++i) {
for (unsigned i = 0; i < NumElts; ++i) { if (Mask[i] < NumElts)
SDValue Arg = Mask.getOperand(i); NewMask.push_back(Mask[i]);
if (Arg.getOpcode() == ISD::UNDEF) else
MaskOps[i] = Arg; NewMask.push_back(Mask[i] - NumElts + WidenNumElts);
else {
unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
if (Idx < NumElts)
MaskOps[i] = Arg;
else
MaskOps[i] = DAG.getConstant(Idx - NumElts + WidenNumElts, IdxVT);
}
} }
for (unsigned i = NumElts; i < WidenNumElts; ++i) for (unsigned i = NumElts; i < WidenNumElts; ++i)
MaskOps[i] = DAG.getUNDEF(IdxVT); NewMask.push_back(-1);
SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, dl, return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
MVT::getVectorVT(IdxVT, WidenNumElts),
&MaskOps[0], WidenNumElts);
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT, InOp1, InOp2, NewMask);
} }
SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {

View File

@ -456,6 +456,13 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(AT->getRawSubclassData()); ID.AddInteger(AT->getRawSubclassData());
break; break;
} }
case ISD::VECTOR_SHUFFLE: {
const int *Mask = cast<ShuffleVectorSDNode>(N)->getMask();
for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
i != e; ++i)
ID.AddInteger(Mask[i]);
break;
}
} // end switch (N->getOpcode()) } // end switch (N->getOpcode())
} }
@ -762,12 +769,6 @@ void SelectionDAG::VerifyNode(SDNode *N) {
assert(N->getValueType(0).isVector() && "Wrong return type!"); assert(N->getValueType(0).isVector() && "Wrong return type!");
assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
"Wrong number of operands!"); "Wrong number of operands!");
MVT EltVT = N->getValueType(0).getVectorElementType();
for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
assert((I->getValueType() == EltVT ||
(EltVT.isInteger() && I->getValueType().isInteger() &&
EltVT.bitsLE(I->getValueType()))) &&
"Wrong operand type!");
break; break;
} }
} }
@ -1126,6 +1127,110 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
return SDValue(CondCodeNodes[Cond], 0); return SDValue(CondCodeNodes[Cond], 0);
} }
/// commuteShuffle - Exchange the two shuffle inputs and rewrite the mask M in
/// place so that every defined element keeps referring to the same value:
/// indices that selected from the first input now select from the second and
/// vice versa.  Negative (undef) mask elements are left untouched.
static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
  std::swap(N1, N2);

  const int NumElts = M.size();
  for (int Pos = 0; Pos < NumElts; ++Pos) {
    int &Elt = M[Pos];
    // Undef entries do not name either input; nothing to remap.
    if (Elt < 0)
      continue;
    // Low half [0, NumElts) moves up, high half moves down by NumElts.
    Elt += (Elt < NumElts) ? NumElts : -NumElts;
  }
}
/// getVectorShuffle - Build (or reuse through CSE) an ISD::VECTOR_SHUFFLE node
/// of type VT that selects elements from N1 and N2.  Mask must provide one
/// entry per element of VT: a value in [0, NElts) selects from N1, a value in
/// [NElts, 2*NElts) selects from N2, and any negative value means undef.
///
/// The operands and mask are canonicalized before a node is created, so the
/// returned value may be an UNDEF node or N1 itself instead of a new shuffle.
/// An out-of-range mask index asserts and, in builds without assertions,
/// yields a null SDValue.
SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1,
                                       SDValue N2, const int *Mask) {
  assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
  assert(VT.isVector() && N1.getValueType().isVector() &&
         "Vector Shuffle VTs must be a vectors");
  assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
         && "Vector Shuffle VTs must have same element type");

  // Canonicalize shuffle undef, undef -> undef
  if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
    return N1;

  // Validate that all the indices passed in via Mask are within the range of
  // elements input to the shuffle, and take a private, mutable copy.
  int NElts = VT.getVectorNumElements();
  SmallVector<int, 8> MaskVec;
  for (int i = 0; i != NElts; ++i) {
    if (Mask[i] >= (NElts * 2)) {
      assert(0 && "Index out of range");
      return SDValue();
    }
    // Every negative value means undef.  Normalize to -1 so that masks which
    // differ only in their undef encoding are stored, and CSE'd, identically.
    MaskVec.push_back(Mask[i] < 0 ? -1 : Mask[i]);
  }

  // Canonicalize shuffle v, v -> v, undef
  if (N1 == N2) {
    N2 = getUNDEF(VT);
    for (int i = 0; i != NElts; ++i)
      if (MaskVec[i] >= NElts) MaskVec[i] -= NElts;
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N1.getOpcode() == ISD::UNDEF)
    commuteShuffle(N1, N2, MaskVec);

  // Canonicalize all index into lhs, -> shuffle lhs, undef
  // Canonicalize all index into rhs, -> shuffle rhs, undef
  bool AllLHS = true, AllRHS = true;
  bool N2Undef = N2.getOpcode() == ISD::UNDEF;
  for (int i = 0; i != NElts; ++i) {
    if (MaskVec[i] >= NElts) {
      // Selecting from an undef second operand is the same as an undef lane.
      if (N2Undef)
        MaskVec[i] = -1;
      else
        AllLHS = false;
    } else if (MaskVec[i] >= 0) {
      AllRHS = false;
    }
  }
  // No lane references either input: the whole result is undef.
  if (AllLHS && AllRHS)
    return getUNDEF(VT);
  if (AllLHS)
    N2 = getUNDEF(VT);
  if (AllRHS) {
    // Only the second input is used; make it the first operand.
    N1 = getUNDEF(VT);
    commuteShuffle(N1, N2, MaskVec);
  }

  // If Identity shuffle, or all shuffle in to undef, return that node.
  bool AllUndef = true;
  bool Identity = true;
  for (int i = 0; i < NElts; ++i) {
    if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false;
    if (MaskVec[i] >= 0) AllUndef = false;
  }
  if (Identity)
    return N1;
  if (AllUndef)
    return getUNDEF(VT);

  // Profile the node (operands plus mask) and look for an existing equivalent.
  FoldingSetNodeID ID;
  SDValue Ops[2] = { N1, N2 };
  AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
  for (int i = 0; i != NElts; ++i)
    ID.AddInteger(MaskVec[i]);

  void* IP = 0;
  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
    return SDValue(E, 0);

  // Allocate the mask array for the node out of the BumpPtrAllocator, since
  // SDNode doesn't have access to it.  This memory will be "leaked" when
  // the node is deallocated, but recovered when the NodeAllocator is released.
  int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
  memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));

  ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>();
  new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
  CSEMap.InsertNode(N, IP);
  AllNodes.push_back(N);
  return SDValue(N, 0);
}
SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl, SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl,
SDValue Val, SDValue DTy, SDValue Val, SDValue DTy,
SDValue STy, SDValue Rnd, SDValue Sat, SDValue STy, SDValue Rnd, SDValue Sat,
@ -2087,19 +2192,18 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
SDValue SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned i) { SDValue SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned i) {
MVT VT = N->getValueType(0); MVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc(); DebugLoc dl = N->getDebugLoc();
SDValue PermMask = N->getOperand(2); const int *PermMask = cast<ShuffleVectorSDNode>(N)->getMask();
SDValue Idx = PermMask.getOperand(i); if (PermMask[i] < 0)
if (Idx.getOpcode() == ISD::UNDEF)
return getUNDEF(VT.getVectorElementType()); return getUNDEF(VT.getVectorElementType());
unsigned Index = cast<ConstantSDNode>(Idx)->getZExtValue(); int Index = PermMask[i];
unsigned NumElems = PermMask.getNumOperands(); int NumElems = VT.getVectorNumElements();
SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1); SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
Index %= NumElems; Index %= NumElems;
if (V.getOpcode() == ISD::BIT_CONVERT) { if (V.getOpcode() == ISD::BIT_CONVERT) {
V = V.getOperand(0); V = V.getOperand(0);
MVT VVT = V.getValueType(); MVT VVT = V.getValueType();
if (!VVT.isVector() || VVT.getVectorNumElements() != NumElems) if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems)
return SDValue(); return SDValue();
} }
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
@ -2793,12 +2897,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
} }
break; break;
case ISD::VECTOR_SHUFFLE: case ISD::VECTOR_SHUFFLE:
assert(N1.getValueType() == N2.getValueType() && assert(0 && "should use getVectorShuffle constructor!");
N1.getValueType().isVector() &&
VT.isVector() && N3.getValueType().isVector() &&
N3.getOpcode() == ISD::BUILD_VECTOR &&
VT.getVectorNumElements() == N3.getNumOperands() &&
"Illegal VECTOR_SHUFFLE node!");
break; break;
case ISD::BIT_CONVERT: case ISD::BIT_CONVERT:
// Fold bit_convert nodes from a type to themselves. // Fold bit_convert nodes from a type to themselves.
@ -5322,14 +5421,14 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) { if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) {
SDNode *Mask = getOperand(2).getNode(); const int *Mask = cast<ShuffleVectorSDNode>(this)->getMask();
OS << "<"; OS << "<";
for (unsigned i = 0, e = Mask->getNumOperands(); i != e; ++i) { for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
if (i) OS << ","; if (i) OS << ",";
if (Mask->getOperand(i).getOpcode() == ISD::UNDEF) if (Mask[i] < 0)
OS << "u"; OS << "u";
else else
OS << cast<ConstantSDNode>(Mask->getOperand(i))->getZExtValue(); OS << Mask[i];
} }
OS << ">"; OS << ">";
} }
@ -5610,3 +5709,13 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
SplatBitSize = sz; SplatBitSize = sz;
return true; return true;
} }
/// isSplatMask - Return true if every defined (non-negative) element of Mask
/// is the same index.  Negative elements are ignored, so a mask with no
/// defined elements also returns true.  The number of elements examined is
/// the vector element count of VT.
bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) {
  const unsigned NumElts = VT.getVectorNumElements();

  // Skip leading undefs to find the first defined element.
  unsigned Pos = 0;
  while (Pos != NumElts && Mask[Pos] < 0)
    ++Pos;
  if (Pos == NumElts)
    return true;

  // Every later defined element must match that first one.
  const int SplatIdx = Mask[Pos];
  for (++Pos; Pos != NumElts; ++Pos) {
    const int Elt = Mask[Pos];
    if (Elt >= 0 && Elt != SplatIdx)
      return false;
  }
  return true;
}

View File

@ -870,8 +870,7 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return N = DAG.getConstantFP(*CFP, VT); return N = DAG.getConstantFP(*CFP, VT);
if (isa<UndefValue>(C) && !isa<VectorType>(V->getType()) && if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
!V->getType()->isAggregateType())
return N = DAG.getUNDEF(VT); return N = DAG.getUNDEF(VT);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
@ -925,14 +924,11 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
for (unsigned i = 0; i != NumElements; ++i) for (unsigned i = 0; i != NumElements; ++i)
Ops.push_back(getValue(CP->getOperand(i))); Ops.push_back(getValue(CP->getOperand(i)));
} else { } else {
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
"Unknown vector constant!");
MVT EltVT = TLI.getValueType(VecTy->getElementType()); MVT EltVT = TLI.getValueType(VecTy->getElementType());
SDValue Op; SDValue Op;
if (isa<UndefValue>(C)) if (EltVT.isFloatingPoint())
Op = DAG.getUNDEF(EltVT);
else if (EltVT.isFloatingPoint())
Op = DAG.getConstantFP(0, EltVT); Op = DAG.getConstantFP(0, EltVT);
else else
Op = DAG.getConstant(0, EltVT); Op = DAG.getConstant(0, EltVT);
@ -2435,37 +2431,42 @@ void SelectionDAGLowering::visitExtractElement(User &I) {
// Utility for visitShuffleVector - Returns true if the mask is mask starting // Utility for visitShuffleVector - Returns true if the mask is mask starting
// from SIndx and increasing to the element length (undefs are allowed). // from SIndx and increasing to the element length (undefs are allowed).
static bool SequentialMask(SDValue Mask, unsigned SIndx) { static bool SequentialMask(SmallVectorImpl<int> &Mask, int SIndx) {
unsigned MaskNumElts = Mask.getNumOperands(); int MaskNumElts = Mask.size();
for (unsigned i = 0; i != MaskNumElts; ++i) { for (int i = 0; i != MaskNumElts; ++i)
if (Mask.getOperand(i).getOpcode() != ISD::UNDEF) { if ((Mask[i] >= 0) && (Mask[i] != i + SIndx))
unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue(); return false;
if (Idx != i + SIndx)
return false;
}
}
return true; return true;
} }
void SelectionDAGLowering::visitShuffleVector(User &I) { void SelectionDAGLowering::visitShuffleVector(User &I) {
SmallVector<int, 8> Mask;
SDValue Src1 = getValue(I.getOperand(0)); SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1)); SDValue Src2 = getValue(I.getOperand(1));
SDValue Mask = getValue(I.getOperand(2));
// Convert the ConstantVector mask operand into an array of ints, with -1
// representing undef values.
SmallVector<Constant*, 8> MaskElts;
cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
int MaskNumElts = MaskElts.size();
for (int i = 0; i != MaskNumElts; ++i) {
if (isa<UndefValue>(MaskElts[i]))
Mask.push_back(-1);
else
Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
}
MVT VT = TLI.getValueType(I.getType()); MVT VT = TLI.getValueType(I.getType());
MVT SrcVT = Src1.getValueType(); MVT SrcVT = Src1.getValueType();
int MaskNumElts = Mask.getNumOperands();
int SrcNumElts = SrcVT.getVectorNumElements(); int SrcNumElts = SrcVT.getVectorNumElements();
if (SrcNumElts == MaskNumElts) { if (SrcNumElts == MaskNumElts) {
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(), setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
VT, Src1, Src2, Mask)); &Mask[0]));
return; return;
} }
// Normalize the shuffle vector since mask and vector length don't match. // Normalize the shuffle vector since mask and vector length don't match.
MVT MaskEltVT = Mask.getValueType().getVectorElementType();
if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
// Mask is longer than the source vectors and is a multiple of the source // Mask is longer than the source vectors and is a multiple of the source
// vectors. We can use concatenate vector to make the mask and vectors // vectors. We can use concatenate vector to make the mask and vectors
@ -2479,44 +2480,33 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
// Pad both vectors with undefs to make them the same length as the mask. // Pad both vectors with undefs to make them the same length as the mask.
unsigned NumConcat = MaskNumElts / SrcNumElts; unsigned NumConcat = MaskNumElts / SrcNumElts;
bool Src1U = Src1.getOpcode() == ISD::UNDEF;
bool Src2U = Src2.getOpcode() == ISD::UNDEF;
SDValue UndefVal = DAG.getUNDEF(SrcVT); SDValue UndefVal = DAG.getUNDEF(SrcVT);
SDValue* MOps1 = new SDValue[NumConcat]; SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
SDValue* MOps2 = new SDValue[NumConcat]; SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
MOps1[0] = Src1; MOps1[0] = Src1;
MOps2[0] = Src2; MOps2[0] = Src2;
for (unsigned i = 1; i != NumConcat; ++i) {
MOps1[i] = UndefVal; Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
MOps2[i] = UndefVal; getCurDebugLoc(), VT,
} &MOps1[0], NumConcat);
Src1 = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
VT, MOps1, NumConcat); getCurDebugLoc(), VT,
Src2 = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), &MOps2[0], NumConcat);
VT, MOps2, NumConcat);
delete [] MOps1;
delete [] MOps2;
// Readjust mask for new input vector length. // Readjust mask for new input vector length.
SmallVector<SDValue, 8> MappedOps; SmallVector<int, 8> MappedOps;
for (int i = 0; i != MaskNumElts; ++i) { for (int i = 0; i != MaskNumElts; ++i) {
if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) { int Idx = Mask[i];
MappedOps.push_back(Mask.getOperand(i)); if (Idx < SrcNumElts)
} else { MappedOps.push_back(Idx);
int Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue(); else
if (Idx < SrcNumElts) MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
else
MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts - SrcNumElts,
MaskEltVT));
}
} }
Mask = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
Mask.getValueType(), &MappedOps[0]));
&MappedOps[0], MappedOps.size());
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(),
VT, Src1, Src2, Mask));
return; return;
} }
@ -2541,20 +2531,19 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
int MaxRange[2] = {-1, -1}; int MaxRange[2] = {-1, -1};
for (int i = 0; i != MaskNumElts; ++i) { for (int i = 0; i != MaskNumElts; ++i) {
SDValue Arg = Mask.getOperand(i); int Idx = Mask[i];
if (Arg.getOpcode() != ISD::UNDEF) { int Input = 0;
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); if (Idx < 0)
int Idx = cast<ConstantSDNode>(Arg)->getZExtValue(); continue;
int Input = 0;
if (Idx >= SrcNumElts) { if (Idx >= SrcNumElts) {
Input = 1; Input = 1;
Idx -= SrcNumElts; Idx -= SrcNumElts;
}
if (Idx > MaxRange[Input])
MaxRange[Input] = Idx;
if (Idx < MinRange[Input])
MinRange[Input] = Idx;
} }
if (Idx > MaxRange[Input])
MaxRange[Input] = Idx;
if (Idx < MinRange[Input])
MinRange[Input] = Idx;
} }
// Check if the access is smaller than the vector size and can we find // Check if the access is smaller than the vector size and can we find
@ -2596,26 +2585,18 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
} }
} }
// Calculate new mask. // Calculate new mask.
SmallVector<SDValue, 8> MappedOps; SmallVector<int, 8> MappedOps;
for (int i = 0; i != MaskNumElts; ++i) { for (int i = 0; i != MaskNumElts; ++i) {
SDValue Arg = Mask.getOperand(i); int Idx = Mask[i];
if (Arg.getOpcode() == ISD::UNDEF) { if (Idx < 0)
MappedOps.push_back(Arg); MappedOps.push_back(Idx);
} else { else if (Idx < SrcNumElts)
int Idx = cast<ConstantSDNode>(Arg)->getZExtValue(); MappedOps.push_back(Idx - StartIdx[0]);
if (Idx < SrcNumElts) else
MappedOps.push_back(DAG.getConstant(Idx - StartIdx[0], MaskEltVT)); MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
else {
Idx = Idx - SrcNumElts - StartIdx[1] + MaskNumElts;
MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
}
}
} }
Mask = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
Mask.getValueType(), &MappedOps[0]));
&MappedOps[0], MappedOps.size());
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(),
VT, Src1, Src2, Mask));
return; return;
} }
} }
@ -2627,12 +2608,10 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
MVT PtrVT = TLI.getPointerTy(); MVT PtrVT = TLI.getPointerTy();
SmallVector<SDValue,8> Ops; SmallVector<SDValue,8> Ops;
for (int i = 0; i != MaskNumElts; ++i) { for (int i = 0; i != MaskNumElts; ++i) {
SDValue Arg = Mask.getOperand(i); if (Mask[i] < 0) {
if (Arg.getOpcode() == ISD::UNDEF) {
Ops.push_back(DAG.getUNDEF(EltVT)); Ops.push_back(DAG.getUNDEF(EltVT));
} else { } else {
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!"); int Idx = Mask[i];
int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
if (Idx < SrcNumElts) if (Idx < SrcNumElts)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
EltVT, Src1, DAG.getConstant(Idx, PtrVT))); EltVT, Src1, DAG.getConstant(Idx, PtrVT)));

View File

@ -1672,7 +1672,7 @@ SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0); SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1); SDValue V2 = Op.getOperand(1);
SDValue PermMask = Op.getOperand(2); const int *PermMask = cast<ShuffleVectorSDNode>(Op)->getMask();
DebugLoc dl = Op.getDebugLoc(); DebugLoc dl = Op.getDebugLoc();
if (V2.getOpcode() == ISD::UNDEF) V2 = V1; if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
@ -1703,39 +1703,40 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
} else } else
assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE"); assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) { for (unsigned i = 0; i != MaxElts; ++i) {
if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) { if (PermMask[i] < 0)
unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue(); continue;
unsigned SrcElt = PermMask[i];
if (monotonic) { if (monotonic) {
if (SrcElt >= V2EltIdx0) { if (SrcElt >= V2EltIdx0) {
if (1 >= (++EltsFromV2)) { if (1 >= (++EltsFromV2)) {
V2Elt = (V2EltIdx0 - SrcElt) << 2; V2Elt = (V2EltIdx0 - SrcElt) << 2;
}
} else if (CurrElt != SrcElt) {
monotonic = false;
} }
} else if (CurrElt != SrcElt) {
++CurrElt; monotonic = false;
} }
if (rotate) { ++CurrElt;
if (PrevElt > 0 && SrcElt < MaxElts) { }
if ((PrevElt == SrcElt - 1)
|| (PrevElt == MaxElts - 1 && SrcElt == 0)) { if (rotate) {
PrevElt = SrcElt; if (PrevElt > 0 && SrcElt < MaxElts) {
if (SrcElt == 0) if ((PrevElt == SrcElt - 1)
V0Elt = i; || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
} else {
rotate = false;
}
} else if (PrevElt == 0) {
// First time through, need to keep track of previous element
PrevElt = SrcElt; PrevElt = SrcElt;
if (SrcElt == 0)
V0Elt = i;
} else { } else {
// This isn't a rotation, takes elements from vector 2
rotate = false; rotate = false;
} }
} else if (PrevElt == 0) {
// First time through, need to keep track of previous element
PrevElt = SrcElt;
} else {
// This isn't a rotation, takes elements from vector 2
rotate = false;
} }
} }
} }
@ -1768,12 +1769,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
unsigned BytesPerElement = EltVT.getSizeInBits()/8; unsigned BytesPerElement = EltVT.getSizeInBits()/8;
SmallVector<SDValue, 16> ResultMask; SmallVector<SDValue, 16> ResultMask;
for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { for (unsigned i = 0, e = MaxElts; i != e; ++i) {
unsigned SrcElt; unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
SrcElt = 0;
else
SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
for (unsigned j = 0; j < BytesPerElement; ++j) { for (unsigned j = 0; j < BytesPerElement; ++j) {
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,

View File

@ -456,22 +456,22 @@ static bool isFloatingPointZero(SDValue Op) {
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value. /// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(SDValue Op, unsigned Val) { static bool isConstantOrUndef(int Op, int Val) {
return Op.getOpcode() == ISD::UNDEF || return Op < 0 || Op == Val;
cast<ConstantSDNode>(Op)->getZExtValue() == Val;
} }
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction. /// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) { bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
const int *Mask = N->getMask();
if (!isUnary) { if (!isUnary) {
for (unsigned i = 0; i != 16; ++i) for (unsigned i = 0; i != 16; ++i)
if (!isConstantOrUndef(N->getOperand(i), i*2+1)) if (!isConstantOrUndef(Mask[i], i*2+1))
return false; return false;
} else { } else {
for (unsigned i = 0; i != 8; ++i) for (unsigned i = 0; i != 8; ++i)
if (!isConstantOrUndef(N->getOperand(i), i*2+1) || if (!isConstantOrUndef(Mask[i], i*2+1) ||
!isConstantOrUndef(N->getOperand(i+8), i*2+1)) !isConstantOrUndef(Mask[i+8], i*2+1))
return false; return false;
} }
return true; return true;
@ -479,18 +479,19 @@ bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction. /// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) { bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
const int *Mask = N->getMask();
if (!isUnary) { if (!isUnary) {
for (unsigned i = 0; i != 16; i += 2) for (unsigned i = 0; i != 16; i += 2)
if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || if (!isConstantOrUndef(Mask[i ], i*2+2) ||
!isConstantOrUndef(N->getOperand(i+1), i*2+3)) !isConstantOrUndef(Mask[i+1], i*2+3))
return false; return false;
} else { } else {
for (unsigned i = 0; i != 8; i += 2) for (unsigned i = 0; i != 8; i += 2)
if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || if (!isConstantOrUndef(Mask[i ], i*2+2) ||
!isConstantOrUndef(N->getOperand(i+1), i*2+3) || !isConstantOrUndef(Mask[i+1], i*2+3) ||
!isConstantOrUndef(N->getOperand(i+8), i*2+2) || !isConstantOrUndef(Mask[i+8], i*2+2) ||
!isConstantOrUndef(N->getOperand(i+9), i*2+3)) !isConstantOrUndef(Mask[i+9], i*2+3))
return false; return false;
} }
return true; return true;
@ -498,27 +499,29 @@ bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
/// isVMerge - Common function, used to match vmrg* shuffles. /// isVMerge - Common function, used to match vmrg* shuffles.
/// ///
static bool isVMerge(SDNode *N, unsigned UnitSize, static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned LHSStart, unsigned RHSStart) { unsigned LHSStart, unsigned RHSStart) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && assert(N->getValueType(0) == MVT::v16i8 &&
N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); "PPC only supports shuffles by bytes!");
assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!"); "Unsupported merge size!");
const int *Mask = N->getMask();
for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j), if (!isConstantOrUndef(Mask[i*UnitSize*2+j],
LHSStart+j+i*UnitSize) || LHSStart+j+i*UnitSize) ||
!isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j), !isConstantOrUndef(Mask[i*UnitSize*2+UnitSize+j],
RHSStart+j+i*UnitSize)) RHSStart+j+i*UnitSize))
return false; return false;
} }
return true; return true;
} }
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
bool isUnary) {
if (!isUnary) if (!isUnary)
return isVMerge(N, UnitSize, 8, 24); return isVMerge(N, UnitSize, 8, 24);
return isVMerge(N, UnitSize, 8, 8); return isVMerge(N, UnitSize, 8, 8);
@ -526,7 +529,8 @@ bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
bool isUnary) {
if (!isUnary) if (!isUnary)
return isVMerge(N, UnitSize, 0, 16); return isVMerge(N, UnitSize, 0, 16);
return isVMerge(N, UnitSize, 0, 0); return isVMerge(N, UnitSize, 0, 0);
@ -536,91 +540,92 @@ bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1. /// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && assert(N->getValueType(0) == MVT::v16i8 &&
N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); "PPC only supports shuffles by bytes!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
// Find the first non-undef value in the shuffle mask. // Find the first non-undef value in the shuffle mask.
const int *Mask = SVOp->getMask();
unsigned i; unsigned i;
for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i) for (i = 0; i != 16 && Mask[i] < 0; ++i)
/*search*/; /*search*/;
if (i == 16) return -1; // all undef. if (i == 16) return -1; // all undef.
// Otherwise, check to see if the rest of the elements are consequtively // Otherwise, check to see if the rest of the elements are consecutively
// numbered from this value. // numbered from this value.
unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getZExtValue(); unsigned ShiftAmt = Mask[i];
if (ShiftAmt < i) return -1; if (ShiftAmt < i) return -1;
ShiftAmt -= i; ShiftAmt -= i;
if (!isUnary) { if (!isUnary) {
// Check the rest of the elements to see if they are consequtive. // Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i) for (++i; i != 16; ++i)
if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i)) if (!isConstantOrUndef(Mask[i], ShiftAmt+i))
return -1; return -1;
} else { } else {
// Check the rest of the elements to see if they are consequtive. // Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i) for (++i; i != 16; ++i)
if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15)) if (!isConstantOrUndef(Mask[i], (ShiftAmt+i) & 15))
return -1; return -1;
} }
return ShiftAmt; return ShiftAmt;
} }
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to /// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW. /// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) { bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && assert(N->getValueType(0) == MVT::v16i8 &&
N->getNumOperands() == 16 &&
(EltSize == 1 || EltSize == 2 || EltSize == 4)); (EltSize == 1 || EltSize == 2 || EltSize == 4));
// This is a splat operation if each element of the permute is the same, and // This is a splat operation if each element of the permute is the same, and
// if the value doesn't reference the second vector. // if the value doesn't reference the second vector.
unsigned ElementBase = 0; const int *Mask = N->getMask();
SDValue Elt = N->getOperand(0); unsigned ElementBase = Mask[0];
if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
ElementBase = EltV->getZExtValue(); // FIXME: Handle UNDEF elements too!
else if (ElementBase >= 16)
return false; // FIXME: Handle UNDEF elements too!
if (cast<ConstantSDNode>(Elt)->getZExtValue() >= 16)
return false; return false;
// Check that they are consequtive. // Check that the indices are consecutive, in the case of a multi-byte element
for (unsigned i = 1; i != EltSize; ++i) { // splatted with a v16i8 mask.
if (!isa<ConstantSDNode>(N->getOperand(i)) || for (unsigned i = 1; i != EltSize; ++i)
cast<ConstantSDNode>(N->getOperand(i))->getZExtValue() != i+ElementBase) if (Mask[i] < 0 || Mask[i] != (int)(i+ElementBase))
return false; return false;
}
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; if (Mask[i] < 0) continue;
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!");
for (unsigned j = 0; j != EltSize; ++j) for (unsigned j = 0; j != EltSize; ++j)
if (N->getOperand(i+j) != N->getOperand(j)) if (Mask[i+j] != Mask[j])
return false; return false;
} }
return true; return true;
} }
/// isAllNegativeZeroVector - Returns true if all elements of build_vector /// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0. /// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) { bool PPC::isAllNegativeZeroVector(SDNode *N) {
assert(N->getOpcode() == ISD::BUILD_VECTOR); BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
if (PPC::isSplatShuffleMask(N, N->getNumOperands()))
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N)) APInt APVal, APUndef;
unsigned BitSize;
bool HasAnyUndefs;
if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32))
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
return CFP->getValueAPF().isNegZero(); return CFP->getValueAPF().isNegZero();
return false; return false;
} }
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask. /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
assert(isSplatShuffleMask(N, EltSize)); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
return cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() / EltSize; assert(isSplatShuffleMask(SVOp, EltSize));
return SVOp->getMask()[0] / EltSize;
} }
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
@ -3149,11 +3154,10 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS); LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS);
RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS); RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS);
SDValue Ops[16]; int Ops[16];
for (unsigned i = 0; i != 16; ++i) for (unsigned i = 0; i != 16; ++i)
Ops[i] = DAG.getConstant(i+Amt, MVT::i8); Ops[i] = i + Amt;
SDValue T = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, LHS, RHS, SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops,16));
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
} }
@ -3354,7 +3358,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
unsigned ShufIdxs[16]; int ShufIdxs[16];
switch (OpNum) { switch (OpNum) {
default: assert(0 && "Unknown i32 permute!"); default: assert(0 && "Unknown i32 permute!");
case OP_VMRGHW: case OP_VMRGHW:
@ -3392,13 +3396,11 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
case OP_VSLDOI12: case OP_VSLDOI12:
return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
} }
SDValue Ops[16]; MVT VT = OpLHS.getValueType();
for (unsigned i = 0; i != 16; ++i) OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS);
Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i8); OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS);
SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, OpLHS.getValueType(), return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
OpLHS, OpRHS,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops, 16));
} }
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
@ -3406,28 +3408,30 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
/// return the code it can be lowered into. Worst case, it can always be /// return the code it can be lowered into. Worst case, it can always be
/// lowered into a vperm. /// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) { SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc(); DebugLoc dl = Op.getDebugLoc();
SDValue V1 = Op.getOperand(0); SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1); SDValue V2 = Op.getOperand(1);
SDValue PermMask = Op.getOperand(2); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
const int *PermMask = SVOp->getMask();
MVT VT = Op.getValueType();
// Cases that are handled by instructions that take permute immediates // Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
// selected by the instruction selector. // selected by the instruction selector.
if (V2.getOpcode() == ISD::UNDEF) { if (V2.getOpcode() == ISD::UNDEF) {
if (PPC::isSplatShuffleMask(PermMask.getNode(), 1) || if (PPC::isSplatShuffleMask(SVOp, 1) ||
PPC::isSplatShuffleMask(PermMask.getNode(), 2) || PPC::isSplatShuffleMask(SVOp, 2) ||
PPC::isSplatShuffleMask(PermMask.getNode(), 4) || PPC::isSplatShuffleMask(SVOp, 4) ||
PPC::isVPKUWUMShuffleMask(PermMask.getNode(), true) || PPC::isVPKUWUMShuffleMask(SVOp, true) ||
PPC::isVPKUHUMShuffleMask(PermMask.getNode(), true) || PPC::isVPKUHUMShuffleMask(SVOp, true) ||
PPC::isVSLDOIShuffleMask(PermMask.getNode(), true) != -1 || PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, true) || PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, true) || PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, true) || PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, true) || PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, true) || PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, true)) { PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
return Op; return Op;
} }
} }
@ -3435,15 +3439,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Altivec has a variety of "shuffle immediates" that take two vector inputs // Altivec has a variety of "shuffle immediates" that take two vector inputs
// and produce a fixed permutation. If any of these match, do not lower to // and produce a fixed permutation. If any of these match, do not lower to
// VPERM. // VPERM.
if (PPC::isVPKUWUMShuffleMask(PermMask.getNode(), false) || if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
PPC::isVPKUHUMShuffleMask(PermMask.getNode(), false) || PPC::isVPKUHUMShuffleMask(SVOp, false) ||
PPC::isVSLDOIShuffleMask(PermMask.getNode(), false) != -1 || PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, false) || PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, false) || PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, false) || PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, false) || PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, false) || PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, false)) PPC::isVMRGHShuffleMask(SVOp, 4, false))
return Op; return Op;
// Check to see if this is a shuffle of 4-byte values. If so, we can use our // Check to see if this is a shuffle of 4-byte values. If so, we can use our
@ -3453,11 +3457,10 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
unsigned EltNo = 8; // Start out undef. unsigned EltNo = 8; // Start out undef.
for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF) if (PermMask[i*4+j] < 0)
continue; // Undef, ignore it. continue; // Undef, ignore it.
unsigned ByteSource = unsigned ByteSource = PermMask[i*4+j];
cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getZExtValue();
if ((ByteSource & 3) != j) { if ((ByteSource & 3) != j) {
isFourElementShuffle = false; isFourElementShuffle = false;
break; break;
@ -3509,12 +3512,8 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
unsigned BytesPerElement = EltVT.getSizeInBits()/8; unsigned BytesPerElement = EltVT.getSizeInBits()/8;
SmallVector<SDValue, 16> ResultMask; SmallVector<SDValue, 16> ResultMask;
for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
unsigned SrcElt; unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
SrcElt = 0;
else
SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
for (unsigned j = 0; j != BytesPerElement; ++j) for (unsigned j = 0; j != BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
@ -3704,13 +3703,12 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts); OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts);
// Merge the results together. // Merge the results together.
SDValue Ops[16]; int Ops[16];
for (unsigned i = 0; i != 8; ++i) { for (unsigned i = 0; i != 8; ++i) {
Ops[i*2 ] = DAG.getConstant(2*i+1, MVT::i8); Ops[i*2 ] = 2*i+1;
Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8); Ops[i*2+1] = 2*i+1+16;
} }
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, EvenParts, OddParts, return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops, 16));
} else { } else {
assert(0 && "Unknown mul to lower!"); assert(0 && "Unknown mul to lower!");
abort(); abort();

View File

@ -175,19 +175,21 @@ namespace llvm {
namespace PPC { namespace PPC {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction. /// VPKUHUM instruction.
bool isVPKUHUMShuffleMask(SDNode *N, bool isUnary); bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction. /// VPKUWUM instruction.
bool isVPKUWUMShuffleMask(SDNode *N, bool isUnary); bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary); bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
bool isUnary);
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary); bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
bool isUnary);
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1. /// amount, otherwise return -1.
@ -196,7 +198,7 @@ namespace llvm {
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to /// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW. /// VSPLTB/VSPLTH/VSPLTW.
bool isSplatShuffleMask(SDNode *N, unsigned EltSize); bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
/// isAllNegativeZeroVector - Returns true if all elements of build_vector /// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0. /// are -0.0.

View File

@ -15,96 +15,118 @@
// Altivec transformation functions and pattern fragments. // Altivec transformation functions and pattern fragments.
// //
/// VPKUHUM_shuffle_mask/VPKUWUM_shuffle_mask - Return true if this is a valid
/// shuffle mask for the VPKUHUM or VPKUWUM instructions.
def VPKUHUM_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVPKUHUMShuffleMask(N, false);
}]>;
def VPKUWUM_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVPKUWUMShuffleMask(N, false);
}]>;
def VPKUHUM_unary_shuffle_mask : PatLeaf<(build_vector), [{ def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isVPKUHUMShuffleMask(N, true); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
}]>; }]>;
def VPKUWUM_unary_shuffle_mask : PatLeaf<(build_vector), [{ def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isVPKUWUMShuffleMask(N, true); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
}]>;
def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
}]>;
def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
}]>; }]>;
def VMRGLB_shuffle_mask : PatLeaf<(build_vector), [{ def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isVMRGLShuffleMask(N, 1, false); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
}]>; }]>;
def VMRGLH_shuffle_mask : PatLeaf<(build_vector), [{ def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isVMRGLShuffleMask(N, 2, false); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
}]>; }]>;
def VMRGLW_shuffle_mask : PatLeaf<(build_vector), [{ def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isVMRGLShuffleMask(N, 4, false); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
}]>; }]>;
def VMRGHB_shuffle_mask : PatLeaf<(build_vector), [{ def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isVMRGHShuffleMask(N, 1, false); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
}]>; }]>;
def VMRGHH_shuffle_mask : PatLeaf<(build_vector), [{ def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isVMRGHShuffleMask(N, 2, false); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
}]>; }]>;
def VMRGHW_shuffle_mask : PatLeaf<(build_vector), [{ def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isVMRGHShuffleMask(N, 4, false); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
}]>; }]>;
def VMRGLB_unary_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVMRGLShuffleMask(N, 1, true); def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
}]>; }]>;
def VMRGLH_unary_shuffle_mask : PatLeaf<(build_vector), [{ def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isVMRGLShuffleMask(N, 2, true); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
}]>; }]>;
def VMRGLW_unary_shuffle_mask : PatLeaf<(build_vector), [{ def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isVMRGLShuffleMask(N, 4, true); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
}]>; }]>;
def VMRGHB_unary_shuffle_mask : PatLeaf<(build_vector), [{ def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isVMRGHShuffleMask(N, 1, true); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
}]>; }]>;
def VMRGHH_unary_shuffle_mask : PatLeaf<(build_vector), [{ def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isVMRGHShuffleMask(N, 2, true); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
}]>; }]>;
def VMRGHW_unary_shuffle_mask : PatLeaf<(build_vector), [{ def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isVMRGHShuffleMask(N, 4, true); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
}]>; }]>;
def VSLDOI_get_imm : SDNodeXForm<build_vector, [{
def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
return getI32Imm(PPC::isVSLDOIShuffleMask(N, false)); return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
}]>; }]>;
def VSLDOI_shuffle_mask : PatLeaf<(build_vector), [{ def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVSLDOIShuffleMask(N, false) != -1; return PPC::isVSLDOIShuffleMask(N, false) != -1;
}], VSLDOI_get_imm>; }], VSLDOI_get_imm>;
/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into /// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
/// vector_shuffle(X,undef,mask) by the dag combiner. /// vector_shuffle(X,undef,mask) by the dag combiner.
def VSLDOI_unary_get_imm : SDNodeXForm<build_vector, [{ def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
return getI32Imm(PPC::isVSLDOIShuffleMask(N, true)); return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
}]>; }]>;
def VSLDOI_unary_shuffle_mask : PatLeaf<(build_vector), [{ def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVSLDOIShuffleMask(N, true) != -1; return PPC::isVSLDOIShuffleMask(N, true) != -1;
}], VSLDOI_unary_get_imm>; }], VSLDOI_unary_get_imm>;
// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm. // VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
def VSPLTB_get_imm : SDNodeXForm<build_vector, [{ def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
return getI32Imm(PPC::getVSPLTImmediate(N, 1)); return getI32Imm(PPC::getVSPLTImmediate(N, 1));
}]>; }]>;
def VSPLTB_shuffle_mask : PatLeaf<(build_vector), [{ def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isSplatShuffleMask(N, 1); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
}], VSPLTB_get_imm>; }], VSPLTB_get_imm>;
def VSPLTH_get_imm : SDNodeXForm<build_vector, [{ def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
return getI32Imm(PPC::getVSPLTImmediate(N, 2)); return getI32Imm(PPC::getVSPLTImmediate(N, 2));
}]>; }]>;
def VSPLTH_shuffle_mask : PatLeaf<(build_vector), [{ def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isSplatShuffleMask(N, 2); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
}], VSPLTH_get_imm>; }], VSPLTH_get_imm>;
def VSPLTW_get_imm : SDNodeXForm<build_vector, [{ def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
return getI32Imm(PPC::getVSPLTImmediate(N, 4)); return getI32Imm(PPC::getVSPLTImmediate(N, 4));
}]>; }]>;
def VSPLTW_shuffle_mask : PatLeaf<(build_vector), [{ def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
return PPC::isSplatShuffleMask(N, 4); (vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4);
}], VSPLTW_get_imm>; }], VSPLTW_get_imm>;
@ -268,8 +290,7 @@ def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>;
def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH), def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
"vsldoi $vD, $vA, $vB, $SH", VecFP, "vsldoi $vD, $vA, $vB, $SH", VecFP,
[(set VRRC:$vD, [(set VRRC:$vD,
(vector_shuffle (v16i8 VRRC:$vA), VRRC:$vB, (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>;
VSLDOI_shuffle_mask:$SH))]>;
// VX-Form instructions. AltiVec arithmetic ops. // VX-Form instructions. AltiVec arithmetic ops.
def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
@ -345,28 +366,22 @@ def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>;
def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghb $vD, $vA, $vB", VecFP, "vmrghb $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>;
VRRC:$vB, VMRGHB_shuffle_mask))]>;
def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghh $vD, $vA, $vB", VecFP, "vmrghh $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>;
VRRC:$vB, VMRGHH_shuffle_mask))]>;
def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghw $vD, $vA, $vB", VecFP, "vmrghw $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>;
VRRC:$vB, VMRGHW_shuffle_mask))]>;
def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglb $vD, $vA, $vB", VecFP, "vmrglb $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>;
VRRC:$vB, VMRGLB_shuffle_mask))]>;
def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglh $vD, $vA, $vB", VecFP, "vmrglh $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>;
VRRC:$vB, VMRGLH_shuffle_mask))]>;
def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglw $vD, $vA, $vB", VecFP, "vmrglw $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>;
VRRC:$vB, VMRGLW_shuffle_mask))]>;
def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>; def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>; def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
@ -440,16 +455,16 @@ def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>;
def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vspltb $vD, $vB, $UIMM", VecPerm, "vspltb $vD, $vB, $UIMM", VecPerm,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef), [(set VRRC:$vD,
VSPLTB_shuffle_mask:$UIMM))]>; (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vsplth $vD, $vB, $UIMM", VecPerm, "vsplth $vD, $vB, $UIMM", VecPerm,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef), [(set VRRC:$vD,
VSPLTH_shuffle_mask:$UIMM))]>; (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vspltw $vD, $vB, $UIMM", VecPerm, "vspltw $vD, $vB, $UIMM", VecPerm,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef), [(set VRRC:$vD,
VSPLTW_shuffle_mask:$UIMM))]>; (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>; def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>;
def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>; def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>;
@ -479,13 +494,13 @@ def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>;
def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>; def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>;
def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vpkuhum $vD, $vA, $vB", VecFP, "vpkuhum $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), [(set VRRC:$vD,
VRRC:$vB, VPKUHUM_shuffle_mask))]>; (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>; def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>;
def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vpkuwum $vD, $vA, $vB", VecFP, "vpkuwum $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA), [(set VRRC:$vD,
VRRC:$vB, VPKUWUM_shuffle_mask))]>; (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>; def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>;
// Vector Unpack. // Vector Unpack.
@ -603,25 +618,25 @@ def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
// Shuffles. // Shuffles.
// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x) // Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VSLDOI_unary_shuffle_mask:$in), def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef),
(VSLDOI VRRC:$vA, VRRC:$vA, VSLDOI_unary_shuffle_mask:$in)>; (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>;
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,VPKUWUM_unary_shuffle_mask:$in), def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef),
(VPKUWUM VRRC:$vA, VRRC:$vA)>; (VPKUWUM VRRC:$vA, VRRC:$vA)>;
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,VPKUHUM_unary_shuffle_mask:$in), def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef),
(VPKUHUM VRRC:$vA, VRRC:$vA)>; (VPKUHUM VRRC:$vA, VRRC:$vA)>;
// Match vmrg*(x,x) // Match vmrg*(x,x)
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLB_unary_shuffle_mask:$in), def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef),
(VMRGLB VRRC:$vA, VRRC:$vA)>; (VMRGLB VRRC:$vA, VRRC:$vA)>;
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLH_unary_shuffle_mask:$in), def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef),
(VMRGLH VRRC:$vA, VRRC:$vA)>; (VMRGLH VRRC:$vA, VRRC:$vA)>;
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLW_unary_shuffle_mask:$in), def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef),
(VMRGLW VRRC:$vA, VRRC:$vA)>; (VMRGLW VRRC:$vA, VRRC:$vA)>;
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHB_unary_shuffle_mask:$in), def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef),
(VMRGHB VRRC:$vA, VRRC:$vA)>; (VMRGHB VRRC:$vA, VRRC:$vA)>;
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHH_unary_shuffle_mask:$in), def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef),
(VMRGHH VRRC:$vA, VRRC:$vA)>; (VMRGHH VRRC:$vA, VRRC:$vA)>;
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHW_unary_shuffle_mask:$in), def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
(VMRGHW VRRC:$vA, VRRC:$vA)>; (VMRGHW VRRC:$vA, VRRC:$vA)>;
// Logical Operations // Logical Operations

File diff suppressed because it is too large Load Diff

View File

@ -230,7 +230,8 @@ namespace llvm {
// VSHL, VSRL - Vector logical left / right shift. // VSHL, VSRL - Vector logical left / right shift.
VSHL, VSRL, VSHL, VSRL,
// CMPPD, CMPPS - Vector double/float comparison.
// CMPPD, CMPPS - Vector double/float comparison. // CMPPD, CMPPS - Vector double/float comparison.
CMPPD, CMPPS, CMPPD, CMPPS,
@ -251,80 +252,72 @@ namespace llvm {
namespace X86 { namespace X86 {
/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD. /// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool isPSHUFDMask(SDNode *N); bool isPSHUFDMask(ShuffleVectorSDNode *N);
/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD. /// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool isPSHUFHWMask(SDNode *N); bool isPSHUFHWMask(ShuffleVectorSDNode *N);
/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD. /// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool isPSHUFLWMask(SDNode *N); bool isPSHUFLWMask(ShuffleVectorSDNode *N);
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*. /// specifies a shuffle of elements that is suitable for input to SHUFP*.
bool isSHUFPMask(SDNode *N); bool isSHUFPMask(ShuffleVectorSDNode *N);
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS. /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool isMOVHLPSMask(SDNode *N); bool isMOVHLPSMask(ShuffleVectorSDNode *N);
/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3> /// <2, 3, 2, 3>
bool isMOVHLPS_v_undef_Mask(SDNode *N); bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N);
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. /// specifies a shuffle of elements that is suitable for MOVLP{S|D}.
bool isMOVLPMask(SDNode *N); bool isMOVLPMask(ShuffleVectorSDNode *N);
/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} /// specifies a shuffle of elements that is suitable for MOVHP{S|D}.
/// as well as MOVLHPS. /// as well as MOVLHPS.
bool isMOVHPMask(SDNode *N); bool isMOVHPMask(ShuffleVectorSDNode *N);
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL. /// specifies a shuffle of elements that is suitable for input to UNPCKL.
bool isUNPCKLMask(SDNode *N, bool V2IsSplat = false); bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH. /// specifies a shuffle of elements that is suitable for input to UNPCKH.
bool isUNPCKHMask(SDNode *N, bool V2IsSplat = false); bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1> /// <0, 0, 1, 1>
bool isUNPCKL_v_undef_Mask(SDNode *N); bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N);
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3> /// <2, 2, 3, 3>
bool isUNPCKH_v_undef_Mask(SDNode *N); bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N);
/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS, /// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element. /// MOVSD, and MOVD, i.e. setting the lowest element.
bool isMOVLMask(SDNode *N); bool isMOVLMask(ShuffleVectorSDNode *N);
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP. /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
bool isMOVSHDUPMask(SDNode *N); bool isMOVSHDUPMask(ShuffleVectorSDNode *N);
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP. /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool isMOVSLDUPMask(SDNode *N); bool isMOVSLDUPMask(ShuffleVectorSDNode *N);
/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element.
bool isSplatMask(SDNode *N);
/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of zero element.
bool isSplatLoMask(SDNode *N);
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVDDUP. /// specifies a shuffle of elements that is suitable for input to MOVDDUP.
bool isMOVDDUPMask(SDNode *N); bool isMOVDDUPMask(ShuffleVectorSDNode *N);
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
@ -477,14 +470,13 @@ namespace llvm {
/// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
/// values are assumed to be legal. /// values are assumed to be legal.
virtual bool isShuffleMaskLegal(SDValue Mask, MVT VT) const; virtual bool isShuffleMaskLegal(const int *Mask, MVT VT) const;
/// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is
/// used by Targets can use this to indicate if there is a suitable /// used by Targets can use this to indicate if there is a suitable
/// VECTOR_SHUFFLE that can be used to replace a VAND with a constant /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
/// pool entry. /// pool entry.
virtual bool isVectorClearMaskLegal(const std::vector<SDValue> &BVOps, virtual bool isVectorClearMaskLegal(const int *Mask, MVT VT) const;
MVT EVT, SelectionDAG &DAG) const;
/// ShouldShrinkFPConstant - If true, then instruction selection should /// ShouldShrinkFPConstant - If true, then instruction selection should
/// seek to shrink the FP constant of the specified type to a smaller type /// seek to shrink the FP constant of the specified type to a smaller type

View File

@ -3801,6 +3801,7 @@ def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst),
(implicit EFLAGS)), (implicit EFLAGS)),
(DEC32m addr:$dst)>, Requires<[In32BitMode]>; (DEC32m addr:$dst)>, Requires<[In32BitMode]>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// Floating Point Stack Support // Floating Point Stack Support
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//

View File

@ -30,33 +30,37 @@ def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>;
// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to // MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to
// PSHUFW imm. // PSHUFW imm.
def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{ def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShuffleSHUFImmediate(N)); return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>; }]>;
// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> // Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...>
def MMX_UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{ def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isUNPCKHMask(N); (vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
}]>; }]>;
// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> // Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...>
def MMX_UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{ def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isUNPCKLMask(N); (vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
}]>; }]>;
// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> // Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
def MMX_UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isUNPCKH_v_undef_Mask(N); (vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
}]>; }]>;
// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> // Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
def MMX_UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isUNPCKL_v_undef_Mask(N); (vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
}]>; }]>;
// Patterns for shuffling. def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
def MMX_PSHUFW_shuffle_mask : PatLeaf<(build_vector), [{ (vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPSHUFDMask(N); return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
}], MMX_SHUFFLE_get_shuf_imm>; }], MMX_SHUFFLE_get_shuf_imm>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -185,9 +189,8 @@ def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src),
def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src), def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}", "movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, [(set VR128:$dst,
(v2i64 (vector_shuffle immAllZerosV, (movl immAllZerosV,
(v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src)))), (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>;
MOVL_shuffle_mask)))]>;
let neverHasSideEffects = 1 in let neverHasSideEffects = 1 in
def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src), def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src),
@ -319,86 +322,74 @@ let isTwoAddress = 1 in {
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2), (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}", "punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(v8i8 (vector_shuffle VR64:$src1, VR64:$src2, (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
MMX_UNPCKH_shuffle_mask)))]>;
def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}", "punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(v8i8 (vector_shuffle VR64:$src1, (v8i8 (mmx_unpckh VR64:$src1,
(bc_v8i8 (load_mmx addr:$src2)), (bc_v8i8 (load_mmx addr:$src2)))))]>;
MMX_UNPCKH_shuffle_mask)))]>;
def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2), (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}", "punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(v4i16 (vector_shuffle VR64:$src1, VR64:$src2, (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
MMX_UNPCKH_shuffle_mask)))]>;
def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}", "punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(v4i16 (vector_shuffle VR64:$src1, (v4i16 (mmx_unpckh VR64:$src1,
(bc_v4i16 (load_mmx addr:$src2)), (bc_v4i16 (load_mmx addr:$src2)))))]>;
MMX_UNPCKH_shuffle_mask)))]>;
def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2), (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}", "punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(v2i32 (vector_shuffle VR64:$src1, VR64:$src2, (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
MMX_UNPCKH_shuffle_mask)))]>;
def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem, def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}", "punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(v2i32 (vector_shuffle VR64:$src1, (v2i32 (mmx_unpckh VR64:$src1,
(bc_v2i32 (load_mmx addr:$src2)), (bc_v2i32 (load_mmx addr:$src2)))))]>;
MMX_UNPCKH_shuffle_mask)))]>;
// Unpack Low Packed Data Instructions // Unpack Low Packed Data Instructions
def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg, def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2), (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}", "punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(v8i8 (vector_shuffle VR64:$src1, VR64:$src2, (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
MMX_UNPCKL_shuffle_mask)))]>;
def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem, def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}", "punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(v8i8 (vector_shuffle VR64:$src1, (v8i8 (mmx_unpckl VR64:$src1,
(bc_v8i8 (load_mmx addr:$src2)), (bc_v8i8 (load_mmx addr:$src2)))))]>;
MMX_UNPCKL_shuffle_mask)))]>;
def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg, def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2), (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}", "punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(v4i16 (vector_shuffle VR64:$src1, VR64:$src2, (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
MMX_UNPCKL_shuffle_mask)))]>;
def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem, def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}", "punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(v4i16 (vector_shuffle VR64:$src1, (v4i16 (mmx_unpckl VR64:$src1,
(bc_v4i16 (load_mmx addr:$src2)), (bc_v4i16 (load_mmx addr:$src2)))))]>;
MMX_UNPCKL_shuffle_mask)))]>;
def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg, def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2), (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}", "punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(v2i32 (vector_shuffle VR64:$src1, VR64:$src2, (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
MMX_UNPCKL_shuffle_mask)))]>;
def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem, def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}", "punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(v2i32 (vector_shuffle VR64:$src1, (v2i32 (mmx_unpckl VR64:$src1,
(bc_v2i32 (load_mmx addr:$src2)), (bc_v2i32 (load_mmx addr:$src2)))))]>;
MMX_UNPCKL_shuffle_mask)))]>;
} }
// -- Pack Instructions // -- Pack Instructions
@ -411,17 +402,13 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, i8imm:$src2), (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(v4i16 (vector_shuffle (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>;
VR64:$src1, (undef),
MMX_PSHUFW_shuffle_mask:$src2)))]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2), (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst, [(set VR64:$dst,
(v4i16 (vector_shuffle (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)),
(bc_v4i16 (load_mmx addr:$src1)), (undef)))]>;
(undef),
MMX_PSHUFW_shuffle_mask:$src2)))]>;
// -- Conversion Instructions // -- Conversion Instructions
let neverHasSideEffects = 1 in { let neverHasSideEffects = 1 in {
@ -627,34 +614,27 @@ def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
// Patterns to perform canonical versions of vector shuffling. // Patterns to perform canonical versions of vector shuffling.
let AddedComplexity = 10 in { let AddedComplexity = 10 in {
def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef), def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))),
MMX_UNPCKL_v_undef_shuffle_mask)),
(MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>; (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>;
def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef), def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))),
MMX_UNPCKL_v_undef_shuffle_mask)),
(MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>; (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>;
def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef), def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))),
MMX_UNPCKL_v_undef_shuffle_mask)),
(MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>; (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>;
} }
let AddedComplexity = 10 in { let AddedComplexity = 10 in {
def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef), def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))),
MMX_UNPCKH_v_undef_shuffle_mask)),
(MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>; (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>;
def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef), def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))),
MMX_UNPCKH_v_undef_shuffle_mask)),
(MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>; (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>;
def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef), def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))),
MMX_UNPCKH_v_undef_shuffle_mask)),
(MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>; (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
} }
// Patterns to perform vector shuffling with a zeroed out vector. // Patterns to perform vector shuffling with a zeroed out vector.
let AddedComplexity = 20 in { let AddedComplexity = 20 in {
def : Pat<(bc_v2i32 (vector_shuffle immAllZerosV, def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV,
(v2i32 (scalar_to_vector (load_mmx addr:$src))), (v2i32 (scalar_to_vector (load_mmx addr:$src))))),
MMX_UNPCKL_shuffle_mask)),
(MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>; (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>;
} }

View File

@ -175,103 +175,108 @@ def PSxLDQ_imm : SDNodeXForm<imm, [{
// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*, // SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// SHUFP* etc. imm. // SHUFP* etc. imm.
def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{ def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShuffleSHUFImmediate(N)); return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>; }]>;
// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to // SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
// PSHUFHW imm. // PSHUFHW imm.
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{ def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePSHUFHWImmediate(N)); return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
}]>; }]>;
// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to // SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
// PSHUFLW imm. // PSHUFLW imm.
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{ def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
}]>; }]>;
def SSE_splat_mask : PatLeaf<(build_vector), [{ def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isSplatMask(N); (vector_shuffle node:$lhs, node:$rhs), [{
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
}]>;
def movddup : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
}]>;
def movhp : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movlp : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movl : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
}]>;
def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
}]>;
def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
}]>;
def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
}]>;
def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
}]>;
def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_shuf_imm>; }], SHUFFLE_get_shuf_imm>;
def SSE_splat_lo_mask : PatLeaf<(build_vector), [{ def shufp : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isSplatLoMask(N); (vector_shuffle node:$lhs, node:$rhs), [{
}]>; return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
def MOVDDUP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVDDUPMask(N);
}]>;
def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVHLPSMask(N);
}]>;
def MOVHLPS_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVHLPS_v_undef_Mask(N);
}]>;
def MOVHP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVHPMask(N);
}]>;
def MOVLP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVLPMask(N);
}]>;
def MOVL_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVLMask(N);
}]>;
def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVSHDUPMask(N);
}]>;
def MOVSLDUP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVSLDUPMask(N);
}]>;
def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isUNPCKLMask(N);
}]>;
def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isUNPCKHMask(N);
}]>;
def UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isUNPCKL_v_undef_Mask(N);
}]>;
def UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isUNPCKH_v_undef_Mask(N);
}]>;
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isPSHUFDMask(N);
}], SHUFFLE_get_shuf_imm>; }], SHUFFLE_get_shuf_imm>;
def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{ def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isPSHUFHWMask(N); (vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_pshufhw_imm>; }], SHUFFLE_get_pshufhw_imm>;
def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{ def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isPSHUFLWMask(N); (vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_pshuflw_imm>; }], SHUFFLE_get_pshuflw_imm>;
def SHUFP_unary_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isPSHUFDMask(N);
}], SHUFFLE_get_shuf_imm>;
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// SSE scalar FP Instructions // SSE scalar FP Instructions
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -704,16 +709,14 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movlps\t{$src2, $dst|$dst, $src2}", "movlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v4f32 (vector_shuffle VR128:$src1, (movlp VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))), (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
MOVLP_shuffle_mask)))]>;
def MOVHPSrm : PSI<0x16, MRMSrcMem, def MOVHPSrm : PSI<0x16, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhps\t{$src2, $dst|$dst, $src2}", "movhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v4f32 (vector_shuffle VR128:$src1, (movhp VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))), (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
MOVHP_shuffle_mask)))]>;
} // AddedComplexity } // AddedComplexity
} // Constraints = "$src1 = $dst" } // Constraints = "$src1 = $dst"
@ -728,29 +731,25 @@ def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}", "movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract [(store (f64 (vector_extract
(v2f64 (vector_shuffle (unpckh (bc_v2f64 (v4f32 VR128:$src)),
(bc_v2f64 (v4f32 VR128:$src)), (undef), (undef)), (iPTR 0))), addr:$dst)]>;
UNPCKH_shuffle_mask)), (iPTR 0))),
addr:$dst)]>;
let Constraints = "$src1 = $dst" in { let Constraints = "$src1 = $dst" in {
let AddedComplexity = 20 in { let AddedComplexity = 20 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $dst|$dst, $src2}", "movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v4f32 (vector_shuffle VR128:$src1, VR128:$src2, (v4f32 (movhp VR128:$src1, VR128:$src2)))]>;
MOVHP_shuffle_mask)))]>;
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}", "movhlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v4f32 (vector_shuffle VR128:$src1, VR128:$src2, (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
MOVHLPS_shuffle_mask)))]>;
} // AddedComplexity } // AddedComplexity
} // Constraints = "$src1 = $dst" } // Constraints = "$src1 = $dst"
let AddedComplexity = 20 in let AddedComplexity = 20 in
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), MOVDDUP_shuffle_mask)), def : Pat<(v4f32 (movddup VR128:$src, (undef))),
(MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
@ -908,51 +907,41 @@ let Constraints = "$src1 = $dst" in {
let isConvertibleToThreeAddress = 1 in // Convert to pshufd let isConvertibleToThreeAddress = 1 in // Convert to pshufd
def SHUFPSrri : PSIi8<0xC6, MRMSrcReg, def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, (outs VR128:$dst), (ins VR128:$src1,
VR128:$src2, i32i8imm:$src3), VR128:$src2, i8imm:$src3),
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, [(set VR128:$dst,
(v4f32 (vector_shuffle (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
VR128:$src1, VR128:$src2,
SHUFP_shuffle_mask:$src3)))]>;
def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem, def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, (outs VR128:$dst), (ins VR128:$src1,
f128mem:$src2, i32i8imm:$src3), f128mem:$src2, i8imm:$src3),
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, [(set VR128:$dst,
(v4f32 (vector_shuffle (v4f32 (shufp:$src3
VR128:$src1, (memopv4f32 addr:$src2), VR128:$src1, (memopv4f32 addr:$src2))))]>;
SHUFP_shuffle_mask:$src3)))]>;
let AddedComplexity = 10 in { let AddedComplexity = 10 in {
def UNPCKHPSrr : PSI<0x15, MRMSrcReg, def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpckhps\t{$src2, $dst|$dst, $src2}", "unpckhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v4f32 (vector_shuffle (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
VR128:$src1, VR128:$src2,
UNPCKH_shuffle_mask)))]>;
def UNPCKHPSrm : PSI<0x15, MRMSrcMem, def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpckhps\t{$src2, $dst|$dst, $src2}", "unpckhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v4f32 (vector_shuffle (v4f32 (unpckh VR128:$src1,
VR128:$src1, (memopv4f32 addr:$src2), (memopv4f32 addr:$src2))))]>;
UNPCKH_shuffle_mask)))]>;
def UNPCKLPSrr : PSI<0x14, MRMSrcReg, def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpcklps\t{$src2, $dst|$dst, $src2}", "unpcklps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v4f32 (vector_shuffle (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
VR128:$src1, VR128:$src2,
UNPCKL_shuffle_mask)))]>;
def UNPCKLPSrm : PSI<0x14, MRMSrcMem, def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpcklps\t{$src2, $dst|$dst, $src2}", "unpcklps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v4f32 (vector_shuffle (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>;
VR128:$src1, (memopv4f32 addr:$src2),
UNPCKL_shuffle_mask)))]>;
} // AddedComplexity } // AddedComplexity
} // Constraints = "$src1 = $dst" } // Constraints = "$src1 = $dst"
@ -1044,8 +1033,7 @@ let neverHasSideEffects = 1 in
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movss\t{$src2, $dst|$dst, $src2}", "movss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v4f32 (vector_shuffle VR128:$src1, VR128:$src2, (v4f32 (movl VR128:$src1, VR128:$src2)))]>;
MOVL_shuffle_mask)))]>;
} }
// Move to lower bits of a VR128 and zeroing upper bits. // Move to lower bits of a VR128 and zeroing upper bits.
@ -1451,16 +1439,14 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movlpd\t{$src2, $dst|$dst, $src2}", "movlpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v2f64 (vector_shuffle VR128:$src1, (v2f64 (movlp VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)), (scalar_to_vector (loadf64 addr:$src2)))))]>;
MOVLP_shuffle_mask)))]>;
def MOVHPDrm : PDI<0x16, MRMSrcMem, def MOVHPDrm : PDI<0x16, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhpd\t{$src2, $dst|$dst, $src2}", "movhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v2f64 (vector_shuffle VR128:$src1, (v2f64 (movhp VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)), (scalar_to_vector (loadf64 addr:$src2)))))]>;
MOVHP_shuffle_mask)))]>;
} // AddedComplexity } // AddedComplexity
} // Constraints = "$src1 = $dst" } // Constraints = "$src1 = $dst"
@ -1474,9 +1460,8 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}", "movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract [(store (f64 (vector_extract
(v2f64 (vector_shuffle VR128:$src, (undef), (v2f64 (unpckh VR128:$src, (undef))),
UNPCKH_shuffle_mask)), (iPTR 0))), (iPTR 0))), addr:$dst)]>;
addr:$dst)]>;
// SSE2 instructions without OpSize prefix // SSE2 instructions without OpSize prefix
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@ -1744,48 +1729,39 @@ let Constraints = "$src1 = $dst" in {
def SHUFPDrri : PDIi8<0xC6, MRMSrcReg, def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, (v2f64 (vector_shuffle [(set VR128:$dst,
VR128:$src1, VR128:$src2, (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
SHUFP_shuffle_mask:$src3)))]>;
def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem, def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, (outs VR128:$dst), (ins VR128:$src1,
f128mem:$src2, i8imm:$src3), f128mem:$src2, i8imm:$src3),
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, [(set VR128:$dst,
(v2f64 (vector_shuffle (v2f64 (shufp:$src3
VR128:$src1, (memopv2f64 addr:$src2), VR128:$src1, (memopv2f64 addr:$src2))))]>;
SHUFP_shuffle_mask:$src3)))]>;
let AddedComplexity = 10 in { let AddedComplexity = 10 in {
def UNPCKHPDrr : PDI<0x15, MRMSrcReg, def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpckhpd\t{$src2, $dst|$dst, $src2}", "unpckhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v2f64 (vector_shuffle (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
VR128:$src1, VR128:$src2,
UNPCKH_shuffle_mask)))]>;
def UNPCKHPDrm : PDI<0x15, MRMSrcMem, def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpckhpd\t{$src2, $dst|$dst, $src2}", "unpckhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v2f64 (vector_shuffle (v2f64 (unpckh VR128:$src1,
VR128:$src1, (memopv2f64 addr:$src2), (memopv2f64 addr:$src2))))]>;
UNPCKH_shuffle_mask)))]>;
def UNPCKLPDrr : PDI<0x14, MRMSrcReg, def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpcklpd\t{$src2, $dst|$dst, $src2}", "unpcklpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v2f64 (vector_shuffle (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
VR128:$src1, VR128:$src2,
UNPCKL_shuffle_mask)))]>;
def UNPCKLPDrm : PDI<0x14, MRMSrcMem, def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpcklpd\t{$src2, $dst|$dst, $src2}", "unpcklpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v2f64 (vector_shuffle (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>;
VR128:$src1, (memopv2f64 addr:$src2),
UNPCKL_shuffle_mask)))]>;
} // AddedComplexity } // AddedComplexity
} // Constraints = "$src1 = $dst" } // Constraints = "$src1 = $dst"
@ -2043,49 +2019,43 @@ defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
def PSHUFDri : PDIi8<0x70, MRMSrcReg, def PSHUFDri : PDIi8<0x70, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (vector_shuffle [(set VR128:$dst, (v4i32 (pshufd:$src2
VR128:$src1, (undef), VR128:$src1, (undef))))]>;
PSHUFD_shuffle_mask:$src2)))]>;
def PSHUFDmi : PDIi8<0x70, MRMSrcMem, def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (vector_shuffle [(set VR128:$dst, (v4i32 (pshufd:$src2
(bc_v4i32(memopv2i64 addr:$src1)), (bc_v4i32(memopv2i64 addr:$src1)),
(undef), (undef))))]>;
PSHUFD_shuffle_mask:$src2)))]>;
// SSE2 with ImmT == Imm8 and XS prefix. // SSE2 with ImmT == Imm8 and XS prefix.
def PSHUFHWri : Ii8<0x70, MRMSrcReg, def PSHUFHWri : Ii8<0x70, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
"pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (vector_shuffle [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1,
VR128:$src1, (undef), (undef))))]>,
PSHUFHW_shuffle_mask:$src2)))]>,
XS, Requires<[HasSSE2]>; XS, Requires<[HasSSE2]>;
def PSHUFHWmi : Ii8<0x70, MRMSrcMem, def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (vector_shuffle [(set VR128:$dst, (v8i16 (pshufhw:$src2
(bc_v8i16 (memopv2i64 addr:$src1)), (bc_v8i16 (memopv2i64 addr:$src1)),
(undef), (undef))))]>,
PSHUFHW_shuffle_mask:$src2)))]>,
XS, Requires<[HasSSE2]>; XS, Requires<[HasSSE2]>;
// SSE2 with ImmT == Imm8 and XD prefix. // SSE2 with ImmT == Imm8 and XD prefix.
def PSHUFLWri : Ii8<0x70, MRMSrcReg, def PSHUFLWri : Ii8<0x70, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
"pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (vector_shuffle [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1,
VR128:$src1, (undef), (undef))))]>,
PSHUFLW_shuffle_mask:$src2)))]>,
XD, Requires<[HasSSE2]>; XD, Requires<[HasSSE2]>;
def PSHUFLWmi : Ii8<0x70, MRMSrcMem, def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i32i8imm:$src2), (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (vector_shuffle [(set VR128:$dst, (v8i16 (pshuflw:$src2
(bc_v8i16 (memopv2i64 addr:$src1)), (bc_v8i16 (memopv2i64 addr:$src1)),
(undef), (undef))))]>,
PSHUFLW_shuffle_mask:$src2)))]>,
XD, Requires<[HasSSE2]>; XD, Requires<[HasSSE2]>;
@ -2094,107 +2064,91 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}", "punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v16i8 (vector_shuffle VR128:$src1, VR128:$src2, (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>;
UNPCKL_shuffle_mask)))]>;
def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}", "punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v16i8 (vector_shuffle VR128:$src1, (unpckl VR128:$src1,
(bc_v16i8 (memopv2i64 addr:$src2)), (bc_v16i8 (memopv2i64 addr:$src2))))]>;
UNPCKL_shuffle_mask)))]>;
def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}", "punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v8i16 (vector_shuffle VR128:$src1, VR128:$src2, (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>;
UNPCKL_shuffle_mask)))]>;
def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}", "punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v8i16 (vector_shuffle VR128:$src1, (unpckl VR128:$src1,
(bc_v8i16 (memopv2i64 addr:$src2)), (bc_v8i16 (memopv2i64 addr:$src2))))]>;
UNPCKL_shuffle_mask)))]>;
def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}", "punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>;
UNPCKL_shuffle_mask)))]>;
def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}", "punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v4i32 (vector_shuffle VR128:$src1, (unpckl VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2)), (bc_v4i32 (memopv2i64 addr:$src2))))]>;
UNPCKL_shuffle_mask)))]>;
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}", "punpcklqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v2i64 (vector_shuffle VR128:$src1, VR128:$src2, (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>;
UNPCKL_shuffle_mask)))]>;
def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}", "punpcklqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v2i64 (vector_shuffle VR128:$src1, (v2i64 (unpckl VR128:$src1,
(memopv2i64 addr:$src2), (memopv2i64 addr:$src2))))]>;
UNPCKL_shuffle_mask)))]>;
def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}", "punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v16i8 (vector_shuffle VR128:$src1, VR128:$src2, (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>;
UNPCKH_shuffle_mask)))]>;
def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}", "punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v16i8 (vector_shuffle VR128:$src1, (unpckh VR128:$src1,
(bc_v16i8 (memopv2i64 addr:$src2)), (bc_v16i8 (memopv2i64 addr:$src2))))]>;
UNPCKH_shuffle_mask)))]>;
def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}", "punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v8i16 (vector_shuffle VR128:$src1, VR128:$src2, (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>;
UNPCKH_shuffle_mask)))]>;
def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}", "punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v8i16 (vector_shuffle VR128:$src1, (unpckh VR128:$src1,
(bc_v8i16 (memopv2i64 addr:$src2)), (bc_v8i16 (memopv2i64 addr:$src2))))]>;
UNPCKH_shuffle_mask)))]>;
def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}", "punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>;
UNPCKH_shuffle_mask)))]>;
def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}", "punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v4i32 (vector_shuffle VR128:$src1, (unpckh VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2)), (bc_v4i32 (memopv2i64 addr:$src2))))]>;
UNPCKH_shuffle_mask)))]>;
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}", "punpckhqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v2i64 (vector_shuffle VR128:$src1, VR128:$src2, (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>;
UNPCKH_shuffle_mask)))]>;
def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}", "punpckhqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v2i64 (vector_shuffle VR128:$src1, (v2i64 (unpckh VR128:$src1,
(memopv2i64 addr:$src2), (memopv2i64 addr:$src2))))]>;
UNPCKH_shuffle_mask)))]>;
} }
// Extract / Insert // Extract / Insert
@ -2357,8 +2311,7 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2), (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movsd\t{$src2, $dst|$dst, $src2}", "movsd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, [(set VR128:$dst,
(v2f64 (vector_shuffle VR128:$src1, VR128:$src2, (v2f64 (movl VR128:$src1, VR128:$src2)))]>;
MOVL_shuffle_mask)))]>;
} }
// Store / copy lower 64-bits of a XMM register. // Store / copy lower 64-bits of a XMM register.
@ -2449,44 +2402,35 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
// Move Instructions // Move Instructions
def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movshdup\t{$src, $dst|$dst, $src}", "movshdup\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (vector_shuffle [(set VR128:$dst, (v4f32 (movshdup
VR128:$src, (undef), VR128:$src, (undef))))]>;
MOVSHDUP_shuffle_mask)))]>;
def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movshdup\t{$src, $dst|$dst, $src}", "movshdup\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (vector_shuffle [(set VR128:$dst, (movshdup
(memopv4f32 addr:$src), (undef), (memopv4f32 addr:$src), (undef)))]>;
MOVSHDUP_shuffle_mask)))]>;
def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movsldup\t{$src, $dst|$dst, $src}", "movsldup\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (vector_shuffle [(set VR128:$dst, (v4f32 (movsldup
VR128:$src, (undef), VR128:$src, (undef))))]>;
MOVSLDUP_shuffle_mask)))]>;
def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movsldup\t{$src, $dst|$dst, $src}", "movsldup\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (vector_shuffle [(set VR128:$dst, (movsldup
(memopv4f32 addr:$src), (undef), (memopv4f32 addr:$src), (undef)))]>;
MOVSLDUP_shuffle_mask)))]>;
def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movddup\t{$src, $dst|$dst, $src}", "movddup\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
(v2f64 (vector_shuffle VR128:$src, (undef),
MOVDDUP_shuffle_mask)))]>;
def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"movddup\t{$src, $dst|$dst, $src}", "movddup\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, [(set VR128:$dst,
(v2f64 (vector_shuffle (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
(scalar_to_vector (loadf64 addr:$src)), (undef))))]>;
(undef), MOVDDUP_shuffle_mask)))]>;
def : Pat<(vector_shuffle def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), (undef)),
(undef), MOVDDUP_shuffle_mask),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
def : Pat<(vector_shuffle def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
(memopv2f64 addr:$src), (undef), MOVDDUP_shuffle_mask),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
@ -2555,22 +2499,18 @@ def MWAIT : I<0xC9, RawFrm, (outs), (ins), "mwait",
// vector_shuffle v1, <undef> <1, 1, 3, 3> // vector_shuffle v1, <undef> <1, 1, 3, 3>
let AddedComplexity = 15 in let AddedComplexity = 15 in
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
MOVSHDUP_shuffle_mask)),
(MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>; (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
let AddedComplexity = 20 in let AddedComplexity = 20 in
def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef), def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
MOVSHDUP_shuffle_mask)),
(MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>; (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
// vector_shuffle v1, <undef> <0, 0, 2, 2> // vector_shuffle v1, <undef> <0, 0, 2, 2>
let AddedComplexity = 15 in let AddedComplexity = 15 in
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
MOVSLDUP_shuffle_mask)),
(MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>; (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
let AddedComplexity = 20 in let AddedComplexity = 20 in
def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef), def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
MOVSLDUP_shuffle_mask)),
(MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -2911,207 +2851,173 @@ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
// Splat v2f64 / v2i64 // Splat v2f64 / v2i64
let AddedComplexity = 10 in { let AddedComplexity = 10 in {
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_lo_mask:$sm), def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
(UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm), def : Pat<(unpckh (v2f64 VR128:$src), (undef)),
(UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_lo_mask:$sm), def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
(PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm), def : Pat<(unpckh (v2i64 VR128:$src), (undef)),
(PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
} }
// Special unary SHUFPSrri case. // Special unary SHUFPSrri case.
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef), def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
SHUFP_unary_shuffle_mask:$sm)), (SHUFPSrri VR128:$src1, VR128:$src1,
(SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>, (SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE1]>; Requires<[HasSSE1]>;
let AddedComplexity = 5 in
def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
(PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
Requires<[HasSSE2]>;
// Special unary SHUFPDrri case. // Special unary SHUFPDrri case.
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (undef), def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
SHUFP_unary_shuffle_mask:$sm)), (SHUFPDrri VR128:$src1, VR128:$src1,
(SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>, (SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
// Special unary SHUFPDrri case.
def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
(SHUFPDrri VR128:$src1, VR128:$src1,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>; Requires<[HasSSE2]>;
// Unary v4f32 shuffle with PSHUF* in order to fold a load. // Unary v4f32 shuffle with PSHUF* in order to fold a load.
def : Pat<(vector_shuffle (bc_v4i32 (memopv4f32 addr:$src1)), (undef), def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
SHUFP_unary_shuffle_mask:$sm), (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
(PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>,
Requires<[HasSSE2]>; Requires<[HasSSE2]>;
// Special binary v4i32 shuffle cases with SHUFPS. // Special binary v4i32 shuffle cases with SHUFPS.
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (v4i32 VR128:$src2), def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
PSHUFD_binary_shuffle_mask:$sm)), (SHUFPSrri VR128:$src1, VR128:$src2,
(SHUFPSrri VR128:$src1, VR128:$src2, PSHUFD_binary_shuffle_mask:$sm)>, (SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>; Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
(bc_v4i32 (memopv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm)), (SHUFPSrmi VR128:$src1, addr:$src2,
(SHUFPSrmi VR128:$src1, addr:$src2, PSHUFD_binary_shuffle_mask:$sm)>, (SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>; Requires<[HasSSE2]>;
// Special binary v2i64 shuffle cases using SHUFPDrri. // Special binary v2i64 shuffle cases using SHUFPDrri.
def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2, def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
SHUFP_shuffle_mask:$sm)), (SHUFPDrri VR128:$src1, VR128:$src2,
(SHUFPDrri VR128:$src1, VR128:$src2, SHUFP_shuffle_mask:$sm)>, (SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>; Requires<[HasSSE2]>;
// Special unary SHUFPDrri case.
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (undef),
SHUFP_unary_shuffle_mask:$sm)),
(SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
Requires<[HasSSE2]>;
// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> // vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
let AddedComplexity = 15 in { let AddedComplexity = 15 in {
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))),
UNPCKL_v_undef_shuffle_mask:$sm)), (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
Requires<[OptForSpeed, HasSSE2]>; Requires<[OptForSpeed, HasSSE2]>;
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))),
UNPCKL_v_undef_shuffle_mask:$sm)), (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
Requires<[OptForSpeed, HasSSE2]>; Requires<[OptForSpeed, HasSSE2]>;
} }
let AddedComplexity = 10 in { let AddedComplexity = 10 in {
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))),
UNPCKL_v_undef_shuffle_mask)),
(UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef), def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))),
UNPCKL_v_undef_shuffle_mask)),
(PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef), def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))),
UNPCKL_v_undef_shuffle_mask)),
(PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))),
UNPCKL_v_undef_shuffle_mask)),
(PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
} }
// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> // vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
let AddedComplexity = 15 in { let AddedComplexity = 15 in {
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))),
UNPCKH_v_undef_shuffle_mask:$sm)), (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
Requires<[OptForSpeed, HasSSE2]>; Requires<[OptForSpeed, HasSSE2]>;
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))),
UNPCKH_v_undef_shuffle_mask:$sm)), (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
Requires<[OptForSpeed, HasSSE2]>; Requires<[OptForSpeed, HasSSE2]>;
} }
let AddedComplexity = 10 in { let AddedComplexity = 10 in {
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))),
UNPCKH_v_undef_shuffle_mask)),
(UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef), def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))),
UNPCKH_v_undef_shuffle_mask)),
(PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef), def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))),
UNPCKH_v_undef_shuffle_mask)),
(PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef), def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
UNPCKH_v_undef_shuffle_mask)),
(PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
} }
let AddedComplexity = 20 in { let AddedComplexity = 20 in {
// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)),
MOVHP_shuffle_mask)),
(MOVLHPSrr VR128:$src1, VR128:$src2)>; (MOVLHPSrr VR128:$src1, VR128:$src2)>;
// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
MOVHLPS_shuffle_mask)),
(MOVHLPSrr VR128:$src1, VR128:$src2)>; (MOVHLPSrr VR128:$src1, VR128:$src2)>;
// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef), def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
MOVHLPS_v_undef_shuffle_mask)),
(MOVHLPSrr VR128:$src1, VR128:$src1)>; (MOVHLPSrr VR128:$src1, VR128:$src1)>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef), def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
MOVHLPS_v_undef_shuffle_mask)),
(MOVHLPSrr VR128:$src1, VR128:$src1)>; (MOVHLPSrr VR128:$src1, VR128:$src1)>;
} }
let AddedComplexity = 20 in { let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS // vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
MOVLP_shuffle_mask)),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
MOVLP_shuffle_mask)),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))),
MOVHP_shuffle_mask)),
(MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2), def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))),
MOVHP_shuffle_mask)),
(MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2), def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
MOVLP_shuffle_mask)),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2), def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
MOVLP_shuffle_mask)),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2), def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))),
MOVHP_shuffle_mask)),
(MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2), def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))),
MOVHP_shuffle_mask)),
(MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
} }
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS // (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS
def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2, def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
MOVLP_shuffle_mask)), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2, def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
MOVLP_shuffle_mask)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2, def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
MOVHP_shuffle_mask)), addr:$src1),
(MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2, def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
MOVHP_shuffle_mask)), addr:$src1),
(MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(store (v4i32 (vector_shuffle def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
(bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2, addr:$src1),
MOVLP_shuffle_mask)), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2, def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
MOVLP_shuffle_mask)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(store (v4i32 (vector_shuffle def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
(bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2, addr:$src1),
MOVHP_shuffle_mask)), addr:$src1),
(MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2, def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
MOVHP_shuffle_mask)), addr:$src1),
(MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
let AddedComplexity = 15 in { let AddedComplexity = 15 in {
// Setting the lowest element in the vector. // Setting the lowest element in the vector.
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
MOVL_shuffle_mask)),
(MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2, def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
MOVL_shuffle_mask)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd) // vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
def : Pat<(v4f32 (vector_shuffle VR128:$src1, VR128:$src2, def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
MOVLP_shuffle_mask)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
MOVLP_shuffle_mask)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
} }
// Set lowest element and zero upper elements. // Set lowest element and zero upper elements.
let AddedComplexity = 15 in let AddedComplexity = 15 in
def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src, def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)),
MOVL_shuffle_mask)),
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;

View File

@ -1,5 +1,7 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep and ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | grep psrldq ; RUN: not grep and %t
; RUN: not grep psrldq %t
; RUN: grep xorps %t
define <4 x float> @test(<4 x float>* %v1) nounwind { define <4 x float> @test(<4 x float>* %v1) nounwind {
%tmp = load <4 x float>* %v1 ; <<4 x float>> [#uses=1] %tmp = load <4 x float>* %v1 ; <<4 x float>> [#uses=1]

View File

@ -1,9 +1,7 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \ ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
; RUN: grep unpcklps | count 1 ; RUN: grep unpcklps %t | count 1
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \ ; RUN: grep pshufd %t | count 1
; RUN: grep unpckhps | count 1 ; RUN: not grep {sub.*esp} %t
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \
; RUN: not grep {sub.*esp}
define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) { define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) {
%tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2] %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2]

View File

@ -1,8 +1,10 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 | grep shufps | count 4 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin -o %t -f
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | grep mov | count 2 ; RUN: grep shufps %t | count 4
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd | count 4 ; RUN: grep movaps %t | count 2
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep shufps ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep mov ; RUN: grep pshufd %t | count 4
; RUN: not grep shufps %t
; RUN: not grep mov %t
define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind { define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind {
%tmp1 = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer %tmp1 = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer

View File

@ -1,8 +1,7 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -disable-mmx -o %t -f ; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -disable-mmx -o %t -f
; RUN: grep pshufhw %t | grep 161 | count 1 ; RUN: grep pshufhw %t | grep 161 | count 1
; RUN: grep pslldq %t | count 1 ; RUN: grep shufps %t | count 1
; RUN: not grep pslldq %t
; Test case when creating pshufhw, we incorrectly set the higher order bit ; Test case when creating pshufhw, we incorrectly set the higher order bit
; for an undef, ; for an undef,
@ -20,4 +19,4 @@ entry:
%0 = shufflevector <4 x i32> %in, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> < i32 undef, i32 5, i32 undef, i32 2> %0 = shufflevector <4 x i32> %in, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> < i32 undef, i32 5, i32 undef, i32 2>
store <4 x i32> %0, <4 x i32>* %dest store <4 x i32> %0, <4 x i32>* %dest
ret void ret void
} }

View File

@ -1,6 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f ; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f
; RUN: grep pextrw %t | count 1 ; RUN: grep pextrw %t | count 1
; RUN: grep punpcklqdq %t | count 1 ; RUN: grep movlhps %t | count 1
; RUN: grep pshufhw %t | count 1 ; RUN: grep pshufhw %t | count 1
; RUN: grep pinsrw %t | count 1 ; RUN: grep pinsrw %t | count 1
; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f ; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f

View File

@ -194,10 +194,6 @@ SDTypeConstraint::SDTypeConstraint(Record *R) {
ConstraintType = SDTCisOpSmallerThanOp; ConstraintType = SDTCisOpSmallerThanOp;
x.SDTCisOpSmallerThanOp_Info.BigOperandNum = x.SDTCisOpSmallerThanOp_Info.BigOperandNum =
R->getValueAsInt("BigOperandNum"); R->getValueAsInt("BigOperandNum");
} else if (R->isSubClassOf("SDTCisIntVectorOfSameSize")) {
ConstraintType = SDTCisIntVectorOfSameSize;
x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum =
R->getValueAsInt("OtherOpNum");
} else if (R->isSubClassOf("SDTCisEltOfVec")) { } else if (R->isSubClassOf("SDTCisEltOfVec")) {
ConstraintType = SDTCisEltOfVec; ConstraintType = SDTCisEltOfVec;
x.SDTCisEltOfVec_Info.OtherOperandNum = x.SDTCisEltOfVec_Info.OtherOperandNum =
@ -365,23 +361,9 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
} }
return MadeChange; return MadeChange;
} }
case SDTCisIntVectorOfSameSize: {
TreePatternNode *OtherOperand =
getOperandNum(x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum,
N, NumResults);
if (OtherOperand->hasTypeSet()) {
if (!isVector(OtherOperand->getTypeNum(0)))
TP.error(N->getOperator()->getName() + " VT operand must be a vector!");
MVT IVT = OtherOperand->getTypeNum(0);
unsigned NumElements = IVT.getVectorNumElements();
IVT = MVT::getIntVectorWithNumElements(NumElements);
return NodeToApply->UpdateNodeType(IVT.getSimpleVT(), TP);
}
return false;
}
case SDTCisEltOfVec: { case SDTCisEltOfVec: {
TreePatternNode *OtherOperand = TreePatternNode *OtherOperand =
getOperandNum(x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum, getOperandNum(x.SDTCisEltOfVec_Info.OtherOperandNum,
N, NumResults); N, NumResults);
if (OtherOperand->hasTypeSet()) { if (OtherOperand->hasTypeSet()) {
if (!isVector(OtherOperand->getTypeNum(0))) if (!isVector(OtherOperand->getTypeNum(0)))
@ -925,25 +907,6 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
if (NI.getNumResults() == 0) if (NI.getNumResults() == 0)
MadeChange |= UpdateNodeType(MVT::isVoid, TP); MadeChange |= UpdateNodeType(MVT::isVoid, TP);
// If this is a vector_shuffle operation, apply types to the build_vector
// operation. The types of the integers don't matter, but this ensures they
// won't get checked.
if (getOperator()->getName() == "vector_shuffle" &&
getChild(2)->getOperator()->getName() == "build_vector") {
TreePatternNode *BV = getChild(2);
const std::vector<MVT::SimpleValueType> &LegalVTs
= CDP.getTargetInfo().getLegalValueTypes();
MVT::SimpleValueType LegalIntVT = MVT::Other;
for (unsigned i = 0, e = LegalVTs.size(); i != e; ++i)
if (isInteger(LegalVTs[i]) && !isVector(LegalVTs[i])) {
LegalIntVT = LegalVTs[i];
break;
}
assert(LegalIntVT != MVT::Other && "No legal integer VT?");
for (unsigned i = 0, e = BV->getNumChildren(); i != e; ++i)
MadeChange |= BV->getChild(i)->UpdateNodeType(LegalIntVT, TP);
}
return MadeChange; return MadeChange;
} else if (getOperator()->isSubClassOf("Instruction")) { } else if (getOperator()->isSubClassOf("Instruction")) {
const DAGInstruction &Inst = CDP.getInstruction(getOperator()); const DAGInstruction &Inst = CDP.getInstruction(getOperator());
@ -2086,6 +2049,9 @@ void CodeGenDAGPatterns::ParsePatterns() {
IterateInference |= Result->getTree(0)-> IterateInference |= Result->getTree(0)->
UpdateNodeType(Pattern->getTree(0)->getExtTypes(), *Result); UpdateNodeType(Pattern->getTree(0)->getExtTypes(), *Result);
} while (IterateInference); } while (IterateInference);
// Copy the transform function from the source pattern's root node onto the result's root node.
Result->getTree(0)->setTransformFn(Pattern->getTree(0)->getTransformFn());
// Verify that we inferred enough types that we can do something with the // Verify that we inferred enough types that we can do something with the
// pattern and result. If these fire the user has to add type casts. // pattern and result. If these fire the user has to add type casts.

View File

@ -62,8 +62,7 @@ struct SDTypeConstraint {
unsigned OperandNo; // The operand # this constraint applies to. unsigned OperandNo; // The operand # this constraint applies to.
enum { enum {
SDTCisVT, SDTCisPtrTy, SDTCisInt, SDTCisFP, SDTCisSameAs, SDTCisVT, SDTCisPtrTy, SDTCisInt, SDTCisFP, SDTCisSameAs,
SDTCisVTSmallerThanOp, SDTCisOpSmallerThanOp, SDTCisIntVectorOfSameSize, SDTCisVTSmallerThanOp, SDTCisOpSmallerThanOp, SDTCisEltOfVec
SDTCisEltOfVec
} ConstraintType; } ConstraintType;
union { // The discriminated union. union { // The discriminated union.
@ -79,9 +78,6 @@ struct SDTypeConstraint {
struct { struct {
unsigned BigOperandNum; unsigned BigOperandNum;
} SDTCisOpSmallerThanOp_Info; } SDTCisOpSmallerThanOp_Info;
struct {
unsigned OtherOperandNum;
} SDTCisIntVectorOfSameSize_Info;
struct { struct {
unsigned OtherOperandNum; unsigned OtherOperandNum;
} SDTCisEltOfVec_Info; } SDTCisEltOfVec_Info;