2nd attempt, fixing SSE4.1 issues and implementing feedback from duncan.

PR2957

ISD::VECTOR_SHUFFLE now stores an array of integers representing the shuffle
mask internal to the node, rather than taking a BUILD_VECTOR of ConstantSDNodes
as the shuffle mask.  A value of -1 represents UNDEF.

In addition to eliminating the creation of illegal BUILD_VECTORS just to 
represent shuffle masks, we are better about canonicalizing the shuffle mask,
resulting in substantially better code for some classes of shuffles.

llvm-svn: 70225
This commit is contained in:
Nate Begeman 2009-04-27 18:41:29 +00:00
parent bd53d3b24f
commit 9d121924fd
26 changed files with 1672 additions and 2234 deletions

View File

@ -353,6 +353,13 @@ public:
SDValue getConvertRndSat(MVT VT, DebugLoc dl, SDValue Val, SDValue DTy,
SDValue STy,
SDValue Rnd, SDValue Sat, ISD::CvtCode Code);
/// getVectorShuffle - Return an ISD::VECTOR_SHUFFLE node. The number of
/// elements in VT, which must be a vector type, must match the number of
/// mask elements NumElts. A negative integer mask element is treated as
/// undefined.
SDValue getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, SDValue N2,
const int *MaskElts);
/// getZeroExtendInReg - Return the expression required to zero extend the Op
/// value assuming it was the smaller SrcTy value.

View File

@ -24,6 +24,7 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@ -1703,6 +1704,41 @@ public:
}
};
class ShuffleVectorSDNode : public SDNode {
SDUse Ops[2];
int *Mask;
protected:
friend class SelectionDAG;
ShuffleVectorSDNode(MVT VT, DebugLoc dl, SDValue N1, SDValue N2, int *M)
: SDNode(ISD::VECTOR_SHUFFLE, dl, getSDVTList(VT)), Mask(M) {
InitOperands(Ops, N1, N2);
}
public:
void getMask(SmallVectorImpl<int> &M) const {
MVT VT = getValueType(0);
M.clear();
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
M.push_back(Mask[i]);
}
int getMaskElt(unsigned Idx) const {
assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
return Mask[Idx];
}
bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
int getSplatIndex() const {
assert(isSplat() && "Cannot get splat index for non-splat!");
return Mask[0];
}
static bool isSplatMask(const int *Mask, MVT VT);
static bool classof(const ShuffleVectorSDNode *) { return true; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::VECTOR_SHUFFLE;
}
};
class ConstantSDNode : public SDNode {
const ConstantInt *Value;
friend class SelectionDAG;
@ -2084,7 +2120,7 @@ public:
return N->getOpcode() == ISD::CONDCODE;
}
};
/// CvtRndSatSDNode - NOTE: avoid using this node as this may disappear in the
/// future and most targets don't support it.
class CvtRndSatSDNode : public SDNode {

View File

@ -28,6 +28,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/DebugLoc.h"
#include "llvm/Target/TargetMachine.h"
@ -328,7 +329,7 @@ public:
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
virtual bool isShuffleMaskLegal(SDValue Mask, MVT VT) const {
virtual bool isShuffleMaskLegal(SmallVectorImpl<int> &Mask, MVT VT) const {
return true;
}
@ -336,9 +337,7 @@ public:
/// used by Targets can use this to indicate if there is a suitable
/// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
/// pool entry.
virtual bool isVectorClearMaskLegal(const std::vector<SDValue> &BVOps,
MVT EVT,
SelectionDAG &DAG) const {
virtual bool isVectorClearMaskLegal(SmallVectorImpl<int> &M, MVT VT) const {
return false;
}

View File

@ -51,15 +51,6 @@ class SDTCisOpSmallerThanOp<int SmallOp, int BigOp> : SDTypeConstraint<SmallOp>{
int BigOperandNum = BigOp;
}
/// SDTCisIntVectorOfSameSize - This indicates that ThisOp and OtherOp are
/// vector types, and that ThisOp is the result of
/// MVT::getIntVectorWithNumElements with the number of elements
/// that ThisOp has.
class SDTCisIntVectorOfSameSize<int ThisOp, int OtherOp>
: SDTypeConstraint<ThisOp> {
int OtherOpNum = OtherOp;
}
/// SDTCisEltOfVec - This indicates that ThisOp is a scalar type of the same
/// type as the element type of OtherOp, which is a vector type.
class SDTCisEltOfVec<int ThisOp, int OtherOp>
@ -175,8 +166,8 @@ def SDTIStore : SDTypeProfile<1, 3, [ // indexed store
SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3>
]>;
def SDTVecShuffle : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisIntVectorOfSameSize<3, 0>
def SDTVecShuffle : SDTypeProfile<1, 2, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
]>;
def SDTVecExtract : SDTypeProfile<1, 2, [ // vector extract
SDTCisEltOfVec<0, 1>, SDTCisPtrTy<2>

View File

@ -5102,7 +5102,21 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
InVec.getValueType(), &Ops[0], Ops.size());
}
// If the invec is an UNDEF and if EltNo is a constant, create a new
// BUILD_VECTOR with undef elements and the inserted element.
if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
isa<ConstantSDNode>(EltNo)) {
MVT VT = InVec.getValueType();
MVT EVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EVT));
unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
if (Elt < Ops.size())
Ops[Elt] = InVal;
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
InVec.getValueType(), &Ops[0], Ops.size());
}
return SDValue();
}
@ -5164,9 +5178,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// to examine the mask.
if (BCNumEltsChanged)
return SDValue();
unsigned Idx = cast<ConstantSDNode>(InVec.getOperand(2).
getOperand(Elt))->getZExtValue();
unsigned NumElems = InVec.getOperand(2).getNumOperands();
int Idx = cast<ShuffleVectorSDNode>(InVec)->getMaskElt(Elt);
int NumElems = InVec.getValueType().getVectorNumElements();
InVec = (Idx < NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
if (InVec.getOpcode() == ISD::BIT_CONVERT)
InVec = InVec.getOperand(0);
@ -5213,7 +5226,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands();
MVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
MVT EltType = VT.getVectorElementType();
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
@ -5256,56 +5268,36 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
}
// If everything is good, we can make a shuffle operation.
MVT IndexVT = MVT::i32;
if (VecIn1.getNode()) {
SmallVector<SDValue, 8> BuildVecIndices;
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumInScalars; ++i) {
if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
BuildVecIndices.push_back(DAG.getUNDEF(IndexVT));
Mask.push_back(-1);
continue;
}
SDValue Extract = N->getOperand(i);
// If extracting from the first vector, just use the index directly.
SDValue Extract = N->getOperand(i);
SDValue ExtVal = Extract.getOperand(1);
if (Extract.getOperand(0) == VecIn1) {
if (ExtVal.getValueType() == IndexVT)
BuildVecIndices.push_back(ExtVal);
else {
unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
BuildVecIndices.push_back(DAG.getConstant(Idx, IndexVT));
}
Mask.push_back(cast<ConstantSDNode>(ExtVal)->getZExtValue());
continue;
}
// Otherwise, use InIdx + VecSize
unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
BuildVecIndices.push_back(DAG.getConstant(Idx+NumInScalars, IndexVT));
Mask.push_back(Idx+NumInScalars);
}
// Add count and size info.
MVT BuildVecVT = MVT::getVectorVT(IndexVT, NumElts);
if (!TLI.isTypeLegal(BuildVecVT) && LegalTypes)
if (!TLI.isTypeLegal(VT) && LegalTypes)
return SDValue();
// Return the new VECTOR_SHUFFLE node.
SDValue Ops[5];
SDValue Ops[2];
Ops[0] = VecIn1;
if (VecIn2.getNode()) {
Ops[1] = VecIn2;
} else {
// Use an undef build_vector as input for the second operand.
std::vector<SDValue> UnOps(NumInScalars,
DAG.getUNDEF(EltType));
Ops[1] = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
&UnOps[0], UnOps.size());
AddToWorkList(Ops[1].getNode());
}
Ops[2] = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), BuildVecVT,
&BuildVecIndices[0], BuildVecIndices.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(), VT, Ops, 3);
Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
}
return SDValue();
@ -5325,8 +5317,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
SDValue ShufMask = N->getOperand(2);
unsigned NumElts = ShufMask.getNumOperands();
return SDValue();
MVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@ -5334,60 +5328,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
assert(N0.getValueType().getVectorNumElements() == NumElts &&
"Vector shuffle must be normalized in DAG");
// If the shuffle mask is an identity operation on the LHS, return the LHS.
bool isIdentity = true;
for (unsigned i = 0; i != NumElts; ++i) {
if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF &&
cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue() != i) {
isIdentity = false;
break;
}
}
if (isIdentity) return N->getOperand(0);
// If the shuffle mask is an identity operation on the RHS, return the RHS.
isIdentity = true;
for (unsigned i = 0; i != NumElts; ++i) {
if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF &&
cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue() !=
i+NumElts) {
isIdentity = false;
break;
}
}
if (isIdentity) return N->getOperand(1);
// Check if the shuffle is a unary shuffle, i.e. one of the vectors is not
// needed at all.
bool isUnary = true;
bool isSplat = true;
int VecNum = -1;
unsigned BaseIdx = 0;
for (unsigned i = 0; i != NumElts; ++i)
if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF) {
unsigned Idx=cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue();
int V = (Idx < NumElts) ? 0 : 1;
if (VecNum == -1) {
VecNum = V;
BaseIdx = Idx;
} else {
if (BaseIdx != Idx)
isSplat = false;
if (VecNum != V) {
isUnary = false;
break;
}
}
}
// Normalize unary shuffle so the RHS is undef.
if (isUnary && VecNum == 1)
std::swap(N0, N1);
// FIXME: implement canonicalizations from DAG.getVectorShuffle()
// If it is a splat, check if the argument vector is a build_vector with
// all scalar elements the same.
if (isSplat) {
if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
SDNode *V = N0.getNode();
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
@ -5401,6 +5348,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (V->getOpcode() == ISD::BUILD_VECTOR) {
unsigned NumElems = V->getNumOperands();
unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
if (NumElems > BaseIdx) {
SDValue Base;
bool AllSame = true;
@ -5425,38 +5373,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
}
// If it is a unary or the LHS and the RHS are the same node, turn the RHS
// into an undef.
if (isUnary || N0 == N1) {
// Check the SHUFFLE mask, mapping any inputs from the 2nd operand into the
// first operand.
SmallVector<SDValue, 8> MappedOps;
for (unsigned i = 0; i != NumElts; ++i) {
if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF ||
cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue() <
NumElts) {
MappedOps.push_back(ShufMask.getOperand(i));
} else {
unsigned NewIdx =
cast<ConstantSDNode>(ShufMask.getOperand(i))->getZExtValue() -
NumElts;
MappedOps.push_back(DAG.getConstant(NewIdx,
ShufMask.getOperand(i).getValueType()));
}
}
ShufMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
ShufMask.getValueType(),
&MappedOps[0], MappedOps.size());
AddToWorkList(ShufMask.getNode());
return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(),
N->getValueType(0), N0,
DAG.getUNDEF(N->getValueType(0)),
ShufMask);
}
return SDValue();
}
@ -5465,52 +5381,42 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
/// vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
MVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (N->getOpcode() == ISD::AND) {
if (RHS.getOpcode() == ISD::BIT_CONVERT)
RHS = RHS.getOperand(0);
if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
std::vector<SDValue> IdxOps;
unsigned NumOps = RHS.getNumOperands();
unsigned NumElts = NumOps;
SmallVector<int, 8> Indices;
unsigned NumElts = RHS.getNumOperands();
for (unsigned i = 0; i != NumElts; ++i) {
SDValue Elt = RHS.getOperand(i);
if (!isa<ConstantSDNode>(Elt))
return SDValue();
else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
IdxOps.push_back(DAG.getIntPtrConstant(i));
Indices.push_back(i);
else if (cast<ConstantSDNode>(Elt)->isNullValue())
IdxOps.push_back(DAG.getIntPtrConstant(NumElts));
Indices.push_back(NumElts);
else
return SDValue();
}
// Let's see if the target supports this vector_shuffle.
if (!TLI.isVectorClearMaskLegal(IdxOps, TLI.getPointerTy(), DAG))
MVT RVT = RHS.getValueType();
if (!TLI.isVectorClearMaskLegal(Indices, RVT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.
MVT EVT = RHS.getValueType().getVectorElementType();
MVT VT = MVT::getVectorVT(EVT, NumElts);
MVT MaskVT = MVT::getVectorVT(TLI.getPointerTy(), NumElts);
std::vector<SDValue> Ops;
LHS = DAG.getNode(ISD::BIT_CONVERT, LHS.getDebugLoc(), VT, LHS);
Ops.push_back(LHS);
AddToWorkList(LHS.getNode());
std::vector<SDValue> ZeroOps(NumElts, DAG.getConstant(0, EVT));
Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
VT, &ZeroOps[0], ZeroOps.size()));
Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
MaskVT, &IdxOps[0], IdxOps.size()));
SDValue Result = DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(),
VT, &Ops[0], Ops.size());
if (VT != N->getValueType(0))
Result = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
N->getValueType(0), Result);
return Result;
MVT EVT = RVT.getVectorElementType();
SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
DAG.getConstant(0, EVT));
SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
RVT, &ZeroOps[0], ZeroOps.size());
LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
}
}

View File

@ -267,16 +267,10 @@ private:
bool isVolatile, SDValue ValOp,
unsigned StWidth, DebugLoc dl);
/// isShuffleLegal - Return non-null if a vector shuffle is legal with the
/// specified mask and type. Targets can specify exactly which masks they
/// support and the code generator is tasked with not creating illegal masks.
///
/// Note that this will also return true for shuffles that are promoted to a
/// different type.
///
/// If this is a legal shuffle, this method returns the (possibly promoted)
/// build_vector Mask. If it's not a legal shuffle, it returns null.
SDNode *isShuffleLegal(MVT VT, SDValue Mask) const;
/// promoteShuffle - Promote a shuffle mask of a vector VT to perform the
/// same shuffle on a vector of NVT. Must not create an illegal shuffle mask.
SDValue promoteShuffle(MVT NVT, MVT VT, DebugLoc dl, SDValue N1, SDValue N2,
SmallVectorImpl<int> &Mask) const;
bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
@ -319,50 +313,35 @@ private:
};
}
/// isVectorShuffleLegal - Return true if a vector shuffle is legal with the
/// specified mask and type. Targets can specify exactly which masks they
/// support and the code generator is tasked with not creating illegal masks.
///
/// Note that this will also return true for shuffles that are promoted to a
/// different type.
SDNode *SelectionDAGLegalize::isShuffleLegal(MVT VT, SDValue Mask) const {
switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, VT)) {
default: return 0;
case TargetLowering::Legal:
case TargetLowering::Custom:
break;
case TargetLowering::Promote: {
// If this is promoted to a different type, convert the shuffle mask and
// ask if it is legal in the promoted type!
MVT NVT = TLI.getTypeToPromoteTo(ISD::VECTOR_SHUFFLE, VT);
MVT EltVT = NVT.getVectorElementType();
/// promoteShuffle - Promote a shuffle mask of a vector VT to perform the
/// same shuffle on a vector of NVT. Must not create an illegal shuffle mask.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
SDValue SelectionDAGLegalize::promoteShuffle(MVT NVT, MVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
SmallVectorImpl<int> &Mask) const {
MVT EltVT = NVT.getVectorElementType();
int NumMaskElts = VT.getVectorNumElements();
int NumDestElts = NVT.getVectorNumElements();
unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
// If we changed # elements, change the shuffle mask.
unsigned NumEltsGrowth =
NVT.getVectorNumElements() / VT.getVectorNumElements();
assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
if (NumEltsGrowth > 1) {
// Renumber the elements.
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = Mask.getNumOperands(); i != e; ++i) {
SDValue InOp = Mask.getOperand(i);
for (unsigned j = 0; j != NumEltsGrowth; ++j) {
if (InOp.getOpcode() == ISD::UNDEF)
Ops.push_back(DAG.getUNDEF(EltVT));
else {
unsigned InEltNo = cast<ConstantSDNode>(InOp)->getZExtValue();
Ops.push_back(DAG.getConstant(InEltNo*NumEltsGrowth+j, EltVT));
}
}
}
Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getDebugLoc(),
NVT, &Ops[0], Ops.size());
assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
if (NumEltsGrowth == 1)
return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
SmallVector<int, 8> NewMask;
for (int i = 0; i != NumMaskElts; ++i) {
int Idx = Mask[i];
for (unsigned j = 0; j != NumEltsGrowth; ++j) {
if (Idx < 0)
NewMask.push_back(-1);
else
NewMask.push_back(Idx * NumEltsGrowth + j);
}
VT = NVT;
break;
}
}
return TLI.isShuffleMaskLegal(Mask, VT) ? Mask.getNode() : 0;
assert((int)NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
}
SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
@ -1652,25 +1631,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp1.getValueType(), Tmp2);
unsigned NumElts = Tmp1.getValueType().getVectorNumElements();
MVT ShufMaskVT =
MVT::getIntVectorWithNumElements(NumElts);
MVT ShufMaskEltVT = ShufMaskVT.getVectorElementType();
// We generate a shuffle of InVec and ScVec, so the shuffle mask
// should be 0,1,2,3,4,5... with the appropriate element replaced with
// elt 0 of the RHS.
SmallVector<SDValue, 8> ShufOps;
for (unsigned i = 0; i != NumElts; ++i) {
if (i != InsertPos->getZExtValue())
ShufOps.push_back(DAG.getConstant(i, ShufMaskEltVT));
else
ShufOps.push_back(DAG.getConstant(NumElts, ShufMaskEltVT));
}
SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, ShufMaskVT,
&ShufOps[0], ShufOps.size());
Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Tmp1.getValueType(),
Tmp1, ScVec, ShufMask);
SmallVector<int, 8> ShufOps;
for (unsigned i = 0; i != NumElts; ++i)
ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
Result = DAG.getVectorShuffle(Tmp1.getValueType(), dl, Tmp1, ScVec,
&ShufOps[0]);
Result = LegalizeOp(Result);
break;
}
@ -1705,16 +1674,21 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
break;
}
break;
case ISD::VECTOR_SHUFFLE:
case ISD::VECTOR_SHUFFLE: {
Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the input vectors,
Tmp2 = LegalizeOp(Node->getOperand(1)); // but not the shuffle mask.
Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Node->getOperand(2));
Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2);
MVT VT = Result.getValueType();
// Copy the Mask to a local SmallVector for use wi
SmallVector<int, 8> Mask;
cast<ShuffleVectorSDNode>(Result)->getMask(Mask);
// Allow targets to custom lower the SHUFFLEs they support.
switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, Result.getValueType())){
switch (TLI.getOperationAction(ISD::VECTOR_SHUFFLE, VT)) {
default: assert(0 && "Unknown operation action!");
case TargetLowering::Legal:
assert(isShuffleLegal(Result.getValueType(), Node->getOperand(2)) &&
assert(TLI.isShuffleMaskLegal(Mask, VT) &&
"vector shuffle should not be created if not legal!");
break;
case TargetLowering::Custom:
@ -1725,26 +1699,21 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
// FALLTHROUGH
case TargetLowering::Expand: {
MVT VT = Node->getValueType(0);
MVT EltVT = VT.getVectorElementType();
MVT PtrVT = TLI.getPointerTy();
SDValue Mask = Node->getOperand(2);
unsigned NumElems = Mask.getNumOperands();
int NumElems = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElems; ++i) {
SDValue Arg = Mask.getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) {
for (int i = 0; i != NumElems; ++i) {
if (Mask[i] < 0) {
Ops.push_back(DAG.getUNDEF(EltVT));
} else {
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
if (Idx < NumElems)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp1,
DAG.getConstant(Idx, PtrVT)));
else
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp2,
DAG.getConstant(Idx - NumElems, PtrVT)));
continue;
}
int Idx = Mask[i];
if (Idx < NumElems)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp1,
DAG.getIntPtrConstant(Idx)));
else
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Tmp2,
DAG.getIntPtrConstant(Idx - NumElems)));
}
Result = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
break;
@ -1759,15 +1728,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Tmp2);
// Convert the shuffle mask to the right # elements.
Tmp3 = SDValue(isShuffleLegal(OVT, Node->getOperand(2)), 0);
assert(Tmp3.getNode() && "Shuffle not legal?");
Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NVT, Tmp1, Tmp2, Tmp3);
Result = promoteShuffle(NVT, OVT, dl, Tmp1, Tmp2, Mask);
Result = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Result);
break;
}
}
break;
}
case ISD::EXTRACT_VECTOR_ELT:
Tmp1 = Node->getOperand(0);
Tmp2 = LegalizeOp(Node->getOperand(1));
@ -5490,6 +5457,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// FIXME: it would be far nicer to change this into map<SDValue,uint64_t>
// and use a bitmask instead of a list of elements.
// FIXME: this doesn't treat <0, u, 0, u> for example, as a splat.
std::map<SDValue, std::vector<unsigned> > Values;
Values[SplatValue].push_back(0);
bool isConstant = true;
@ -5546,21 +5514,17 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
if (SplatValue.getNode()) { // Splat of one value?
// Build the shuffle constant vector: <0, 0, 0, 0>
MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
SDValue Zero = DAG.getConstant(0, MaskVT.getVectorElementType());
std::vector<SDValue> ZeroVec(NumElems, Zero);
SDValue SplatMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
&ZeroVec[0], ZeroVec.size());
SmallVector<int, 8> ZeroVec(NumElems, 0);
// If the target supports VECTOR_SHUFFLE and this shuffle mask, use it.
if (isShuffleLegal(VT, SplatMask)) {
if (TLI.isShuffleMaskLegal(ZeroVec, Node->getValueType(0))) {
// Get the splatted value into the low element of a vector register.
SDValue LowValVec =
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, SplatValue);
// Return shuffle(LowValVec, undef, <0,0,0,0>)
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, LowValVec,
DAG.getUNDEF(VT), SplatMask);
return DAG.getVectorShuffle(VT, dl, LowValVec, DAG.getUNDEF(VT),
&ZeroVec[0]);
}
}
@ -5582,35 +5546,25 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
std::swap(Val1, Val2);
// Build the shuffle constant vector: e.g. <0, 4, 0, 4>
MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT MaskEltVT = MaskVT.getVectorElementType();
std::vector<SDValue> MaskVec(NumElems);
SmallVector<int, 8> ShuffleMask(NumElems, -1);
// Set elements of the shuffle mask for Val1.
std::vector<unsigned> &Val1Elts = Values[Val1];
for (unsigned i = 0, e = Val1Elts.size(); i != e; ++i)
MaskVec[Val1Elts[i]] = DAG.getConstant(0, MaskEltVT);
ShuffleMask[Val1Elts[i]] = 0;
// Set elements of the shuffle mask for Val2.
std::vector<unsigned> &Val2Elts = Values[Val2];
for (unsigned i = 0, e = Val2Elts.size(); i != e; ++i)
if (Val2.getOpcode() != ISD::UNDEF)
MaskVec[Val2Elts[i]] = DAG.getConstant(NumElems, MaskEltVT);
else
MaskVec[Val2Elts[i]] = DAG.getUNDEF(MaskEltVT);
SDValue ShuffleMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
&MaskVec[0], MaskVec.size());
ShuffleMask[Val2Elts[i]] = NumElems;
// If the target supports SCALAR_TO_VECTOR and this shuffle mask, use it.
if (TLI.isOperationLegalOrCustom(ISD::SCALAR_TO_VECTOR, VT) &&
isShuffleLegal(VT, ShuffleMask)) {
TLI.isShuffleMaskLegal(ShuffleMask, VT)) {
Val1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val1);
Val2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Val2);
SDValue Ops[] = { Val1, Val2, ShuffleMask };
// Return shuffle(LoValVec, HiValVec, <0,1,0,1>)
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, Ops, 3);
return DAG.getVectorShuffle(VT, dl, Val1, Val2, &ShuffleMask[0]);
}
}
@ -8066,36 +8020,19 @@ SDValue SelectionDAGLegalize::WidenVectorOp(SDValue Op, MVT WidenVT) {
case ISD::VECTOR_SHUFFLE: {
SDValue Tmp1 = WidenVectorOp(Node->getOperand(0), WidenVT);
SDValue Tmp2 = WidenVectorOp(Node->getOperand(1), WidenVT);
// VECTOR_SHUFFLE 3rd operand must be a constant build vector that is
// used as permutation array. We build the vector here instead of widening
// because we don't want to legalize and have it turned to something else.
SDValue PermOp = Node->getOperand(2);
SDValueVector NewOps;
MVT PVT = PermOp.getValueType().getVectorElementType();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Node);
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i < NumElts; ++i) {
if (PermOp.getOperand(i).getOpcode() == ISD::UNDEF) {
NewOps.push_back(PermOp.getOperand(i));
} else {
unsigned Idx =
cast<ConstantSDNode>(PermOp.getOperand(i))->getZExtValue();
if (Idx < NumElts) {
NewOps.push_back(PermOp.getOperand(i));
}
else {
NewOps.push_back(DAG.getConstant(Idx + NewNumElts - NumElts,
PermOp.getOperand(i).getValueType()));
}
}
int Idx = SVOp->getMaskElt(i);
if (Idx < (int)NumElts)
NewMask.push_back(Idx);
else
NewMask.push_back(Idx + NewNumElts - NumElts);
}
for (unsigned i = NumElts; i < NewNumElts; ++i) {
NewOps.push_back(DAG.getUNDEF(PVT));
}
SDValue Tmp3 = DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::getVectorVT(PVT, NewOps.size()),
&NewOps[0], NewOps.size());
Result = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT, Tmp1, Tmp2, Tmp3);
for (unsigned i = NumElts; i < NewNumElts; ++i)
NewMask.push_back(-1);
Result = DAG.getVectorShuffle(WidenVT, dl, Tmp1, Tmp2, &NewMask[0]);
break;
}
case ISD::LOAD: {

View File

@ -761,6 +761,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
SDValue &Hi) {
// The low and high parts of the original input give four input vectors.
SDValue Inputs[4];
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
DebugLoc dl = N->getDebugLoc();
GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
@ -772,10 +773,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
// If Lo or Hi uses elements from at most two of the four input vectors, then
// express it as a vector shuffle of those two inputs. Otherwise extract the
// input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
SDValue Mask = N->getOperand(2);
MVT IdxVT = Mask.getValueType().getVectorElementType();
SmallVector<SDValue, 16> Ops;
Ops.reserve(NewElts);
SmallVector<int, 16> Ops;
for (unsigned High = 0; High < 2; ++High) {
SDValue &Output = High ? Hi : Lo;
@ -787,18 +785,15 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
unsigned FirstMaskIdx = High * NewElts;
bool useBuildVector = false;
for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset);
// The mask element. This indexes into the input.
unsigned Idx = Arg.getOpcode() == ISD::UNDEF ?
-1U : cast<ConstantSDNode>(Arg)->getZExtValue();
int Idx = SVN->getMaskElt(FirstMaskIdx + MaskOffset);
// The input vector this mask element indexes into.
unsigned Input = Idx / NewElts;
unsigned Input = (unsigned)Idx / NewElts;
if (Input >= array_lengthof(Inputs)) {
// The mask element does not index into any input vector.
Ops.push_back(DAG.getUNDEF(IdxVT));
Ops.push_back(-1);
continue;
}
@ -826,27 +821,24 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
}
// Add the mask index for the new shuffle vector.
Ops.push_back(DAG.getConstant(Idx + OpNo * NewElts, IdxVT));
Ops.push_back(Idx + OpNo * NewElts);
}
if (useBuildVector) {
MVT EltVT = NewVT.getVectorElementType();
Ops.clear();
SmallVector<SDValue, 16> SVOps;
// Extract the input elements by hand.
for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
SDValue Arg = Mask.getOperand(FirstMaskIdx + MaskOffset);
// The mask element. This indexes into the input.
unsigned Idx = Arg.getOpcode() == ISD::UNDEF ?
-1U : cast<ConstantSDNode>(Arg)->getZExtValue();
int Idx = SVN->getMaskElt(FirstMaskIdx + MaskOffset);
// The input vector this mask element indexes into.
unsigned Input = Idx / NewElts;
unsigned Input = (unsigned)Idx / NewElts;
if (Input >= array_lengthof(Inputs)) {
// The mask element is "undef" or indexes off the end of the input.
Ops.push_back(DAG.getUNDEF(EltVT));
SVOps.push_back(DAG.getUNDEF(EltVT));
continue;
}
@ -854,25 +846,22 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(SDNode *N, SDValue &Lo,
Idx -= Input * NewElts;
// Extract the vector element by hand.
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
Inputs[Input], DAG.getIntPtrConstant(Idx)));
SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
Inputs[Input], DAG.getIntPtrConstant(Idx)));
}
// Construct the Lo/Hi output using a BUILD_VECTOR.
Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, &Ops[0], Ops.size());
Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size());
} else if (InputUsed[0] == -1U) {
// No input vectors were used! The result is undefined.
Output = DAG.getUNDEF(NewVT);
} else {
// At least one input vector was used. Create a new shuffle vector.
SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::getVectorVT(IdxVT, Ops.size()),
&Ops[0], Ops.size());
SDValue Op0 = Inputs[InputUsed[0]];
// If only one input was used, use an undefined vector for the other.
SDValue Op1 = InputUsed[1] == -1U ?
DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
Output = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, NewVT, Op0, Op1, NewMask);
// At least one input vector was used. Create a new shuffle vector.
Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
}
Ops.clear();
@ -1473,18 +1462,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
if (NumOperands == 2) {
// Replace concat of two operands with a shuffle.
MVT PtrVT = TLI.getPointerTy();
SmallVector<SDValue, 16> MaskOps(WidenNumElts);
SmallVector<int, 16> MaskOps(WidenNumElts);
for (unsigned i=0; i < WidenNumElts/2; ++i) {
MaskOps[i] = DAG.getConstant(i, PtrVT);
MaskOps[i+WidenNumElts/2] = DAG.getConstant(i+WidenNumElts, PtrVT);
MaskOps[i] = i;
MaskOps[i+WidenNumElts/2] = i+WidenNumElts;
}
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::getVectorVT(PtrVT, WidenNumElts),
&MaskOps[0], WidenNumElts);
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT,
GetWidenedVector(N->getOperand(0)),
GetWidenedVector(N->getOperand(1)), Mask);
return DAG.getVectorShuffle(WidenVT, dl,
GetWidenedVector(N->getOperand(0)),
GetWidenedVector(N->getOperand(1)),
&MaskOps[0]);
}
}
}
@ -1761,8 +1747,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(SDNode *N) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
MVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
int NumElts = VT.getVectorNumElements();
DebugLoc dl = N->getDebugLoc();
MVT WidenVT = TLI.getTypeToTransformTo(VT);
@ -1772,28 +1759,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(SDNode *N) {
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
// Adjust mask based on new input vector length.
SDValue Mask = N->getOperand(2);
SmallVector<SDValue, 16> MaskOps(WidenNumElts);
MVT IdxVT = Mask.getValueType().getVectorElementType();
for (unsigned i = 0; i < NumElts; ++i) {
SDValue Arg = Mask.getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF)
MaskOps[i] = Arg;
else {
unsigned Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
if (Idx < NumElts)
MaskOps[i] = Arg;
else
MaskOps[i] = DAG.getConstant(Idx - NumElts + WidenNumElts, IdxVT);
}
SmallVector<int, 16> NewMask;
for (int i = 0; i < NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx < NumElts)
NewMask.push_back(Idx);
else
NewMask.push_back(Idx - NumElts + WidenNumElts);
}
for (unsigned i = NumElts; i < WidenNumElts; ++i)
MaskOps[i] = DAG.getUNDEF(IdxVT);
SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::getVectorVT(IdxVT, WidenNumElts),
&MaskOps[0], WidenNumElts);
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, WidenVT, InOp1, InOp2, NewMask);
NewMask.push_back(-1);
return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
}
SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {

View File

@ -456,6 +456,13 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(AT->getRawSubclassData());
break;
}
case ISD::VECTOR_SHUFFLE: {
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
i != e; ++i)
ID.AddInteger(SVN->getMaskElt(i));
break;
}
} // end switch (N->getOpcode())
}
@ -765,9 +772,9 @@ void SelectionDAG::VerifyNode(SDNode *N) {
MVT EltVT = N->getValueType(0).getVectorElementType();
for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I)
assert((I->getValueType() == EltVT ||
(EltVT.isInteger() && I->getValueType().isInteger() &&
EltVT.bitsLE(I->getValueType()))) &&
"Wrong operand type!");
(EltVT.isInteger() && I->getValueType().isInteger() &&
EltVT.bitsLE(I->getValueType()))) &&
"Wrong operand type!");
break;
}
}
@ -1126,6 +1133,110 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
return SDValue(CondCodeNodes[Cond], 0);
}
static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
std::swap(N1, N2);
int NElts = M.size();
for (int i = 0; i != NElts; ++i) {
if (M[i] >= NElts)
M[i] -= NElts;
else if (M[i] >= 0)
M[i] += NElts;
}
}
SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1,
SDValue N2, const int *Mask) {
assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE");
assert(VT.isVector() && N1.getValueType().isVector() &&
"Vector Shuffle VTs must be a vectors");
assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType()
&& "Vector Shuffle VTs must have same element type");
// Canonicalize shuffle undef, undef -> undef
if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
return N1;
// Validate that all indices in Mask are within the range of the elements
// input to the shuffle.
int NElts = VT.getVectorNumElements();
SmallVector<int, 8> MaskVec;
for (int i = 0; i != NElts; ++i) {
if (Mask[i] >= (NElts * 2)) {
assert(0 && "Index out of range");
return SDValue();
}
MaskVec.push_back(Mask[i]);
}
// Canonicalize shuffle v, v -> v, undef
if (N1 == N2) {
N2 = getUNDEF(VT);
for (int i = 0; i != NElts; ++i)
if (MaskVec[i] >= NElts) MaskVec[i] -= NElts;
}
// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
if (N1.getOpcode() == ISD::UNDEF)
commuteShuffle(N1, N2, MaskVec);
// Canonicalize all index into lhs, -> shuffle lhs, undef
// Canonicalize all index into rhs, -> shuffle rhs, undef
bool AllLHS = true, AllRHS = true;
bool N2Undef = N2.getOpcode() == ISD::UNDEF;
for (int i = 0; i != NElts; ++i) {
if (MaskVec[i] >= NElts) {
if (N2Undef)
MaskVec[i] = -1;
else
AllLHS = false;
} else if (MaskVec[i] >= 0) {
AllRHS = false;
}
}
if (AllLHS && AllRHS)
return getUNDEF(VT);
if (AllLHS)
N2 = getUNDEF(VT);
if (AllRHS) {
N1 = getUNDEF(VT);
commuteShuffle(N1, N2, MaskVec);
}
// If Identity shuffle, or all shuffle in to undef, return that node.
bool AllUndef = true;
bool Identity = true;
for (int i = 0; i < NElts; ++i) {
if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false;
if (MaskVec[i] >= 0) AllUndef = false;
}
if (Identity)
return N1;
if (AllUndef)
return getUNDEF(VT);
FoldingSetNodeID ID;
SDValue Ops[2] = { N1, N2 };
AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
for (int i = 0; i != NElts; ++i)
ID.AddInteger(MaskVec[i]);
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
// Allocate the mask array for the node out of the BumpPtrAllocator, since
// SDNode doesn't have access to it. This memory will be "leaked" when
// the node is deallocated, but recovered when the NodeAllocator is released.
int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>();
new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl,
SDValue Val, SDValue DTy,
SDValue STy, SDValue Rnd, SDValue Sat,
@ -2087,19 +2198,18 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
SDValue SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned i) {
MVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
SDValue PermMask = N->getOperand(2);
SDValue Idx = PermMask.getOperand(i);
if (Idx.getOpcode() == ISD::UNDEF)
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
int Index = SVN->getMaskElt(i);
if (Index < 0)
return getUNDEF(VT.getVectorElementType());
unsigned Index = cast<ConstantSDNode>(Idx)->getZExtValue();
unsigned NumElems = PermMask.getNumOperands();
int NumElems = VT.getVectorNumElements();
SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
Index %= NumElems;
if (V.getOpcode() == ISD::BIT_CONVERT) {
V = V.getOperand(0);
MVT VVT = V.getValueType();
if (!VVT.isVector() || VVT.getVectorNumElements() != NumElems)
if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems)
return SDValue();
}
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
@ -2794,12 +2904,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT,
}
break;
case ISD::VECTOR_SHUFFLE:
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType().isVector() &&
VT.isVector() && N3.getValueType().isVector() &&
N3.getOpcode() == ISD::BUILD_VECTOR &&
VT.getVectorNumElements() == N3.getNumOperands() &&
"Illegal VECTOR_SHUFFLE node!");
assert(0 && "should use getVectorShuffle constructor!");
break;
case ISD::BIT_CONVERT:
// Fold bit_convert nodes from a type to themselves.
@ -5323,14 +5428,15 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) {
SDNode *Mask = getOperand(2).getNode();
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(this);
OS << "<";
for (unsigned i = 0, e = Mask->getNumOperands(); i != e; ++i) {
for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
int Idx = SVN->getMaskElt(i);
if (i) OS << ",";
if (Mask->getOperand(i).getOpcode() == ISD::UNDEF)
if (Idx < 0)
OS << "u";
else
OS << cast<ConstantSDNode>(Mask->getOperand(i))->getZExtValue();
OS << Idx;
}
OS << ">";
}
@ -5611,3 +5717,13 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
SplatBitSize = sz;
return true;
}
bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) {
int Idx = -1;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
if (Idx < 0) Idx = Mask[i];
if (Mask[i] >= 0 && Mask[i] != Idx)
return false;
}
return true;
}

View File

@ -870,8 +870,7 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return N = DAG.getConstantFP(*CFP, VT);
if (isa<UndefValue>(C) && !isa<VectorType>(V->getType()) &&
!V->getType()->isAggregateType())
if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
return N = DAG.getUNDEF(VT);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
@ -925,14 +924,11 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
for (unsigned i = 0; i != NumElements; ++i)
Ops.push_back(getValue(CP->getOperand(i)));
} else {
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
"Unknown vector constant!");
assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
MVT EltVT = TLI.getValueType(VecTy->getElementType());
SDValue Op;
if (isa<UndefValue>(C))
Op = DAG.getUNDEF(EltVT);
else if (EltVT.isFloatingPoint())
if (EltVT.isFloatingPoint())
Op = DAG.getConstantFP(0, EltVT);
else
Op = DAG.getConstant(0, EltVT);
@ -2435,37 +2431,42 @@ void SelectionDAGLowering::visitExtractElement(User &I) {
// Utility for visitShuffleVector - Returns true if the mask is mask starting
// from SIndx and increasing to the element length (undefs are allowed).
static bool SequentialMask(SDValue Mask, unsigned SIndx) {
unsigned MaskNumElts = Mask.getNumOperands();
for (unsigned i = 0; i != MaskNumElts; ++i) {
if (Mask.getOperand(i).getOpcode() != ISD::UNDEF) {
unsigned Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
if (Idx != i + SIndx)
return false;
}
}
static bool SequentialMask(SmallVectorImpl<int> &Mask, int SIndx) {
int MaskNumElts = Mask.size();
for (int i = 0; i != MaskNumElts; ++i)
if ((Mask[i] >= 0) && (Mask[i] != i + SIndx))
return false;
return true;
}
void SelectionDAGLowering::visitShuffleVector(User &I) {
SmallVector<int, 8> Mask;
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
SDValue Mask = getValue(I.getOperand(2));
// Convert the ConstantVector mask operand into an array of ints, with -1
// representing undef values.
SmallVector<Constant*, 8> MaskElts;
cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
int MaskNumElts = MaskElts.size();
for (int i = 0; i != MaskNumElts; ++i) {
if (isa<UndefValue>(MaskElts[i]))
Mask.push_back(-1);
else
Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
}
MVT VT = TLI.getValueType(I.getType());
MVT SrcVT = Src1.getValueType();
int MaskNumElts = Mask.getNumOperands();
int SrcNumElts = SrcVT.getVectorNumElements();
if (SrcNumElts == MaskNumElts) {
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(),
VT, Src1, Src2, Mask));
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
&Mask[0]));
return;
}
// Normalize the shuffle vector since mask and vector length don't match.
MVT MaskEltVT = Mask.getValueType().getVectorElementType();
if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
// Mask is longer than the source vectors and is a multiple of the source
// vectors. We can use concatenate vector to make the mask and vectors
@ -2479,44 +2480,33 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
// Pad both vectors with undefs to make them the same length as the mask.
unsigned NumConcat = MaskNumElts / SrcNumElts;
bool Src1U = Src1.getOpcode() == ISD::UNDEF;
bool Src2U = Src2.getOpcode() == ISD::UNDEF;
SDValue UndefVal = DAG.getUNDEF(SrcVT);
SDValue* MOps1 = new SDValue[NumConcat];
SDValue* MOps2 = new SDValue[NumConcat];
SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
MOps1[0] = Src1;
MOps2[0] = Src2;
for (unsigned i = 1; i != NumConcat; ++i) {
MOps1[i] = UndefVal;
MOps2[i] = UndefVal;
}
Src1 = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
VT, MOps1, NumConcat);
Src2 = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
VT, MOps2, NumConcat);
delete [] MOps1;
delete [] MOps2;
Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
getCurDebugLoc(), VT,
&MOps1[0], NumConcat);
Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
getCurDebugLoc(), VT,
&MOps2[0], NumConcat);
// Readjust mask for new input vector length.
SmallVector<SDValue, 8> MappedOps;
SmallVector<int, 8> MappedOps;
for (int i = 0; i != MaskNumElts; ++i) {
if (Mask.getOperand(i).getOpcode() == ISD::UNDEF) {
MappedOps.push_back(Mask.getOperand(i));
} else {
int Idx = cast<ConstantSDNode>(Mask.getOperand(i))->getZExtValue();
if (Idx < SrcNumElts)
MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
else
MappedOps.push_back(DAG.getConstant(Idx + MaskNumElts - SrcNumElts,
MaskEltVT));
}
int Idx = Mask[i];
if (Idx < SrcNumElts)
MappedOps.push_back(Idx);
else
MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
}
Mask = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
Mask.getValueType(),
&MappedOps[0], MappedOps.size());
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(),
VT, Src1, Src2, Mask));
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
&MappedOps[0]));
return;
}
@ -2541,20 +2531,19 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
int MaxRange[2] = {-1, -1};
for (int i = 0; i != MaskNumElts; ++i) {
SDValue Arg = Mask.getOperand(i);
if (Arg.getOpcode() != ISD::UNDEF) {
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
int Input = 0;
if (Idx >= SrcNumElts) {
Input = 1;
Idx -= SrcNumElts;
}
if (Idx > MaxRange[Input])
MaxRange[Input] = Idx;
if (Idx < MinRange[Input])
MinRange[Input] = Idx;
int Idx = Mask[i];
int Input = 0;
if (Idx < 0)
continue;
if (Idx >= SrcNumElts) {
Input = 1;
Idx -= SrcNumElts;
}
if (Idx > MaxRange[Input])
MaxRange[Input] = Idx;
if (Idx < MinRange[Input])
MinRange[Input] = Idx;
}
// Check if the access is smaller than the vector size and can we find
@ -2596,26 +2585,18 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
}
}
// Calculate new mask.
SmallVector<SDValue, 8> MappedOps;
SmallVector<int, 8> MappedOps;
for (int i = 0; i != MaskNumElts; ++i) {
SDValue Arg = Mask.getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) {
MappedOps.push_back(Arg);
} else {
int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
if (Idx < SrcNumElts)
MappedOps.push_back(DAG.getConstant(Idx - StartIdx[0], MaskEltVT));
else {
Idx = Idx - SrcNumElts - StartIdx[1] + MaskNumElts;
MappedOps.push_back(DAG.getConstant(Idx, MaskEltVT));
}
}
int Idx = Mask[i];
if (Idx < 0)
MappedOps.push_back(Idx);
else if (Idx < SrcNumElts)
MappedOps.push_back(Idx - StartIdx[0]);
else
MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
}
Mask = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
Mask.getValueType(),
&MappedOps[0], MappedOps.size());
setValue(&I, DAG.getNode(ISD::VECTOR_SHUFFLE, getCurDebugLoc(),
VT, Src1, Src2, Mask));
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
&MappedOps[0]));
return;
}
}
@ -2627,12 +2608,10 @@ void SelectionDAGLowering::visitShuffleVector(User &I) {
MVT PtrVT = TLI.getPointerTy();
SmallVector<SDValue,8> Ops;
for (int i = 0; i != MaskNumElts; ++i) {
SDValue Arg = Mask.getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) {
if (Mask[i] < 0) {
Ops.push_back(DAG.getUNDEF(EltVT));
} else {
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
int Idx = cast<ConstantSDNode>(Arg)->getZExtValue();
int Idx = Mask[i];
if (Idx < SrcNumElts)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
EltVT, Src1, DAG.getConstant(Idx, PtrVT)));

View File

@ -1670,9 +1670,9 @@ SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDValue PermMask = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
@ -1703,39 +1703,40 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
} else
assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
for (unsigned i = 0; i != MaxElts; ++i) {
if (SVN->getMaskElt(i) < 0)
continue;
unsigned SrcElt = SVN->getMaskElt(i);
if (monotonic) {
if (SrcElt >= V2EltIdx0) {
if (1 >= (++EltsFromV2)) {
V2Elt = (V2EltIdx0 - SrcElt) << 2;
}
} else if (CurrElt != SrcElt) {
monotonic = false;
if (monotonic) {
if (SrcElt >= V2EltIdx0) {
if (1 >= (++EltsFromV2)) {
V2Elt = (V2EltIdx0 - SrcElt) << 2;
}
++CurrElt;
} else if (CurrElt != SrcElt) {
monotonic = false;
}
if (rotate) {
if (PrevElt > 0 && SrcElt < MaxElts) {
if ((PrevElt == SrcElt - 1)
|| (PrevElt == MaxElts - 1 && SrcElt == 0)) {
PrevElt = SrcElt;
if (SrcElt == 0)
V0Elt = i;
} else {
rotate = false;
}
} else if (PrevElt == 0) {
// First time through, need to keep track of previous element
++CurrElt;
}
if (rotate) {
if (PrevElt > 0 && SrcElt < MaxElts) {
if ((PrevElt == SrcElt - 1)
|| (PrevElt == MaxElts - 1 && SrcElt == 0)) {
PrevElt = SrcElt;
if (SrcElt == 0)
V0Elt = i;
} else {
// This isn't a rotation, takes elements from vector 2
rotate = false;
}
} else if (PrevElt == 0) {
// First time through, need to keep track of previous element
PrevElt = SrcElt;
} else {
// This isn't a rotation, takes elements from vector 2
rotate = false;
}
}
}
@ -1768,17 +1769,11 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
SmallVector<SDValue, 16> ResultMask;
for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
unsigned SrcElt;
if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
SrcElt = 0;
else
SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
for (unsigned i = 0, e = MaxElts; i != e; ++i) {
unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
for (unsigned j = 0; j < BytesPerElement; ++j) {
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
MVT::i8));
}
for (unsigned j = 0; j < BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
}
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,

View File

@ -456,22 +456,21 @@ static bool isFloatingPointZero(SDValue Op) {
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(SDValue Op, unsigned Val) {
return Op.getOpcode() == ISD::UNDEF ||
cast<ConstantSDNode>(Op)->getZExtValue() == Val;
static bool isConstantOrUndef(int Op, int Val) {
return Op < 0 || Op == Val;
}
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
if (!isUnary) {
for (unsigned i = 0; i != 16; ++i)
if (!isConstantOrUndef(N->getOperand(i), i*2+1))
if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
return false;
} else {
for (unsigned i = 0; i != 8; ++i)
if (!isConstantOrUndef(N->getOperand(i), i*2+1) ||
!isConstantOrUndef(N->getOperand(i+8), i*2+1))
if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
return false;
}
return true;
@ -479,18 +478,18 @@ bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
if (!isUnary) {
for (unsigned i = 0; i != 16; i += 2)
if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||
!isConstantOrUndef(N->getOperand(i+1), i*2+3))
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
return false;
} else {
for (unsigned i = 0; i != 8; i += 2)
if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||
!isConstantOrUndef(N->getOperand(i+1), i*2+3) ||
!isConstantOrUndef(N->getOperand(i+8), i*2+2) ||
!isConstantOrUndef(N->getOperand(i+9), i*2+3))
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
!isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
return false;
}
return true;
@ -498,27 +497,28 @@ bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(SDNode *N, unsigned UnitSize,
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned LHSStart, unsigned RHSStart) {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
assert(N->getValueType(0) == MVT::v16i8 &&
"PPC only supports shuffles by bytes!");
assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!");
for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
LHSStart+j+i*UnitSize) ||
!isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
RHSStart+j+i*UnitSize))
return false;
}
return true;
return true;
}
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
bool isUnary) {
if (!isUnary)
return isVMerge(N, UnitSize, 8, 24);
return isVMerge(N, UnitSize, 8, 8);
@ -526,7 +526,8 @@ bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
bool isUnary) {
if (!isUnary)
return isVMerge(N, UnitSize, 0, 16);
return isVMerge(N, UnitSize, 0, 0);
@ -536,91 +537,90 @@ bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
assert(N->getValueType(0) == MVT::v16i8 &&
"PPC only supports shuffles by bytes!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
// Find the first non-undef value in the shuffle mask.
unsigned i;
for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
/*search*/;
if (i == 16) return -1; // all undef.
// Otherwise, check to see if the rest of the elements are consequtively
// Otherwise, check to see if the rest of the elements are consecutively
// numbered from this value.
unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getZExtValue();
unsigned ShiftAmt = SVOp->getMaskElt(i);
if (ShiftAmt < i) return -1;
ShiftAmt -= i;
if (!isUnary) {
// Check the rest of the elements to see if they are consequtive.
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
return -1;
} else {
// Check the rest of the elements to see if they are consequtive.
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
return -1;
}
return ShiftAmt;
}
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
N->getNumOperands() == 16 &&
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
assert(N->getValueType(0) == MVT::v16i8 &&
(EltSize == 1 || EltSize == 2 || EltSize == 4));
// This is a splat operation if each element of the permute is the same, and
// if the value doesn't reference the second vector.
unsigned ElementBase = 0;
SDValue Elt = N->getOperand(0);
if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
ElementBase = EltV->getZExtValue();
else
return false; // FIXME: Handle UNDEF elements too!
if (cast<ConstantSDNode>(Elt)->getZExtValue() >= 16)
unsigned ElementBase = N->getMaskElt(0);
// FIXME: Handle UNDEF elements too!
if (ElementBase >= 16)
return false;
// Check that they are consequtive.
for (unsigned i = 1; i != EltSize; ++i) {
if (!isa<ConstantSDNode>(N->getOperand(i)) ||
cast<ConstantSDNode>(N->getOperand(i))->getZExtValue() != i+ElementBase)
// Check that the indices are consecutive, in the case of a multi-byte element
// splatted with a v16i8 mask.
for (unsigned i = 1; i != EltSize; ++i)
if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
return false;
}
assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!");
if (N->getMaskElt(i) < 0) continue;
for (unsigned j = 0; j != EltSize; ++j)
if (N->getOperand(i+j) != N->getOperand(j))
if (N->getMaskElt(i+j) != N->getMaskElt(j))
return false;
}
return true;
}
/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
assert(N->getOpcode() == ISD::BUILD_VECTOR);
if (PPC::isSplatShuffleMask(N, N->getNumOperands()))
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N))
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
APInt APVal, APUndef;
unsigned BitSize;
bool HasAnyUndefs;
if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32))
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
return CFP->getValueAPF().isNegZero();
return false;
}
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
assert(isSplatShuffleMask(N, EltSize));
return cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() / EltSize;
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
return SVOp->getMaskElt(0) / EltSize;
}
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
@ -3149,11 +3149,10 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS);
RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS);
SDValue Ops[16];
int Ops[16];
for (unsigned i = 0; i != 16; ++i)
Ops[i] = DAG.getConstant(i+Amt, MVT::i8);
SDValue T = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, LHS, RHS,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops,16));
Ops[i] = i + Amt;
SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
}
@ -3354,7 +3353,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
unsigned ShufIdxs[16];
int ShufIdxs[16];
switch (OpNum) {
default: assert(0 && "Unknown i32 permute!");
case OP_VMRGHW:
@ -3392,13 +3391,11 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
case OP_VSLDOI12:
return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
}
SDValue Ops[16];
for (unsigned i = 0; i != 16; ++i)
Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i8);
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, OpLHS.getValueType(),
OpLHS, OpRHS,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops, 16));
MVT VT = OpLHS.getValueType();
OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS);
OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS);
SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
}
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
@ -3406,28 +3403,29 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
/// return the code it can be lowered into. Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) {
SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDValue PermMask = Op.getOperand(2);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
MVT VT = Op.getValueType();
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
// selected by the instruction selector.
if (V2.getOpcode() == ISD::UNDEF) {
if (PPC::isSplatShuffleMask(PermMask.getNode(), 1) ||
PPC::isSplatShuffleMask(PermMask.getNode(), 2) ||
PPC::isSplatShuffleMask(PermMask.getNode(), 4) ||
PPC::isVPKUWUMShuffleMask(PermMask.getNode(), true) ||
PPC::isVPKUHUMShuffleMask(PermMask.getNode(), true) ||
PPC::isVSLDOIShuffleMask(PermMask.getNode(), true) != -1 ||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, true) ||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, true) ||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, true) ||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, true) ||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, true) ||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, true)) {
if (PPC::isSplatShuffleMask(SVOp, 1) ||
PPC::isSplatShuffleMask(SVOp, 2) ||
PPC::isSplatShuffleMask(SVOp, 4) ||
PPC::isVPKUWUMShuffleMask(SVOp, true) ||
PPC::isVPKUHUMShuffleMask(SVOp, true) ||
PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
return Op;
}
}
@ -3435,29 +3433,31 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Altivec has a variety of "shuffle immediates" that take two vector inputs
// and produce a fixed permutation. If any of these match, do not lower to
// VPERM.
if (PPC::isVPKUWUMShuffleMask(PermMask.getNode(), false) ||
PPC::isVPKUHUMShuffleMask(PermMask.getNode(), false) ||
PPC::isVSLDOIShuffleMask(PermMask.getNode(), false) != -1 ||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, false) ||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, false) ||
PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, false) ||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, false) ||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, false) ||
PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, false))
if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
PPC::isVPKUHUMShuffleMask(SVOp, false) ||
PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
PPC::isVMRGHShuffleMask(SVOp, 4, false))
return Op;
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
// perfect shuffle table to emit an optimal matching sequence.
SmallVector<int, 16> PermMask;
SVOp->getMask(PermMask);
unsigned PFIndexes[4];
bool isFourElementShuffle = true;
for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
unsigned EltNo = 8; // Start out undef.
for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
if (PermMask[i*4+j] < 0)
continue; // Undef, ignore it.
unsigned ByteSource =
cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getZExtValue();
unsigned ByteSource = PermMask[i*4+j];
if ((ByteSource & 3) != j) {
isFourElementShuffle = false;
break;
@ -3509,12 +3509,8 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
SmallVector<SDValue, 16> ResultMask;
for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
unsigned SrcElt;
if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
SrcElt = 0;
else
SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
for (unsigned j = 0; j != BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
@ -3704,13 +3700,12 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts);
// Merge the results together.
SDValue Ops[16];
int Ops[16];
for (unsigned i = 0; i != 8; ++i) {
Ops[i*2 ] = DAG.getConstant(2*i+1, MVT::i8);
Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8);
Ops[i*2 ] = 2*i+1;
Ops[i*2+1] = 2*i+1+16;
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, EvenParts, OddParts,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, Ops, 16));
return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
} else {
assert(0 && "Unknown mul to lower!");
abort();

View File

@ -175,19 +175,21 @@ namespace llvm {
namespace PPC {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool isVPKUHUMShuffleMask(SDNode *N, bool isUnary);
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool isVPKUWUMShuffleMask(SDNode *N, bool isUnary);
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary);
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
bool isUnary);
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary);
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
bool isUnary);
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
@ -196,7 +198,7 @@ namespace llvm {
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool isSplatShuffleMask(SDNode *N, unsigned EltSize);
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.

View File

@ -15,96 +15,118 @@
// Altivec transformation functions and pattern fragments.
//
/// VPKUHUM_shuffle_mask/VPKUWUM_shuffle_mask - Return true if this is a valid
/// shuffle mask for the VPKUHUM or VPKUWUM instructions.
def VPKUHUM_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVPKUHUMShuffleMask(N, false);
}]>;
def VPKUWUM_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVPKUWUMShuffleMask(N, false);
}]>;
def VPKUHUM_unary_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVPKUHUMShuffleMask(N, true);
def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
}]>;
def VPKUWUM_unary_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVPKUWUMShuffleMask(N, true);
def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
}]>;
def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
}]>;
def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
}]>;
def VMRGLB_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVMRGLShuffleMask(N, 1, false);
def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
}]>;
def VMRGLH_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVMRGLShuffleMask(N, 2, false);
def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
}]>;
def VMRGLW_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVMRGLShuffleMask(N, 4, false);
def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
}]>;
def VMRGHB_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVMRGHShuffleMask(N, 1, false);
def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
}]>;
def VMRGHH_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVMRGHShuffleMask(N, 2, false);
def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
}]>;
def VMRGHW_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVMRGHShuffleMask(N, 4, false);
def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
}]>;
def VMRGLB_unary_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVMRGLShuffleMask(N, 1, true);
def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
}]>;
def VMRGLH_unary_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVMRGLShuffleMask(N, 2, true);
def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
}]>;
def VMRGLW_unary_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVMRGLShuffleMask(N, 4, true);
def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
}]>;
def VMRGHB_unary_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVMRGHShuffleMask(N, 1, true);
def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
}]>;
def VMRGHH_unary_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVMRGHShuffleMask(N, 2, true);
def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
}]>;
def VMRGHW_unary_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isVMRGHShuffleMask(N, 4, true);
def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
}]>;
def VSLDOI_get_imm : SDNodeXForm<build_vector, [{
def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
}]>;
def VSLDOI_shuffle_mask : PatLeaf<(build_vector), [{
def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVSLDOIShuffleMask(N, false) != -1;
}], VSLDOI_get_imm>;
/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
/// vector_shuffle(X,undef,mask) by the dag combiner.
def VSLDOI_unary_get_imm : SDNodeXForm<build_vector, [{
def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
}]>;
def VSLDOI_unary_shuffle_mask : PatLeaf<(build_vector), [{
def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVSLDOIShuffleMask(N, true) != -1;
}], VSLDOI_unary_get_imm>;
// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
def VSPLTB_get_imm : SDNodeXForm<build_vector, [{
def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
return getI32Imm(PPC::getVSPLTImmediate(N, 1));
}]>;
def VSPLTB_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isSplatShuffleMask(N, 1);
def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
}], VSPLTB_get_imm>;
def VSPLTH_get_imm : SDNodeXForm<build_vector, [{
def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
return getI32Imm(PPC::getVSPLTImmediate(N, 2));
}]>;
def VSPLTH_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isSplatShuffleMask(N, 2);
def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
}], VSPLTH_get_imm>;
def VSPLTW_get_imm : SDNodeXForm<build_vector, [{
def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
return getI32Imm(PPC::getVSPLTImmediate(N, 4));
}]>;
def VSPLTW_shuffle_mask : PatLeaf<(build_vector), [{
return PPC::isSplatShuffleMask(N, 4);
def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4);
}], VSPLTW_get_imm>;
@ -268,8 +290,7 @@ def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>;
def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
"vsldoi $vD, $vA, $vB, $SH", VecFP,
[(set VRRC:$vD,
(vector_shuffle (v16i8 VRRC:$vA), VRRC:$vB,
VSLDOI_shuffle_mask:$SH))]>;
(vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>;
// VX-Form instructions. AltiVec arithmetic ops.
def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
@ -345,28 +366,22 @@ def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>;
def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghb $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
VRRC:$vB, VMRGHB_shuffle_mask))]>;
[(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>;
def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghh $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
VRRC:$vB, VMRGHH_shuffle_mask))]>;
[(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>;
def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrghw $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
VRRC:$vB, VMRGHW_shuffle_mask))]>;
[(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>;
def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglb $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
VRRC:$vB, VMRGLB_shuffle_mask))]>;
[(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>;
def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglh $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
VRRC:$vB, VMRGLH_shuffle_mask))]>;
[(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>;
def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vmrglw $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
VRRC:$vB, VMRGLW_shuffle_mask))]>;
[(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>;
def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
@ -440,16 +455,16 @@ def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>;
def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vspltb $vD, $vB, $UIMM", VecPerm,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef),
VSPLTB_shuffle_mask:$UIMM))]>;
[(set VRRC:$vD,
(vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vsplth $vD, $vB, $UIMM", VecPerm,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef),
VSPLTH_shuffle_mask:$UIMM))]>;
[(set VRRC:$vD,
(vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vspltw $vD, $vB, $UIMM", VecPerm,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vB), (undef),
VSPLTW_shuffle_mask:$UIMM))]>;
[(set VRRC:$vD,
(vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>;
def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>;
@ -479,13 +494,13 @@ def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>;
def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>;
def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vpkuhum $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
VRRC:$vB, VPKUHUM_shuffle_mask))]>;
[(set VRRC:$vD,
(vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>;
def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vpkuwum $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (vector_shuffle (v16i8 VRRC:$vA),
VRRC:$vB, VPKUWUM_shuffle_mask))]>;
[(set VRRC:$vD,
(vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>;
// Vector Unpack.
@ -603,25 +618,25 @@ def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
// Shuffles.
// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VSLDOI_unary_shuffle_mask:$in),
(VSLDOI VRRC:$vA, VRRC:$vA, VSLDOI_unary_shuffle_mask:$in)>;
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,VPKUWUM_unary_shuffle_mask:$in),
def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef),
(VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>;
def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef),
(VPKUWUM VRRC:$vA, VRRC:$vA)>;
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,VPKUHUM_unary_shuffle_mask:$in),
def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef),
(VPKUHUM VRRC:$vA, VRRC:$vA)>;
// Match vmrg*(x,x)
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLB_unary_shuffle_mask:$in),
def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef),
(VMRGLB VRRC:$vA, VRRC:$vA)>;
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLH_unary_shuffle_mask:$in),
def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef),
(VMRGLH VRRC:$vA, VRRC:$vA)>;
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGLW_unary_shuffle_mask:$in),
def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef),
(VMRGLW VRRC:$vA, VRRC:$vA)>;
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHB_unary_shuffle_mask:$in),
def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef),
(VMRGHB VRRC:$vA, VRRC:$vA)>;
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHH_unary_shuffle_mask:$in),
def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef),
(VMRGHH VRRC:$vA, VRRC:$vA)>;
def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef, VMRGHW_unary_shuffle_mask:$in),
def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
(VMRGHW VRRC:$vA, VRRC:$vA)>;
// Logical Operations

File diff suppressed because it is too large Load Diff

View File

@ -230,7 +230,8 @@ namespace llvm {
// VSHL, VSRL - Vector logical left / right shift.
VSHL, VSRL,
// CMPPD, CMPPS - Vector double/float comparison.
// CMPPD, CMPPS - Vector double/float comparison.
CMPPD, CMPPS,
@ -251,80 +252,72 @@ namespace llvm {
namespace X86 {
/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool isPSHUFDMask(SDNode *N);
bool isPSHUFDMask(ShuffleVectorSDNode *N);
/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool isPSHUFHWMask(SDNode *N);
bool isPSHUFHWMask(ShuffleVectorSDNode *N);
/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool isPSHUFLWMask(SDNode *N);
bool isPSHUFLWMask(ShuffleVectorSDNode *N);
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
bool isSHUFPMask(SDNode *N);
bool isSHUFPMask(ShuffleVectorSDNode *N);
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool isMOVHLPSMask(SDNode *N);
bool isMOVHLPSMask(ShuffleVectorSDNode *N);
/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool isMOVHLPS_v_undef_Mask(SDNode *N);
bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N);
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
bool isMOVLPMask(SDNode *N);
/// specifies a shuffle of elements that is suitable for MOVLP{S|D}.
bool isMOVLPMask(ShuffleVectorSDNode *N);
/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// specifies a shuffle of elements that is suitable for MOVHP{S|D}.
/// as well as MOVLHPS.
bool isMOVHPMask(SDNode *N);
bool isMOVHPMask(ShuffleVectorSDNode *N);
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
bool isUNPCKLMask(SDNode *N, bool V2IsSplat = false);
bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
bool isUNPCKHMask(SDNode *N, bool V2IsSplat = false);
bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
bool isUNPCKL_v_undef_Mask(SDNode *N);
bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N);
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
bool isUNPCKH_v_undef_Mask(SDNode *N);
bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N);
/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
bool isMOVLMask(SDNode *N);
bool isMOVLMask(ShuffleVectorSDNode *N);
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
bool isMOVSHDUPMask(SDNode *N);
bool isMOVSHDUPMask(ShuffleVectorSDNode *N);
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool isMOVSLDUPMask(SDNode *N);
/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element.
bool isSplatMask(SDNode *N);
/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of zero element.
bool isSplatLoMask(SDNode *N);
bool isMOVSLDUPMask(ShuffleVectorSDNode *N);
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
bool isMOVDDUPMask(SDNode *N);
bool isMOVDDUPMask(ShuffleVectorSDNode *N);
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
@ -477,14 +470,13 @@ namespace llvm {
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask
/// values are assumed to be legal.
virtual bool isShuffleMaskLegal(SDValue Mask, MVT VT) const;
virtual bool isShuffleMaskLegal(SmallVectorImpl<int> &Mask, MVT VT) const;
/// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is
/// used by Targets can use this to indicate if there is a suitable
/// VECTOR_SHUFFLE that can be used to replace a VAND with a constant
/// pool entry.
virtual bool isVectorClearMaskLegal(const std::vector<SDValue> &BVOps,
MVT EVT, SelectionDAG &DAG) const;
virtual bool isVectorClearMaskLegal(SmallVectorImpl<int> &M, MVT VT) const;
/// ShouldShrinkFPConstant - If true, then instruction selection should
/// seek to shrink the FP constant of the specified type to a smaller type

View File

@ -3821,6 +3821,7 @@ def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst),
(implicit EFLAGS)),
(DEC32m addr:$dst)>, Requires<[In32BitMode]>;
//===----------------------------------------------------------------------===//
// Floating Point Stack Support
//===----------------------------------------------------------------------===//

View File

@ -30,33 +30,37 @@ def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>;
// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to
// PSHUFW imm.
def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>;
// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...>
def MMX_UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isUNPCKHMask(N);
def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
}]>;
// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...>
def MMX_UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isUNPCKLMask(N);
def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
}]>;
// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
def MMX_UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isUNPCKH_v_undef_Mask(N);
def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
}]>;
// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
def MMX_UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isUNPCKL_v_undef_Mask(N);
def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
}]>;
// Patterns for shuffling.
def MMX_PSHUFW_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isPSHUFDMask(N);
def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
}], MMX_SHUFFLE_get_shuf_imm>;
//===----------------------------------------------------------------------===//
@ -185,9 +189,8 @@ def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src),
def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (vector_shuffle immAllZerosV,
(v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src)))),
MOVL_shuffle_mask)))]>;
(movl immAllZerosV,
(v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>;
let neverHasSideEffects = 1 in
def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src),
@ -319,86 +322,74 @@ let isTwoAddress = 1 in {
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v8i8 (vector_shuffle VR64:$src1, VR64:$src2,
MMX_UNPCKH_shuffle_mask)))]>;
(v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v8i8 (vector_shuffle VR64:$src1,
(bc_v8i8 (load_mmx addr:$src2)),
MMX_UNPCKH_shuffle_mask)))]>;
(v8i8 (mmx_unpckh VR64:$src1,
(bc_v8i8 (load_mmx addr:$src2)))))]>;
def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v4i16 (vector_shuffle VR64:$src1, VR64:$src2,
MMX_UNPCKH_shuffle_mask)))]>;
(v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v4i16 (vector_shuffle VR64:$src1,
(bc_v4i16 (load_mmx addr:$src2)),
MMX_UNPCKH_shuffle_mask)))]>;
(v4i16 (mmx_unpckh VR64:$src1,
(bc_v4i16 (load_mmx addr:$src2)))))]>;
def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
MMX_UNPCKH_shuffle_mask)))]>;
(v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v2i32 (vector_shuffle VR64:$src1,
(bc_v2i32 (load_mmx addr:$src2)),
MMX_UNPCKH_shuffle_mask)))]>;
(v2i32 (mmx_unpckh VR64:$src1,
(bc_v2i32 (load_mmx addr:$src2)))))]>;
// Unpack Low Packed Data Instructions
def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v8i8 (vector_shuffle VR64:$src1, VR64:$src2,
MMX_UNPCKL_shuffle_mask)))]>;
(v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v8i8 (vector_shuffle VR64:$src1,
(bc_v8i8 (load_mmx addr:$src2)),
MMX_UNPCKL_shuffle_mask)))]>;
(v8i8 (mmx_unpckl VR64:$src1,
(bc_v8i8 (load_mmx addr:$src2)))))]>;
def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v4i16 (vector_shuffle VR64:$src1, VR64:$src2,
MMX_UNPCKL_shuffle_mask)))]>;
(v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v4i16 (vector_shuffle VR64:$src1,
(bc_v4i16 (load_mmx addr:$src2)),
MMX_UNPCKL_shuffle_mask)))]>;
(v4i16 (mmx_unpckl VR64:$src1,
(bc_v4i16 (load_mmx addr:$src2)))))]>;
def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
MMX_UNPCKL_shuffle_mask)))]>;
(v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
(outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR64:$dst,
(v2i32 (vector_shuffle VR64:$src1,
(bc_v2i32 (load_mmx addr:$src2)),
MMX_UNPCKL_shuffle_mask)))]>;
(v2i32 (mmx_unpckl VR64:$src1,
(bc_v2i32 (load_mmx addr:$src2)))))]>;
}
// -- Pack Instructions
@ -411,17 +402,13 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(v4i16 (vector_shuffle
VR64:$src1, (undef),
MMX_PSHUFW_shuffle_mask:$src2)))]>;
(v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(v4i16 (vector_shuffle
(bc_v4i16 (load_mmx addr:$src1)),
(undef),
MMX_PSHUFW_shuffle_mask:$src2)))]>;
(mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)),
(undef)))]>;
// -- Conversion Instructions
let neverHasSideEffects = 1 in {
@ -627,34 +614,27 @@ def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
// Patterns to perform canonical versions of vector shuffling.
let AddedComplexity = 10 in {
def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef),
MMX_UNPCKL_v_undef_shuffle_mask)),
def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))),
(MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>;
def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef),
MMX_UNPCKL_v_undef_shuffle_mask)),
def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))),
(MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>;
def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef),
MMX_UNPCKL_v_undef_shuffle_mask)),
def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))),
(MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>;
}
let AddedComplexity = 10 in {
def : Pat<(v8i8 (vector_shuffle VR64:$src, (undef),
MMX_UNPCKH_v_undef_shuffle_mask)),
def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))),
(MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>;
def : Pat<(v4i16 (vector_shuffle VR64:$src, (undef),
MMX_UNPCKH_v_undef_shuffle_mask)),
def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))),
(MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>;
def : Pat<(v2i32 (vector_shuffle VR64:$src, (undef),
MMX_UNPCKH_v_undef_shuffle_mask)),
def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))),
(MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
}
// Patterns to perform vector shuffling with a zeroed out vector.
let AddedComplexity = 20 in {
def : Pat<(bc_v2i32 (vector_shuffle immAllZerosV,
(v2i32 (scalar_to_vector (load_mmx addr:$src))),
MMX_UNPCKL_shuffle_mask)),
def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV,
(v2i32 (scalar_to_vector (load_mmx addr:$src))))),
(MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>;
}

View File

@ -175,103 +175,108 @@ def PSxLDQ_imm : SDNodeXForm<imm, [{
// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
// SHUFP* etc. imm.
def SHUFFLE_get_shuf_imm : SDNodeXForm<build_vector, [{
def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShuffleSHUFImmediate(N));
}]>;
// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
// PSHUFHW imm.
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<build_vector, [{
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
}]>;
// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
// PSHUFLW imm.
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<build_vector, [{
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
}]>;
def SSE_splat_mask : PatLeaf<(build_vector), [{
return X86::isSplatMask(N);
def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
}]>;
def movddup : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
}]>;
def movhp : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movlp : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movl : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
}]>;
def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
}]>;
def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
}]>;
def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
}]>;
def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
}]>;
def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_shuf_imm>;
def SSE_splat_lo_mask : PatLeaf<(build_vector), [{
return X86::isSplatLoMask(N);
}]>;
def MOVDDUP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVDDUPMask(N);
}]>;
def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVHLPSMask(N);
}]>;
def MOVHLPS_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVHLPS_v_undef_Mask(N);
}]>;
def MOVHP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVHPMask(N);
}]>;
def MOVLP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVLPMask(N);
}]>;
def MOVL_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVLMask(N);
}]>;
def MOVSHDUP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVSHDUPMask(N);
}]>;
def MOVSLDUP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isMOVSLDUPMask(N);
}]>;
def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isUNPCKLMask(N);
}]>;
def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isUNPCKHMask(N);
}]>;
def UNPCKL_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isUNPCKL_v_undef_Mask(N);
}]>;
def UNPCKH_v_undef_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isUNPCKH_v_undef_Mask(N);
}]>;
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isPSHUFDMask(N);
def shufp : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_shuf_imm>;
def PSHUFHW_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isPSHUFHWMask(N);
def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_pshufhw_imm>;
def PSHUFLW_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isPSHUFLWMask(N);
def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_pshuflw_imm>;
def SHUFP_unary_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isPSHUFDMask(N);
}], SHUFFLE_get_shuf_imm>;
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
def PSHUFD_binary_shuffle_mask : PatLeaf<(build_vector), [{
return X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
//===----------------------------------------------------------------------===//
// SSE scalar FP Instructions
//===----------------------------------------------------------------------===//
@ -704,16 +709,14 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (vector_shuffle VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
MOVLP_shuffle_mask)))]>;
(movlp VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
def MOVHPSrm : PSI<0x16, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (vector_shuffle VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))),
MOVHP_shuffle_mask)))]>;
(movhp VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
@ -728,29 +731,25 @@ def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
(v2f64 (vector_shuffle
(bc_v2f64 (v4f32 VR128:$src)), (undef),
UNPCKH_shuffle_mask)), (iPTR 0))),
addr:$dst)]>;
(unpckh (bc_v2f64 (v4f32 VR128:$src)),
(undef)), (iPTR 0))), addr:$dst)]>;
let Constraints = "$src1 = $dst" in {
let AddedComplexity = 20 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
MOVHP_shuffle_mask)))]>;
(v4f32 (movhp VR128:$src1, VR128:$src2)))]>;
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
MOVHLPS_shuffle_mask)))]>;
(v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
let AddedComplexity = 20 in
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), MOVDDUP_shuffle_mask)),
def : Pat<(v4f32 (movddup VR128:$src, (undef))),
(MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
@ -908,51 +907,41 @@ let Constraints = "$src1 = $dst" in {
let isConvertibleToThreeAddress = 1 in // Convert to pshufd
def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1,
VR128:$src2, i32i8imm:$src3),
VR128:$src2, i8imm:$src3),
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(v4f32 (vector_shuffle
VR128:$src1, VR128:$src2,
SHUFP_shuffle_mask:$src3)))]>;
(v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
f128mem:$src2, i32i8imm:$src3),
f128mem:$src2, i8imm:$src3),
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(v4f32 (vector_shuffle
VR128:$src1, (memopv4f32 addr:$src2),
SHUFP_shuffle_mask:$src3)))]>;
(v4f32 (shufp:$src3
VR128:$src1, (memopv4f32 addr:$src2))))]>;
let AddedComplexity = 10 in {
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpckhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (vector_shuffle
VR128:$src1, VR128:$src2,
UNPCKH_shuffle_mask)))]>;
(v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpckhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (vector_shuffle
VR128:$src1, (memopv4f32 addr:$src2),
UNPCKH_shuffle_mask)))]>;
(v4f32 (unpckh VR128:$src1,
(memopv4f32 addr:$src2))))]>;
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpcklps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (vector_shuffle
VR128:$src1, VR128:$src2,
UNPCKL_shuffle_mask)))]>;
(v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpcklps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (vector_shuffle
VR128:$src1, (memopv4f32 addr:$src2),
UNPCKL_shuffle_mask)))]>;
(unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
@ -1044,8 +1033,7 @@ let neverHasSideEffects = 1 in
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
MOVL_shuffle_mask)))]>;
(v4f32 (movl VR128:$src1, VR128:$src2)))]>;
}
// Move to lower bits of a VR128 and zeroing upper bits.
@ -1451,16 +1439,14 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movlpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (vector_shuffle VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)),
MOVLP_shuffle_mask)))]>;
(v2f64 (movlp VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))]>;
def MOVHPDrm : PDI<0x16, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (vector_shuffle VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)),
MOVHP_shuffle_mask)))]>;
(v2f64 (movhp VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
@ -1474,9 +1460,8 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
(v2f64 (vector_shuffle VR128:$src, (undef),
UNPCKH_shuffle_mask)), (iPTR 0))),
addr:$dst)]>;
(v2f64 (unpckh VR128:$src, (undef))),
(iPTR 0))), addr:$dst)]>;
// SSE2 instructions without OpSize prefix
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@ -1744,48 +1729,39 @@ let Constraints = "$src1 = $dst" in {
def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, (v2f64 (vector_shuffle
VR128:$src1, VR128:$src2,
SHUFP_shuffle_mask:$src3)))]>;
[(set VR128:$dst,
(v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
f128mem:$src2, i8imm:$src3),
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(v2f64 (vector_shuffle
VR128:$src1, (memopv2f64 addr:$src2),
SHUFP_shuffle_mask:$src3)))]>;
(v2f64 (shufp:$src3
VR128:$src1, (memopv2f64 addr:$src2))))]>;
let AddedComplexity = 10 in {
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpckhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (vector_shuffle
VR128:$src1, VR128:$src2,
UNPCKH_shuffle_mask)))]>;
(v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpckhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (vector_shuffle
VR128:$src1, (memopv2f64 addr:$src2),
UNPCKH_shuffle_mask)))]>;
(v2f64 (unpckh VR128:$src1,
(memopv2f64 addr:$src2))))]>;
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"unpcklpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (vector_shuffle
VR128:$src1, VR128:$src2,
UNPCKL_shuffle_mask)))]>;
(v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
"unpcklpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (vector_shuffle
VR128:$src1, (memopv2f64 addr:$src2),
UNPCKL_shuffle_mask)))]>;
(unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
@ -2043,49 +2019,43 @@ defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
def PSHUFDri : PDIi8<0x70, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (vector_shuffle
VR128:$src1, (undef),
PSHUFD_shuffle_mask:$src2)))]>;
[(set VR128:$dst, (v4i32 (pshufd:$src2
VR128:$src1, (undef))))]>;
def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (vector_shuffle
[(set VR128:$dst, (v4i32 (pshufd:$src2
(bc_v4i32(memopv2i64 addr:$src1)),
(undef),
PSHUFD_shuffle_mask:$src2)))]>;
(undef))))]>;
// SSE2 with ImmT == Imm8 and XS prefix.
def PSHUFHWri : Ii8<0x70, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
"pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (vector_shuffle
VR128:$src1, (undef),
PSHUFHW_shuffle_mask:$src2)))]>,
[(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1,
(undef))))]>,
XS, Requires<[HasSSE2]>;
def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (vector_shuffle
(bc_v8i16 (memopv2i64 addr:$src1)),
(undef),
PSHUFHW_shuffle_mask:$src2)))]>,
[(set VR128:$dst, (v8i16 (pshufhw:$src2
(bc_v8i16 (memopv2i64 addr:$src1)),
(undef))))]>,
XS, Requires<[HasSSE2]>;
// SSE2 with ImmT == Imm8 and XD prefix.
def PSHUFLWri : Ii8<0x70, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
(outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
"pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (vector_shuffle
VR128:$src1, (undef),
PSHUFLW_shuffle_mask:$src2)))]>,
[(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1,
(undef))))]>,
XD, Requires<[HasSSE2]>;
def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i32i8imm:$src2),
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v8i16 (vector_shuffle
(bc_v8i16 (memopv2i64 addr:$src1)),
(undef),
PSHUFLW_shuffle_mask:$src2)))]>,
[(set VR128:$dst, (v8i16 (pshuflw:$src2
(bc_v8i16 (memopv2i64 addr:$src1)),
(undef))))]>,
XD, Requires<[HasSSE2]>;
@ -2094,107 +2064,91 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
UNPCKL_shuffle_mask)))]>;
(v16i8 (unpckl VR128:$src1, VR128:$src2)))]>;
def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v16i8 (vector_shuffle VR128:$src1,
(bc_v16i8 (memopv2i64 addr:$src2)),
UNPCKL_shuffle_mask)))]>;
(unpckl VR128:$src1,
(bc_v16i8 (memopv2i64 addr:$src2))))]>;
def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
UNPCKL_shuffle_mask)))]>;
(v8i16 (unpckl VR128:$src1, VR128:$src2)))]>;
def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v8i16 (vector_shuffle VR128:$src1,
(bc_v8i16 (memopv2i64 addr:$src2)),
UNPCKL_shuffle_mask)))]>;
(unpckl VR128:$src1,
(bc_v8i16 (memopv2i64 addr:$src2))))]>;
def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
UNPCKL_shuffle_mask)))]>;
(v4i32 (unpckl VR128:$src1, VR128:$src2)))]>;
def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4i32 (vector_shuffle VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2)),
UNPCKL_shuffle_mask)))]>;
(unpckl VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2))))]>;
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
UNPCKL_shuffle_mask)))]>;
(v2i64 (unpckl VR128:$src1, VR128:$src2)))]>;
def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2i64 (vector_shuffle VR128:$src1,
(memopv2i64 addr:$src2),
UNPCKL_shuffle_mask)))]>;
(v2i64 (unpckl VR128:$src1,
(memopv2i64 addr:$src2))))]>;
def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
UNPCKH_shuffle_mask)))]>;
(v16i8 (unpckh VR128:$src1, VR128:$src2)))]>;
def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhbw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v16i8 (vector_shuffle VR128:$src1,
(bc_v16i8 (memopv2i64 addr:$src2)),
UNPCKH_shuffle_mask)))]>;
[(set VR128:$dst,
(unpckh VR128:$src1,
(bc_v16i8 (memopv2i64 addr:$src2))))]>;
def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
UNPCKH_shuffle_mask)))]>;
(v8i16 (unpckh VR128:$src1, VR128:$src2)))]>;
def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhwd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v8i16 (vector_shuffle VR128:$src1,
(bc_v8i16 (memopv2i64 addr:$src2)),
UNPCKH_shuffle_mask)))]>;
(unpckh VR128:$src1,
(bc_v8i16 (memopv2i64 addr:$src2))))]>;
def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
UNPCKH_shuffle_mask)))]>;
(v4i32 (unpckh VR128:$src1, VR128:$src2)))]>;
def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4i32 (vector_shuffle VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2)),
UNPCKH_shuffle_mask)))]>;
(unpckh VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2))))]>;
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
UNPCKH_shuffle_mask)))]>;
(v2i64 (unpckh VR128:$src1, VR128:$src2)))]>;
def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2i64 (vector_shuffle VR128:$src1,
(memopv2i64 addr:$src2),
UNPCKH_shuffle_mask)))]>;
(v2i64 (unpckh VR128:$src1,
(memopv2i64 addr:$src2))))]>;
}
// Extract / Insert
@ -2357,8 +2311,7 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movsd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
MOVL_shuffle_mask)))]>;
(v2f64 (movl VR128:$src1, VR128:$src2)))]>;
}
// Store / copy lower 64-bits of a XMM register.
@ -2449,44 +2402,35 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
// Move Instructions
def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movshdup\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (vector_shuffle
VR128:$src, (undef),
MOVSHDUP_shuffle_mask)))]>;
[(set VR128:$dst, (v4f32 (movshdup
VR128:$src, (undef))))]>;
def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movshdup\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (vector_shuffle
(memopv4f32 addr:$src), (undef),
MOVSHDUP_shuffle_mask)))]>;
[(set VR128:$dst, (movshdup
(memopv4f32 addr:$src), (undef)))]>;
def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movsldup\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (vector_shuffle
VR128:$src, (undef),
MOVSLDUP_shuffle_mask)))]>;
[(set VR128:$dst, (v4f32 (movsldup
VR128:$src, (undef))))]>;
def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movsldup\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (vector_shuffle
(memopv4f32 addr:$src), (undef),
MOVSLDUP_shuffle_mask)))]>;
[(set VR128:$dst, (movsldup
(memopv4f32 addr:$src), (undef)))]>;
def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movddup\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2f64 (vector_shuffle VR128:$src, (undef),
MOVDDUP_shuffle_mask)))]>;
[(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"movddup\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2f64 (vector_shuffle
(scalar_to_vector (loadf64 addr:$src)),
(undef), MOVDDUP_shuffle_mask)))]>;
(v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
(undef))))]>;
def : Pat<(vector_shuffle
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
(undef), MOVDDUP_shuffle_mask),
def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
(undef)),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
def : Pat<(vector_shuffle
(memopv2f64 addr:$src), (undef), MOVDDUP_shuffle_mask),
def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
@ -2555,22 +2499,18 @@ def MWAIT : I<0xC9, RawFrm, (outs), (ins), "mwait",
// vector_shuffle v1, <undef> <1, 1, 3, 3>
let AddedComplexity = 15 in
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
MOVSHDUP_shuffle_mask)),
def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
(MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
let AddedComplexity = 20 in
def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef),
MOVSHDUP_shuffle_mask)),
def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
(MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
// vector_shuffle v1, <undef> <0, 0, 2, 2>
let AddedComplexity = 15 in
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
MOVSLDUP_shuffle_mask)),
def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
(MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
let AddedComplexity = 20 in
def : Pat<(v4i32 (vector_shuffle (bc_v4i32 (memopv2i64 addr:$src)), (undef),
MOVSLDUP_shuffle_mask)),
def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
(MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
//===----------------------------------------------------------------------===//
@ -2911,207 +2851,173 @@ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
// Splat v2f64 / v2i64
let AddedComplexity = 10 in {
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
(UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
def : Pat<(unpckh (v2f64 VR128:$src), (undef)),
(UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), SSE_splat_lo_mask:$sm),
def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
(PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v2i64 VR128:$src), (undef), UNPCKH_shuffle_mask:$sm),
def : Pat<(unpckh (v2i64 VR128:$src), (undef)),
(PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
// Special unary SHUFPSrri case.
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
SHUFP_unary_shuffle_mask:$sm)),
(SHUFPSrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
(SHUFPSrri VR128:$src1, VR128:$src1,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE1]>;
let AddedComplexity = 5 in
def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
(PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
Requires<[HasSSE2]>;
// Special unary SHUFPDrri case.
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (undef),
SHUFP_unary_shuffle_mask:$sm)),
(SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
(SHUFPDrri VR128:$src1, VR128:$src1,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
// Special unary SHUFPDrri case.
def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
(SHUFPDrri VR128:$src1, VR128:$src1,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
// Unary v4f32 shuffle with PSHUF* in order to fold a load.
def : Pat<(vector_shuffle (bc_v4i32 (memopv4f32 addr:$src1)), (undef),
SHUFP_unary_shuffle_mask:$sm),
(PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>,
def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
(PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
Requires<[HasSSE2]>;
// Special binary v4i32 shuffle cases with SHUFPS.
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (v4i32 VR128:$src2),
PSHUFD_binary_shuffle_mask:$sm)),
(SHUFPSrri VR128:$src1, VR128:$src2, PSHUFD_binary_shuffle_mask:$sm)>,
def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
(SHUFPSrri VR128:$src1, VR128:$src2,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2)), PSHUFD_binary_shuffle_mask:$sm)),
(SHUFPSrmi VR128:$src1, addr:$src2, PSHUFD_binary_shuffle_mask:$sm)>,
def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
(SHUFPSrmi VR128:$src1, addr:$src2,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
// Special binary v2i64 shuffle cases using SHUFPDrri.
def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
SHUFP_shuffle_mask:$sm)),
(SHUFPDrri VR128:$src1, VR128:$src2, SHUFP_shuffle_mask:$sm)>,
def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
(SHUFPDrri VR128:$src1, VR128:$src2,
(SHUFFLE_get_shuf_imm VR128:$src3))>,
Requires<[HasSSE2]>;
// Special unary SHUFPDrri case.
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (undef),
SHUFP_unary_shuffle_mask:$sm)),
(SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
Requires<[HasSSE2]>;
// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
let AddedComplexity = 15 in {
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
UNPCKL_v_undef_shuffle_mask:$sm)),
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))),
(PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
Requires<[OptForSpeed, HasSSE2]>;
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
UNPCKL_v_undef_shuffle_mask:$sm)),
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))),
(PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
Requires<[OptForSpeed, HasSSE2]>;
}
let AddedComplexity = 10 in {
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
UNPCKL_v_undef_shuffle_mask)),
def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))),
(UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
UNPCKL_v_undef_shuffle_mask)),
def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))),
(PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
UNPCKL_v_undef_shuffle_mask)),
def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))),
(PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
UNPCKL_v_undef_shuffle_mask)),
def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))),
(PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
let AddedComplexity = 15 in {
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
UNPCKH_v_undef_shuffle_mask:$sm)),
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))),
(PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
Requires<[OptForSpeed, HasSSE2]>;
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
UNPCKH_v_undef_shuffle_mask:$sm)),
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))),
(PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
Requires<[OptForSpeed, HasSSE2]>;
}
let AddedComplexity = 10 in {
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
UNPCKH_v_undef_shuffle_mask)),
def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))),
(UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
def : Pat<(v16i8 (vector_shuffle VR128:$src, (undef),
UNPCKH_v_undef_shuffle_mask)),
def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))),
(PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v8i16 (vector_shuffle VR128:$src, (undef),
UNPCKH_v_undef_shuffle_mask)),
def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))),
(PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
UNPCKH_v_undef_shuffle_mask)),
def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
(PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
}
let AddedComplexity = 20 in {
// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
MOVHP_shuffle_mask)),
def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)),
(MOVLHPSrr VR128:$src1, VR128:$src2)>;
// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
MOVHLPS_shuffle_mask)),
def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
(MOVHLPSrr VR128:$src1, VR128:$src2)>;
// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef),
MOVHLPS_v_undef_shuffle_mask)),
def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
(MOVHLPSrr VR128:$src1, VR128:$src1)>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef),
MOVHLPS_v_undef_shuffle_mask)),
def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
(MOVHLPSrr VR128:$src1, VR128:$src1)>;
}
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVLP_shuffle_mask)),
def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVLP_shuffle_mask)),
def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVHP_shuffle_mask)),
def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))),
(MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVHP_shuffle_mask)),
def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))),
(MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVLP_shuffle_mask)),
def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVLP_shuffle_mask)),
def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVHP_shuffle_mask)),
def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))),
(MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
MOVHP_shuffle_mask)),
def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))),
(MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
}
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS
def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2,
MOVLP_shuffle_mask)), addr:$src1),
def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2,
MOVLP_shuffle_mask)), addr:$src1),
def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(store (v4f32 (vector_shuffle (load addr:$src1), VR128:$src2,
MOVHP_shuffle_mask)), addr:$src1),
def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
def : Pat<(store (v2f64 (vector_shuffle (load addr:$src1), VR128:$src2,
MOVHP_shuffle_mask)), addr:$src1),
def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(store (v4i32 (vector_shuffle
(bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2,
MOVLP_shuffle_mask)), addr:$src1),
def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2,
MOVLP_shuffle_mask)), addr:$src1),
def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(store (v4i32 (vector_shuffle
(bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2,
MOVHP_shuffle_mask)), addr:$src1),
def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
addr:$src1),
(MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
def : Pat<(store (v2i64 (vector_shuffle (load addr:$src1), VR128:$src2,
MOVHP_shuffle_mask)), addr:$src1),
def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
let AddedComplexity = 15 in {
// Setting the lowest element in the vector.
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
MOVL_shuffle_mask)),
def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
(MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
MOVL_shuffle_mask)),
def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
def : Pat<(v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
MOVLP_shuffle_mask)),
def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
MOVLP_shuffle_mask)),
def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
}
// Set lowest element and zero upper elements.
let AddedComplexity = 15 in
def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src,
MOVL_shuffle_mask)),
def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)),
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;

View File

@ -1,45 +1,9 @@
; RUN: llvm-as < %s | llc
; PR2671
define void @a(<2 x double>* %p, <2 x i8>* %q) {
%t = load <2 x double>* %p
%r = fptosi <2 x double> %t to <2 x i8>
store <2 x i8> %r, <2 x i8>* %q
ret void
}
define void @b(<2 x double>* %p, <2 x i8>* %q) {
%t = load <2 x double>* %p
%r = fptoui <2 x double> %t to <2 x i8>
store <2 x i8> %r, <2 x i8>* %q
ret void
}
define void @c(<2 x i8>* %p, <2 x double>* %q) {
%t = load <2 x i8>* %p
%r = sitofp <2 x i8> %t to <2 x double>
store <2 x double> %r, <2 x double>* %q
ret void
}
define void @d(<2 x i8>* %p, <2 x double>* %q) {
%t = load <2 x i8>* %p
%r = uitofp <2 x i8> %t to <2 x double>
store <2 x double> %r, <2 x double>* %q
ret void
}
define void @e(<2 x i8>* %p, <2 x i16>* %q) {
%t = load <2 x i8>* %p
%r = sext <2 x i8> %t to <2 x i16>
store <2 x i16> %r, <2 x i16>* %q
ret void
}
define void @f(<2 x i8>* %p, <2 x i16>* %q) {
%t = load <2 x i8>* %p
%r = zext <2 x i8> %t to <2 x i16>
store <2 x i16> %r, <2 x i16>* %q
ret void
}
define void @g(<2 x i16>* %p, <2 x i8>* %q) {
%t = load <2 x i16>* %p
%r = trunc <2 x i16> %t to <2 x i8>
store <2 x i8> %r, <2 x i8>* %q
ret void
%t = load <2 x i16>* %p
%r = trunc <2 x i16> %t to <2 x i8>
store <2 x i8> %r, <2 x i8>* %q
ret void
}

View File

@ -1,5 +1,7 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep and
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | grep psrldq
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f
; RUN: not grep and %t
; RUN: not grep psrldq %t
; RUN: grep xorps %t
define <4 x float> @test(<4 x float>* %v1) nounwind {
%tmp = load <4 x float>* %v1 ; <<4 x float>> [#uses=1]

View File

@ -1,9 +1,7 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \
; RUN: grep unpcklps | count 1
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \
; RUN: grep unpckhps | count 1
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | \
; RUN: not grep {sub.*esp}
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
; RUN: grep unpcklps %t | count 1
; RUN: grep pshufd %t | count 1
; RUN: not grep {sub.*esp} %t
define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) {
%tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2]

View File

@ -1,8 +1,10 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 | grep shufps | count 4
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | grep mov | count 2
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd | count 4
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep shufps
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep mov
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2 -mtriple=i386-apple-darwin -o %t -f
; RUN: grep shufps %t | count 4
; RUN: grep movaps %t | count 2
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t -f
; RUN: grep pshufd %t | count 4
; RUN: not grep shufps %t
; RUN: not grep mov %t
define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind {
%tmp1 = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer

View File

@ -1,8 +1,7 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 -disable-mmx -o %t -f
; RUN: grep pshufhw %t | grep 161 | count 1
; RUN: grep pslldq %t | count 1
; RUN: grep shufps %t | count 1
; RUN: not grep pslldq %t
; Test case when creating pshufhw, we incorrectly set the higher order bit
; for an undef,
@ -20,4 +19,4 @@ entry:
%0 = shufflevector <4 x i32> %in, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> < i32 undef, i32 5, i32 undef, i32 2>
store <4 x i32> %0, <4 x i32>* %dest
ret void
}
}

View File

@ -1,6 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah -o %t -f
; RUN: grep pextrw %t | count 1
; RUN: grep punpcklqdq %t | count 1
; RUN: grep movlhps %t | count 1
; RUN: grep pshufhw %t | count 1
; RUN: grep pinsrw %t | count 1
; RUN: llvm-as < %s | llc -march=x86 -mcpu=core2 -o %t -f

View File

@ -194,10 +194,6 @@ SDTypeConstraint::SDTypeConstraint(Record *R) {
ConstraintType = SDTCisOpSmallerThanOp;
x.SDTCisOpSmallerThanOp_Info.BigOperandNum =
R->getValueAsInt("BigOperandNum");
} else if (R->isSubClassOf("SDTCisIntVectorOfSameSize")) {
ConstraintType = SDTCisIntVectorOfSameSize;
x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum =
R->getValueAsInt("OtherOpNum");
} else if (R->isSubClassOf("SDTCisEltOfVec")) {
ConstraintType = SDTCisEltOfVec;
x.SDTCisEltOfVec_Info.OtherOperandNum =
@ -365,23 +361,9 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
}
return MadeChange;
}
case SDTCisIntVectorOfSameSize: {
TreePatternNode *OtherOperand =
getOperandNum(x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum,
N, NumResults);
if (OtherOperand->hasTypeSet()) {
if (!isVector(OtherOperand->getTypeNum(0)))
TP.error(N->getOperator()->getName() + " VT operand must be a vector!");
MVT IVT = OtherOperand->getTypeNum(0);
unsigned NumElements = IVT.getVectorNumElements();
IVT = MVT::getIntVectorWithNumElements(NumElements);
return NodeToApply->UpdateNodeType(IVT.getSimpleVT(), TP);
}
return false;
}
case SDTCisEltOfVec: {
TreePatternNode *OtherOperand =
getOperandNum(x.SDTCisIntVectorOfSameSize_Info.OtherOperandNum,
getOperandNum(x.SDTCisEltOfVec_Info.OtherOperandNum,
N, NumResults);
if (OtherOperand->hasTypeSet()) {
if (!isVector(OtherOperand->getTypeNum(0)))
@ -925,25 +907,6 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
if (NI.getNumResults() == 0)
MadeChange |= UpdateNodeType(MVT::isVoid, TP);
// If this is a vector_shuffle operation, apply types to the build_vector
// operation. The types of the integers don't matter, but this ensures they
// won't get checked.
if (getOperator()->getName() == "vector_shuffle" &&
getChild(2)->getOperator()->getName() == "build_vector") {
TreePatternNode *BV = getChild(2);
const std::vector<MVT::SimpleValueType> &LegalVTs
= CDP.getTargetInfo().getLegalValueTypes();
MVT::SimpleValueType LegalIntVT = MVT::Other;
for (unsigned i = 0, e = LegalVTs.size(); i != e; ++i)
if (isInteger(LegalVTs[i]) && !isVector(LegalVTs[i])) {
LegalIntVT = LegalVTs[i];
break;
}
assert(LegalIntVT != MVT::Other && "No legal integer VT?");
for (unsigned i = 0, e = BV->getNumChildren(); i != e; ++i)
MadeChange |= BV->getChild(i)->UpdateNodeType(LegalIntVT, TP);
}
return MadeChange;
} else if (getOperator()->isSubClassOf("Instruction")) {
const DAGInstruction &Inst = CDP.getInstruction(getOperator());
@ -2086,7 +2049,7 @@ void CodeGenDAGPatterns::ParsePatterns() {
IterateInference |= Result->getTree(0)->
UpdateNodeType(Pattern->getTree(0)->getExtTypes(), *Result);
} while (IterateInference);
// Verify that we inferred enough types that we can do something with the
// pattern and result. If these fire the user has to add type casts.
if (!InferredAllPatternTypes)

View File

@ -62,8 +62,7 @@ struct SDTypeConstraint {
unsigned OperandNo; // The operand # this constraint applies to.
enum {
SDTCisVT, SDTCisPtrTy, SDTCisInt, SDTCisFP, SDTCisSameAs,
SDTCisVTSmallerThanOp, SDTCisOpSmallerThanOp, SDTCisIntVectorOfSameSize,
SDTCisEltOfVec
SDTCisVTSmallerThanOp, SDTCisOpSmallerThanOp, SDTCisEltOfVec
} ConstraintType;
union { // The discriminated union.
@ -79,9 +78,6 @@ struct SDTypeConstraint {
struct {
unsigned BigOperandNum;
} SDTCisOpSmallerThanOp_Info;
struct {
unsigned OtherOperandNum;
} SDTCisIntVectorOfSameSize_Info;
struct {
unsigned OtherOperandNum;
} SDTCisEltOfVec_Info;