Fix PR10492 by teaching MOVHLPS and MOVLPS mask matching to be more strict.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137324 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2011-08-11 18:59:13 +00:00
parent 2b7b238e84
commit 59353b436a
2 changed files with 17 additions and 2 deletions

View File

@ -3863,7 +3863,10 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
if (Op->getValueType(0).getVectorNumElements() != 4)
EVT VT = Op->getValueType(0);
if (VT.getSizeInBits() != 128)
return false;
if (VT.getVectorNumElements() != 4)
return false;
for (unsigned i = 0, e = 2; i != e; ++i)
if (!isUndefOrEqual(Op->getMaskElt(i), i+2))
@ -3895,6 +3898,10 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
ShuffleVectorSDNode *Op) {
EVT VT = Op->getValueType(0);
if (VT.getSizeInBits() != 128)
return false;
if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
return false;
// If V2 is a vector load, don't do this transformation. We will try to use
@ -3902,7 +3909,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
if (ISD::isNON_EXTLoad(V2))
return false;
unsigned NumElems = Op->getValueType(0).getVectorNumElements();
unsigned NumElems = VT.getVectorNumElements();
if (NumElems != 2 && NumElems != 4)
return false;

View File

@ -42,3 +42,11 @@ allocas:
store <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32>* %ptr2vec615, align 32
ret void
}
;;; Just make sure this doesn't crash
;;; The shuffle below operates on <4 x i64>, i.e. a 256-bit vector type, so it
;;; exercises instruction selection with a shuffle mask on a vector that is
;;; wider than 128 bits.
; CHECK: _ISelCrash
define <4 x i64> @ISelCrash(<4 x i64> %a) nounwind uwtable readnone ssp {
entry:
; Mask indices 2 and 3 select the upper two elements of %a; index 4 refers to
; element 0 of the second operand, which is undef here.
%shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 4>
ret <4 x i64> %shuffle
}