- Fix an x86 vector isel bug: illegal transformation of a vector_shuffle into a
  shift.
- Add a readme entry for a missing vector_shuffle optimization that results in
  awful codegen.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52740 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2008-06-25 20:52:59 +00:00
parent 041cde26ea
commit ab26227c8c
4 changed files with 54 additions and 15 deletions

View File

@ -1859,12 +1859,16 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDOperand Op) const {
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
SDOperand SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned Idx) {
SDOperand SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned i) {
MVT VT = N->getValueType(0);
SDOperand PermMask = N->getOperand(2);
SDOperand Idx = PermMask.getOperand(i);
if (Idx.getOpcode() == ISD::UNDEF)
return getNode(ISD::UNDEF, VT.getVectorElementType());
unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
unsigned NumElems = PermMask.getNumOperands();
SDOperand V = (Idx < NumElems) ? N->getOperand(0) : N->getOperand(1);
Idx %= NumElems;
SDOperand V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
Index %= NumElems;
if (V.getOpcode() == ISD::BIT_CONVERT) {
V = V.getOperand(0);
@ -1872,16 +1876,12 @@ SDOperand SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned Idx) {
return SDOperand();
}
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
return (Idx == 0) ? V.getOperand(0)
return (Index == 0) ? V.getOperand(0)
: getNode(ISD::UNDEF, VT.getVectorElementType());
if (V.getOpcode() == ISD::BUILD_VECTOR)
return V.getOperand(Idx);
if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
SDOperand Elt = PermMask.getOperand(Idx);
if (Elt.getOpcode() == ISD::UNDEF)
return getNode(ISD::UNDEF, VT.getVectorElementType());
return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Elt)->getValue());
}
return V.getOperand(Index);
if (V.getOpcode() == ISD::VECTOR_SHUFFLE)
return getShuffleScalarElt(V.Val, Index);
return SDOperand();
}

View File

@ -808,3 +808,34 @@ LC0:
With SSE4, it should be
movdqa .LC0(%rip), %xmm0
pinsrb $6, %edi, %xmm0
//===---------------------------------------------------------------------===//
We should transform a shuffle of two vectors of constants into a single vector
of constants. Likewise, an insertelement of a constant into a vector of
constants should result in a vector of constants, e.g. 2008-06-25-VecISelBug.ll.
We compiled it to something horrible:
.align 4
LCPI1_1: ## float
.long 1065353216 ## float 1
.const
.align 4
LCPI1_0: ## <4 x float>
.space 4
.long 1065353216 ## float 1
.space 4
.long 1065353216 ## float 1
.text
.align 4,0x90
.globl _t
_t:
xorps %xmm0, %xmm0
movhps LCPI1_0, %xmm0
movss LCPI1_1, %xmm1
movaps %xmm0, %xmm2
shufps $2, %xmm1, %xmm2
shufps $132, %xmm2, %xmm0
movaps %xmm0, 0

View File

@ -2933,12 +2933,12 @@ unsigned getNumOfConsecutiveZeros(SDOperand Op, SDOperand Mask,
SelectionDAG &DAG) {
unsigned NumZeros = 0;
for (unsigned i = 0; i < NumElems; ++i) {
SDOperand Idx = Mask.getOperand(Low ? i : NumElems-i-1);
unsigned Index = Low ? i : NumElems-i-1;
SDOperand Idx = Mask.getOperand(Index);
if (Idx.getOpcode() == ISD::UNDEF) {
++NumZeros;
continue;
}
unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
SDOperand Elt = DAG.getShuffleScalarElt(Op.Val, Index);
if (Elt.Val && isZeroNode(Elt))
++NumZeros;
@ -6373,8 +6373,7 @@ static bool EltsFromConsecutiveLoads(SDNode *N, SDOperand PermMask,
continue;
}
unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
SDOperand Elt = DAG.getShuffleScalarElt(N, Index);
SDOperand Elt = DAG.getShuffleScalarElt(N, i);
if (!Elt.Val ||
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.Val)))
return false;

View File

@ -0,0 +1,9 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep pslldq
; The command above assembles this file, compiles it with llc for x86 with SSE2
; enabled, and passes only if the generated assembly contains no "pslldq"
; (a whole-register byte shift). The shuffle below must not be selected as a
; shift.
define void @t() nounwind {
entry:
; Mask <0, 1, 4, 5>: result lanes 0-1 are lanes 0-1 of the first operand (all
; zeros); result lanes 2-3 are lanes 0-1 of the second operand (0.0, 1.0).
%tmp1 = shufflevector <4 x float> zeroinitializer, <4 x float> < float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >
; Insert the constant 1.0 into lane 3 of the shuffled vector.
%tmp2 = insertelement <4 x float> %tmp1, float 1.000000e+00, i32 3
; The store gives %tmp2 a use; the test only inspects the generated assembly
; and is never executed, so the null destination is harmless here.
store <4 x float> %tmp2, <4 x float>* null, align 16
unreachable
}