mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-19 16:35:10 +00:00
- Fix an x86 vector isel bug: illegal transformation of a vector_shuffle into a
shift. - Add a README entry for a missing vector_shuffle optimization that results in awful codegen. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@52740 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
041cde26ea
commit
ab26227c8c
@ -1859,12 +1859,16 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDOperand Op) const {
|
|||||||
|
|
||||||
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
|
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
|
||||||
/// element of the result of the vector shuffle.
|
/// element of the result of the vector shuffle.
|
||||||
SDOperand SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned Idx) {
|
SDOperand SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned i) {
|
||||||
MVT VT = N->getValueType(0);
|
MVT VT = N->getValueType(0);
|
||||||
SDOperand PermMask = N->getOperand(2);
|
SDOperand PermMask = N->getOperand(2);
|
||||||
|
SDOperand Idx = PermMask.getOperand(i);
|
||||||
|
if (Idx.getOpcode() == ISD::UNDEF)
|
||||||
|
return getNode(ISD::UNDEF, VT.getVectorElementType());
|
||||||
|
unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
|
||||||
unsigned NumElems = PermMask.getNumOperands();
|
unsigned NumElems = PermMask.getNumOperands();
|
||||||
SDOperand V = (Idx < NumElems) ? N->getOperand(0) : N->getOperand(1);
|
SDOperand V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
|
||||||
Idx %= NumElems;
|
Index %= NumElems;
|
||||||
|
|
||||||
if (V.getOpcode() == ISD::BIT_CONVERT) {
|
if (V.getOpcode() == ISD::BIT_CONVERT) {
|
||||||
V = V.getOperand(0);
|
V = V.getOperand(0);
|
||||||
@ -1872,16 +1876,12 @@ SDOperand SelectionDAG::getShuffleScalarElt(const SDNode *N, unsigned Idx) {
|
|||||||
return SDOperand();
|
return SDOperand();
|
||||||
}
|
}
|
||||||
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
|
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
|
||||||
return (Idx == 0) ? V.getOperand(0)
|
return (Index == 0) ? V.getOperand(0)
|
||||||
: getNode(ISD::UNDEF, VT.getVectorElementType());
|
: getNode(ISD::UNDEF, VT.getVectorElementType());
|
||||||
if (V.getOpcode() == ISD::BUILD_VECTOR)
|
if (V.getOpcode() == ISD::BUILD_VECTOR)
|
||||||
return V.getOperand(Idx);
|
return V.getOperand(Index);
|
||||||
if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
|
if (V.getOpcode() == ISD::VECTOR_SHUFFLE)
|
||||||
SDOperand Elt = PermMask.getOperand(Idx);
|
return getShuffleScalarElt(V.Val, Index);
|
||||||
if (Elt.getOpcode() == ISD::UNDEF)
|
|
||||||
return getNode(ISD::UNDEF, VT.getVectorElementType());
|
|
||||||
return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Elt)->getValue());
|
|
||||||
}
|
|
||||||
return SDOperand();
|
return SDOperand();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -808,3 +808,34 @@ LC0:
|
|||||||
With SSE4, it should be
|
With SSE4, it should be
|
||||||
movdqa .LC0(%rip), %xmm0
|
movdqa .LC0(%rip), %xmm0
|
||||||
pinsrb $6, %edi, %xmm0
|
pinsrb $6, %edi, %xmm0
|
||||||
|
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
We should transform a shuffle of two vectors of constants into a single vector
|
||||||
|
of constants. Also, insertelement of a constant into a vector of constants
|
||||||
|
should result in a vector of constants (e.g. 2008-06-25-VecISelBug.ll).
|
||||||
|
|
||||||
|
We compiled it to something horrible:
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
LCPI1_1: ## float
|
||||||
|
.long 1065353216 ## float 1
|
||||||
|
.const
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
LCPI1_0: ## <4 x float>
|
||||||
|
.space 4
|
||||||
|
.long 1065353216 ## float 1
|
||||||
|
.space 4
|
||||||
|
.long 1065353216 ## float 1
|
||||||
|
.text
|
||||||
|
.align 4,0x90
|
||||||
|
.globl _t
|
||||||
|
_t:
|
||||||
|
xorps %xmm0, %xmm0
|
||||||
|
movhps LCPI1_0, %xmm0
|
||||||
|
movss LCPI1_1, %xmm1
|
||||||
|
movaps %xmm0, %xmm2
|
||||||
|
shufps $2, %xmm1, %xmm2
|
||||||
|
shufps $132, %xmm2, %xmm0
|
||||||
|
movaps %xmm0, 0
|
||||||
|
@ -2933,12 +2933,12 @@ unsigned getNumOfConsecutiveZeros(SDOperand Op, SDOperand Mask,
|
|||||||
SelectionDAG &DAG) {
|
SelectionDAG &DAG) {
|
||||||
unsigned NumZeros = 0;
|
unsigned NumZeros = 0;
|
||||||
for (unsigned i = 0; i < NumElems; ++i) {
|
for (unsigned i = 0; i < NumElems; ++i) {
|
||||||
SDOperand Idx = Mask.getOperand(Low ? i : NumElems-i-1);
|
unsigned Index = Low ? i : NumElems-i-1;
|
||||||
|
SDOperand Idx = Mask.getOperand(Index);
|
||||||
if (Idx.getOpcode() == ISD::UNDEF) {
|
if (Idx.getOpcode() == ISD::UNDEF) {
|
||||||
++NumZeros;
|
++NumZeros;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
|
|
||||||
SDOperand Elt = DAG.getShuffleScalarElt(Op.Val, Index);
|
SDOperand Elt = DAG.getShuffleScalarElt(Op.Val, Index);
|
||||||
if (Elt.Val && isZeroNode(Elt))
|
if (Elt.Val && isZeroNode(Elt))
|
||||||
++NumZeros;
|
++NumZeros;
|
||||||
@ -6373,8 +6373,7 @@ static bool EltsFromConsecutiveLoads(SDNode *N, SDOperand PermMask,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
|
SDOperand Elt = DAG.getShuffleScalarElt(N, i);
|
||||||
SDOperand Elt = DAG.getShuffleScalarElt(N, Index);
|
|
||||||
if (!Elt.Val ||
|
if (!Elt.Val ||
|
||||||
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.Val)))
|
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.Val)))
|
||||||
return false;
|
return false;
|
||||||
|
9
test/CodeGen/X86/2008-06-25-VecISelBug.ll
Normal file
9
test/CodeGen/X86/2008-06-25-VecISelBug.ll
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep pslldq
|
||||||
|
|
||||||
|
define void @t() nounwind {
|
||||||
|
entry:
|
||||||
|
%tmp1 = shufflevector <4 x float> zeroinitializer, <4 x float> < float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >
|
||||||
|
%tmp2 = insertelement <4 x float> %tmp1, float 1.000000e+00, i32 3
|
||||||
|
store <4 x float> %tmp2, <4 x float>* null, align 16
|
||||||
|
unreachable
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user