mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-06 12:26:45 +00:00
[X86] Fixes bug in build_vector v4x32 lowering
r222375 made some improvements to build_vector lowering of v4x32 and v4xf32 into an insertps, but it missed a case where: (1) a single extracted element is used twice, and (2) the lower of the two non-zero indexes should be preserved, and the higher should be used for the dest mask. This caused a crash, since the source value for the insertps ends up uninitialized. Differential Revision: http://reviews.llvm.org/D6377 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222635 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
71777d18ad
commit
d539147834
@ -5771,7 +5771,8 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
|
|||||||
// We only know how to deal with build_vector nodes where elements are either
|
// We only know how to deal with build_vector nodes where elements are either
|
||||||
// zeroable or extract_vector_elt with constant index.
|
// zeroable or extract_vector_elt with constant index.
|
||||||
SDValue FirstNonZero;
|
SDValue FirstNonZero;
|
||||||
for (int i=0; i < 4; ++i) {
|
unsigned FirstNonZeroIdx;
|
||||||
|
for (unsigned i=0; i < 4; ++i) {
|
||||||
if (Zeroable[i])
|
if (Zeroable[i])
|
||||||
continue;
|
continue;
|
||||||
SDValue Elt = Op->getOperand(i);
|
SDValue Elt = Op->getOperand(i);
|
||||||
@ -5782,8 +5783,10 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
|
|||||||
MVT VT = Elt.getOperand(0).getSimpleValueType();
|
MVT VT = Elt.getOperand(0).getSimpleValueType();
|
||||||
if (!VT.is128BitVector())
|
if (!VT.is128BitVector())
|
||||||
return SDValue();
|
return SDValue();
|
||||||
if (!FirstNonZero.getNode())
|
if (!FirstNonZero.getNode()) {
|
||||||
FirstNonZero = Elt;
|
FirstNonZero = Elt;
|
||||||
|
FirstNonZeroIdx = i;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
|
assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
|
||||||
@ -5822,7 +5825,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
|
|||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
SDValue V2 = Elt.getOperand(0);
|
SDValue V2 = Elt.getOperand(0);
|
||||||
if (Elt == FirstNonZero)
|
if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
|
||||||
V1 = SDValue();
|
V1 = SDValue();
|
||||||
|
|
||||||
bool CanFold = true;
|
bool CanFold = true;
|
||||||
|
@ -1145,6 +1145,23 @@ entry:
|
|||||||
ret <4 x float> %vecinit3
|
ret <4 x float> %vecinit3
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define <4 x float> @insertps_10(<4 x float> %A)
|
||||||
|
{
|
||||||
|
; X32-LABEL: insertps_10:
|
||||||
|
; X32: ## BB#0:
|
||||||
|
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
|
||||||
|
; X32-NEXT: retl
|
||||||
|
;
|
||||||
|
; X64-LABEL: insertps_10:
|
||||||
|
; X64: ## BB#0:
|
||||||
|
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
|
||||||
|
; X64-NEXT: retq
|
||||||
|
%vecext = extractelement <4 x float> %A, i32 0
|
||||||
|
%vecbuild1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %vecext, i32 0
|
||||||
|
%vecbuild2 = insertelement <4 x float> %vecbuild1, float %vecext, i32 2
|
||||||
|
ret <4 x float> %vecbuild2
|
||||||
|
}
|
||||||
|
|
||||||
define <4 x float> @build_vector_to_shuffle_1(<4 x float> %A) {
|
define <4 x float> @build_vector_to_shuffle_1(<4 x float> %A) {
|
||||||
; X32-LABEL: build_vector_to_shuffle_1:
|
; X32-LABEL: build_vector_to_shuffle_1:
|
||||||
; X32: ## BB#0:
|
; X32: ## BB#0:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user