mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-17 15:33:45 +00:00
Improve the comments in the unpcklps-generating logic, and introduce
a new EltStride variable instead of reusing the NumElems variable for a non-obvious purpose. No functionality change.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112377 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
44edb0bd0c
commit
6e80e44926
@ -4040,8 +4040,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
|
|||||||
SDValue
|
SDValue
|
||||||
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
||||||
DebugLoc dl = Op.getDebugLoc();
|
DebugLoc dl = Op.getDebugLoc();
|
||||||
// All zero's are handled with pxor in SSE2 and above, xorps in SSE1 and
|
// All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
|
||||||
// all one's are handled with pcmpeqd. In AVX, zero's are handled with
|
// All one's are handled with pcmpeqd. In AVX, zero's are handled with
|
||||||
// vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
|
// vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
|
||||||
// is present, so AllOnes is ignored.
|
// is present, so AllOnes is ignored.
|
||||||
if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
|
if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
|
||||||
@ -4288,18 +4288,25 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
return V[0];
|
return V[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise, expand into a number of unpckl*
|
// Otherwise, expand into a number of unpckl*, start by extending each of
|
||||||
// e.g. for v4f32
|
// our (non-undef) elements to the full vector width with the element in the
|
||||||
|
// bottom slot of the vector (which generates no code for SSE).
|
||||||
|
for (unsigned i = 0; i < NumElems; ++i) {
|
||||||
|
if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
|
||||||
|
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
|
||||||
|
else
|
||||||
|
V[i] = DAG.getUNDEF(VT);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Next, we iteratively mix elements, e.g. for v4f32:
|
||||||
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
|
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
|
||||||
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
|
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
|
||||||
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
|
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
|
||||||
for (unsigned i = 0; i < NumElems; ++i)
|
unsigned EltStride = NumElems >> 1;
|
||||||
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
|
while (EltStride != 0) {
|
||||||
NumElems >>= 1;
|
for (unsigned i = 0; i < EltStride; ++i)
|
||||||
while (NumElems != 0) {
|
V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
|
||||||
for (unsigned i = 0; i < NumElems; ++i)
|
EltStride >>= 1;
|
||||||
V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]);
|
|
||||||
NumElems >>= 1;
|
|
||||||
}
|
}
|
||||||
return V[0];
|
return V[0];
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user