[x86] Sink a generic combine of VZEXT nodes from the lowering to VZEXT
nodes to the DAG combining of them.

This will allow the combine to fire on both old vector shuffle lowering
and the new vector shuffle lowering and generally seems like a cleaner
design. I've trimmed down the code a bit and tried to make it and the
surrounding combine fairly clean while moving it around.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219042 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chandler Carruth 2014-10-04 01:05:48 +00:00
parent 9747d27b59
commit cd2c1d8db1

View File

@ -11532,37 +11532,6 @@ static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget,
if (!DAG.getTargetLoweringInfo().isTypeLegal(NVT))
return SDValue();
// Simplify the operand as it's prepared to be fed into shuffle.
unsigned SignificantBits = NVT.getSizeInBits() >> Shift;
if (V1.getOpcode() == ISD::BITCAST &&
V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
V1.getOperand(0).getOperand(0)
.getSimpleValueType().getSizeInBits() == SignificantBits) {
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
ConstantSDNode *CIdx =
dyn_cast<ConstantSDNode>(V1.getOperand(0).getOperand(0).getOperand(1));
// If it's foldable, i.e. normal load with single use, we will let code
// selection to fold it. Otherwise, we will short the conversion sequence.
if (CIdx && CIdx->getZExtValue() == 0 &&
(!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
MVT FullVT = V.getSimpleValueType();
MVT V1VT = V1.getSimpleValueType();
if (FullVT.getSizeInBits() > V1VT.getSizeInBits()) {
// The "ext_vec_elt" node is wider than the result node.
// In this case we should extract subvector from V.
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
unsigned Ratio = FullVT.getSizeInBits() / V1VT.getSizeInBits();
MVT SubVecVT = MVT::getVectorVT(FullVT.getVectorElementType(),
FullVT.getVectorNumElements()/Ratio);
V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
DAG.getIntPtrConstant(0));
}
V1 = DAG.getNode(ISD::BITCAST, DL, V1VT, V);
}
}
return DAG.getNode(ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
}
@ -24622,16 +24591,46 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
/// DAG combine for an X86ISD::VZEXT node \p N.
///
/// Looks through any chain of ISD::BITCAST nodes on the operand and tries two
/// folds:
///   1. (vzext (bitcast (vzext x))) -> (vzext x)
///   2. (vzext (bitcast (scalar_to_vector (extract_vector_elt x, 0))))
///        -> (vzext (bitcast x)), first narrowing x with EXTRACT_SUBVECTOR
///        when x is wider than the operand type.
///
/// \returns the replacement node, or an empty SDValue when no fold applies.
/// \p DCI and \p Subtarget are not referenced by the body shown here.
static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
SDLoc DL(N);
MVT VT = N->getSimpleValueType(0);
SDValue Op = N->getOperand(0);
MVT OpVT = Op.getSimpleValueType();
MVT OpEltVT = OpVT.getVectorElementType();
// (vzext (bitcast (vzext x))) -> (vzext x)
// NOTE(review): `In` is computed exactly like `V` below (both strip BITCASTs
// from operand 0), so one of the two walks looks redundant -- confirm whether
// both are meant to be present.
SDValue In = N->getOperand(0);
while (In.getOpcode() == ISD::BITCAST)
In = In.getOperand(0);
// Strip every BITCAST wrapped around the operand to reach the defining node.
SDValue V = Op;
while (V.getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
// Only fold when at least one bitcast was stripped (V != Op) and the node
// underneath is itself a VZEXT; extend that node's source directly.
if (V != Op && V.getOpcode() == X86ISD::VZEXT)
return DAG.getNode(X86ISD::VZEXT, DL, VT, V.getOperand(0));
// Check if we can bypass extracting and re-inserting an element of an input
// vector. Essentially:
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
// Width the extracted scalar must have for the bypass: operand element width
// times the number of result elements.
unsigned InputBits = OpEltVT.getSizeInBits() * VT.getVectorNumElements();
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
SDValue ExtractedV = V.getOperand(0);
SDValue OrigV = ExtractedV.getOperand(0);
// The bypass is applied only when the extract index is the constant 0.
if (auto *ExtractIdx = dyn_cast<ConstantSDNode>(ExtractedV.getOperand(1)))
if (ExtractIdx->getZExtValue() == 0) {
MVT OrigVT = OrigV.getSimpleValueType();
// Extract a subvector if necessary...
// The source vector is wider than the operand type: keep the same element
// type, divide the element count by the size ratio, and take the subvector
// starting at index 0.
if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) {
int Ratio = OrigVT.getSizeInBits() / OpVT.getSizeInBits();
OrigVT = MVT::getVectorVT(OrigVT.getVectorElementType(),
OrigVT.getVectorNumElements() / Ratio);
OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV,
DAG.getIntPtrConstant(0));
}
// Reinterpret the (possibly narrowed) source as the operand type and rebuild
// the VZEXT on top of it.
Op = DAG.getNode(ISD::BITCAST, DL, OpVT, OrigV);
return DAG.getNode(X86ISD::VZEXT, DL, VT, Op);
}
}
// Fallback on the bitcast-stripped operand `In`: no combine unless it is a
// VZEXT; otherwise extend its source directly.
// NOTE(review): since `In` matches `V`, a VZEXT reached through a bitcast has
// already returned above, so this path appears to fire only when the operand
// is a VZEXT with no bitcast in between -- confirm this is intended.
if (In.getOpcode() != X86ISD::VZEXT)
return SDValue();
return DAG.getNode(X86ISD::VZEXT, SDLoc(N), N->getValueType(0),
In.getOperand(0));
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,