Custom lower arbitrary VECTOR_SHUFFLE's to VPERM.

TODO: leave specific ones as VECTOR_SHUFFLE's and turn them into specialized
operations like vsplt*


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26887 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2006-03-20 01:53:53 +00:00
parent 2bc6dc27e9
commit f1d0b2beda
3 changed files with 60 additions and 15 deletions

View File

@ -167,6 +167,11 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
// FIXME: We don't support any BUILD_VECTOR's yet. We should custom expand
// the ones we do, like splat(0.0) and splat(-0.0).
setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
}
if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
@ -179,11 +184,11 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
setOperationAction(ISD::LOAD , MVT::v4f32, Legal);
setOperationAction(ISD::ADD , MVT::v4i32, Legal);
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
// FIXME: We don't support any BUILD_VECTOR's yet. We should custom expand
// the ones we do!
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Expand);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Expand);
setOperationAction(ISD::LOAD , MVT::v16i8, Legal);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
}
@ -209,6 +214,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
case PPCISD::LVE_X: return "PPCISD::LVE_X";
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
@ -566,6 +572,36 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
return DAG.getNode(PPCISD::LVE_X, Op.getValueType(), Store, FIdx,
DAG.getSrcValue(NULL));
}
case ISD::VECTOR_SHUFFLE: {
// FIXME: Cases that are handled by instructions that take permute
// immediates (such as vsplt*) shouldn't be lowered here! Also handle cases
// that are cheaper to do as multiple such instructions than as a constant
// pool load/vperm pair.
// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
// vector that will get spilled to the constant pool.
SDOperand V1 = Op.getOperand(0);
SDOperand V2 = Op.getOperand(1);
if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
SDOperand PermMask = Op.getOperand(2);
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.
MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
std::vector<SDOperand> ResultMask;
for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
unsigned SrcElt =cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
for (unsigned j = 0; j != BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
MVT::i8));
}
SDOperand VPermMask =DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
}
}
return SDOperand();
}

View File

@ -56,6 +56,10 @@ namespace llvm {
/// the third is the SRCVALUE node.
LVE_X,
/// VPERM - The PPC VPERM Instruction.
///
VPERM,
/// Hi/Lo - These represent the high and low 16-bit parts of a global
/// address respectively. These nodes have two operands, the first of
/// which must be a TargetGlobalAddress, and the second of which must be a

View File

@ -26,6 +26,10 @@ def SDT_PPCShiftOp : SDTypeProfile<1, 2, [ // PPCshl, PPCsra, PPCsrl
def SDT_PPCCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
def SDT_PPCRetFlag : SDTypeProfile<0, 0, []>;
def SDT_PPCvperm : SDTypeProfile<1, 3, [
SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
]>;
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
@ -46,6 +50,7 @@ def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
def PPClve_x : SDNode<"PPCISD::LVE_X", SDTLoad, [SDNPHasChain]>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
// amounts. These nodes are generated by the multi-precision shift code.
@ -118,15 +123,6 @@ def imm16Shifted : PatLeaf<(imm), [{
return ((unsigned)N->getValue() & 0xFFFF0000U) == (unsigned)N->getValue();
}], HI16>;
/*
// Example of a legalize expander: Only for PPC64.
def : Expander<(set i64:$dst, (fp_to_sint f64:$src)),
[(set f64:$tmp , (FCTIDZ f64:$src)),
(set i32:$tmpFI, (CreateNewFrameIndex 8, 8)),
(store f64:$tmp, i32:$tmpFI),
(set i64:$dst, (load i32:$tmpFI))],
Subtarget_PPC64>;
*/
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
@ -956,7 +952,9 @@ def VNMSUBFP: VAForm_1<47, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
Requires<[FPContractions]>;
def VPERM : VAForm_1<43, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
"vperm $vD, $vA, $vC, $vB", VecFP, []>;
"vperm $vD, $vA, $vC, $vB", VecFP,
[(set VRRC:$vD,
(PPCvperm (v4f32 VRRC:$vA), VRRC:$vB, VRRC:$vC))]>;
// VX-Form instructions. AltiVec arithmetic ops.
@ -1153,6 +1151,13 @@ def : Pat<(f64 (extload xaddr:$src, f32)),
def : Pat<(v4i32 (load xoaddr:$src)),
(v4i32 (LVX xoaddr:$src))>;
def : Pat<(v16i8 (load xoaddr:$src)),
(v16i8 (LVX xoaddr:$src))>;
def : Pat<(PPCvperm (v4i32 VRRC:$vA), VRRC:$vB, VRRC:$vC),
(v4i32 (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC))>;
def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
(STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
def : Pat<(v4i32 (PPClve_x xoaddr:$src)),