From cfe33c46aa50f04adb0431243e7d25f79b719ac6 Mon Sep 17 00:00:00 2001 From: David Greene Date: Wed, 26 Jan 2011 19:13:22 +0000 Subject: [PATCH] [AVX] Add INSERT_SUBVECTOR and support it on x86. This provides a default implementation for x86, going through the stack in a similr fashion to how the codegen implements BUILD_VECTOR. Eventually this will get matched to VINSERTF128 if AVX is available. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124307 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/ISDOpcodes.h | 8 ++++ include/llvm/Target/TargetSelectionDAG.td | 1 + lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 48 +++++++++++++++++++++++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 25 ++++++++++++ lib/Target/X86/X86ISelLowering.cpp | 23 ++++++++++- lib/Target/X86/X86ISelLowering.h | 1 + 6 files changed, 105 insertions(+), 1 deletion(-) diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index ac3d87c4bd5..c7edb12097f 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -269,6 +269,14 @@ namespace ISD { /// lengths of the input vectors. CONCAT_VECTORS, + /// INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector + /// with VECTOR2 inserted into VECTOR1 at the (potentially + /// variable) element number IDX, which must be a multiple of the + /// VECTOR2 vector length. The elements of VECTOR1 starting at + /// IDX are overwritten with VECTOR2. Elements IDX through + /// vector_length(VECTOR2) must be valid VECTOR1 indices. + INSERT_SUBVECTOR, + /// EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an /// vector value) starting with the element number IDX, which must be a /// constant multiple of the result vector length. diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index e64059ea439..c9be40d23f0 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -446,6 +446,7 @@ def vector_extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR", // This operator does subvector type checking. def extract_subvector : SDNode<"ISD::EXTRACT_SUBVECTOR", SDTSubVecExtract, []>; +def insert_subvector : SDNode<"ISD::INSERT_SUBVECTOR", SDTSubVecInsert, []>; // Nodes for intrinsics, you should use the intrinsic itself and let tblgen use // these internally. Don't reference these directly. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d06a8b43977..6b7f83fb7f2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -171,6 +171,7 @@ private: SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); + SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); std::pair ExpandAtomic(SDNode *Node); @@ -1588,6 +1589,50 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { false, false, 0); } +SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { + assert(Op.getValueType().isVector() && "Non-vector insert subvector!"); + + SDValue Vec = Op.getOperand(0); + SDValue Part = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + // Store the value to a temporary stack slot, then LOAD the returned part. + + SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + int FI = cast(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + + // First store the whole vector. + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + false, false, 0); + + // Then store the inserted part. + + // Add the offset to the index. + unsigned EltSize = + Vec.getValueType().getVectorElementType().getSizeInBits()/8; + + Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, + DAG.getConstant(EltSize, Idx.getValueType())); + + if (Idx.getValueType().bitsGT(TLI.getPointerTy())) + Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); + else + Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + + SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + StackPtr); + + // Store the subvector. + Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr, + MachinePointerInfo(), false, false, 0); + + // Finally, load the updated vector. + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo, + false, false, 0); +} + SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // We can't handle this case efficiently. Allocate a sufficiently // aligned object on the stack, store each element into it, then load @@ -2806,6 +2851,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::EXTRACT_SUBVECTOR: Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0))); break; + case ISD::INSERT_SUBVECTOR: + Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0))); + break; case ISD::CONCAT_VECTORS: { Results.push_back(ExpandVectorBuildThroughStack(Node)); break; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 68d7621f6b4..661f8108476 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3082,6 +3082,30 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); break; + case ISD::INSERT_SUBVECTOR: { + SDValue Index = N3; + if (VT.isSimple() && N1.getValueType().isSimple() + && N2.getValueType().isSimple()) { + assert(VT.isVector() && N1.getValueType().isVector() && + N2.getValueType().isVector() && + "Insert subvector VTs must be a vectors"); + assert(VT == N1.getValueType() && + "Dest and insert subvector source types must match!"); + assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() && + "Insert subvector must be from smaller vector to larger vector!"); + if (ConstantSDNode *CSD = dyn_cast(Index.getNode())) { + uint64_t Idx = CSD->getZExtValue(); + assert((N2.getValueType().getVectorNumElements() + Idx + <= VT.getVectorNumElements()) + && "Insert subvector overflow!"); + } + + // Trivial insertion. + if (VT.getSimpleVT() == N2.getValueType().getSimpleVT()) + return N2; + } + break; + } case ISD::BITCAST: // Fold bit_convert nodes from a type to themselves. if (N1.getValueType() == VT) @@ -5811,6 +5835,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::INSERT_SUBVECTOR: return "insert_subvector"; case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fb4495d80f9..d5195318162 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -568,8 +568,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand); - setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand); setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand); + setOperationAction(ISD::INSERT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand); setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand); @@ -5856,6 +5857,25 @@ X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } +// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a +// simple superregister reference or explicit instructions to insert +// the upper bits of a vector. +SDValue +X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget->hasAVX()) { + DebugLoc dl = Op.getNode()->getDebugLoc(); + SDValue Vec = Op.getNode()->getOperand(0); + SDValue SubVec = Op.getNode()->getOperand(1); + SDValue Idx = Op.getNode()->getOperand(2); + + if (Op.getNode()->getValueType(0).getSizeInBits() == 256 + && SubVec.getNode()->getValueType(0).getSizeInBits() == 128) { + // TODO + } + } + return SDValue(); +} + // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as // their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is // one of the above mentioned nodes. It has to be wrapped because otherwise @@ -8705,6 +8725,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); + case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index e81f0414d6a..8ede98a0664 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -742,6 +742,7 @@ namespace llvm { SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,