diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 2b8d678eb31..8ea14a5dfdd 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -323,17 +323,20 @@ public: SDOperand getNode(unsigned Opcode, SDVTList VTs, const SDOperand *Ops, unsigned NumOps); - SDOperand getMemcpy(SDOperand Chain, SDOperand Dest, SDOperand Src, - SDOperand Size, SDOperand Align, - SDOperand AlwaysInline); + SDOperand getMemcpy(SDOperand Chain, SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff); - SDOperand getMemmove(SDOperand Chain, SDOperand Dest, SDOperand Src, - SDOperand Size, SDOperand Align, - SDOperand AlwaysInline); + SDOperand getMemmove(SDOperand Chain, SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff); - SDOperand getMemset(SDOperand Chain, SDOperand Dest, SDOperand Src, - SDOperand Size, SDOperand Align, - SDOperand AlwaysInline); + SDOperand getMemset(SDOperand Chain, SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + Value *DstSV, uint64_t DstOff); /// getSetCC - Helper function to make it easier to build SetCC's if you just /// have an ISD::CondCode instead of an SDOperand. diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 6b2b8572c30..deded1a36ac 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -497,14 +497,6 @@ namespace ISD { // it returns an output chain. STACKRESTORE, - // MEMSET/MEMCPY/MEMMOVE - The first operand is the chain. The following - // correspond to the operands of the LLVM intrinsic functions and the last - // one is AlwaysInline. The only result is a token chain. The alignment - // argument is guaranteed to be a Constant node. 
- MEMSET, - MEMMOVE, - MEMCPY, - // CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of // a call sequence, and carry arbitrary information that target might want // to know. The first operand is a chain, the rest are specified by the diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 719f719846b..16f9ed63b36 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -948,17 +948,60 @@ public: SDOperand Callee, ArgListTy &Args, SelectionDAG &DAG); - virtual SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG); - virtual SDOperand LowerMEMCPYCall(SDOperand Chain, SDOperand Dest, - SDOperand Source, SDOperand Count, - SelectionDAG &DAG); - virtual SDOperand LowerMEMCPYInline(SDOperand Chain, SDOperand Dest, - SDOperand Source, unsigned Size, - unsigned Align, SelectionDAG &DAG) { - assert(0 && "Not Implemented"); - return SDOperand(); // this is here to silence compiler errors + /// EmitTargetCodeForMemcpy - Emit target-specific code that performs a + /// memcpy. This can be used by targets to provide code sequences for cases + /// that don't fit the target's parameters for simple loads/stores and can be + /// more efficient than using a library call. This function can return a null + /// SDOperand if the target declines to use inline code and a different + /// lowering strategy should be used. + /// + /// If AlwaysInline is true, the size is constant and the target should not + /// emit any calls and is strongly encouraged to attempt to emit inline code + /// even if it is beyond the usual threshold because this intrinsic is being + /// expanded in a place where calls are not feasible (e.g. within the prologue + /// for another call). If the target chooses to decline an AlwaysInline + /// request here, legalize will resort to using simple loads and stores. 
+ virtual SDOperand + EmitTargetCodeForMemcpy(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Op1, SDOperand Op2, + SDOperand Op3, unsigned Align, + bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff) { + return SDOperand(); } + /// EmitTargetCodeForMemmove - Emit target-specific code that performs a + /// memmove. This can be used by targets to provide code sequences for cases + /// that don't fit the target's parameters for simple loads/stores and can be + /// more efficient than using a library call. This function can return a null + /// SDOperand if the target declines to use code and a different lowering + /// strategy should be used. + virtual SDOperand + EmitTargetCodeForMemmove(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Op1, SDOperand Op2, + SDOperand Op3, unsigned Align, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff) { + return SDOperand(); + } + + /// EmitTargetCodeForMemset - Emit target-specific code that performs a + /// memset. This can be used by targets to provide code sequences for cases + /// that don't fit the target's parameters for simple stores and can be more + /// efficient than using a library call. This function can return a null + /// SDOperand if the target declines to use code and a different lowering + /// strategy should be used. 
+ virtual SDOperand + EmitTargetCodeForMemset(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Op1, SDOperand Op2, + SDOperand Op3, unsigned Align, + Value *DstSV, uint64_t DstOff) { + return SDOperand(); + } /// LowerOperation - This callback is invoked for operations that are /// unsupported by the target, which are registered to use 'custom' lowering, diff --git a/include/llvm/Target/TargetSubtarget.h b/include/llvm/Target/TargetSubtarget.h index 1096b1635c7..fde8f44669a 100644 --- a/include/llvm/Target/TargetSubtarget.h +++ b/include/llvm/Target/TargetSubtarget.h @@ -28,9 +28,6 @@ class TargetSubtarget { protected: // Can only create subclasses... TargetSubtarget(); public: - /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size - /// that still makes it profitable to inline the call. - virtual unsigned getMaxInlineSizeThreshold() const {return 0; } virtual ~TargetSubtarget(); }; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 5cb13e3f44c..2df363e5be1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -22,6 +22,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSubtarget.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -2842,123 +2843,6 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) { break; } break; - case ISD::MEMSET: - case ISD::MEMCPY: - case ISD::MEMMOVE: { - Tmp1 = LegalizeOp(Node->getOperand(0)); // Chain - Tmp2 = LegalizeOp(Node->getOperand(1)); // Pointer - - if (Node->getOpcode() == ISD::MEMSET) { // memset = ubyte - switch (getTypeAction(Node->getOperand(2).getValueType())) { - case Expand: assert(0 && "Cannot expand a byte!"); - case Legal: - Tmp3 = LegalizeOp(Node->getOperand(2)); - break; - case Promote: - Tmp3 = PromoteOp(Node->getOperand(2)); - break; - } - } else { 
- Tmp3 = LegalizeOp(Node->getOperand(2)); // memcpy/move = pointer, - } - - SDOperand Tmp4; - switch (getTypeAction(Node->getOperand(3).getValueType())) { - case Expand: { - // Length is too big, just take the lo-part of the length. - SDOperand HiPart; - ExpandOp(Node->getOperand(3), Tmp4, HiPart); - break; - } - case Legal: - Tmp4 = LegalizeOp(Node->getOperand(3)); - break; - case Promote: - Tmp4 = PromoteOp(Node->getOperand(3)); - break; - } - - SDOperand Tmp5; - switch (getTypeAction(Node->getOperand(4).getValueType())) { // uint - case Expand: assert(0 && "Cannot expand this yet!"); - case Legal: - Tmp5 = LegalizeOp(Node->getOperand(4)); - break; - case Promote: - Tmp5 = PromoteOp(Node->getOperand(4)); - break; - } - - SDOperand Tmp6; - switch (getTypeAction(Node->getOperand(5).getValueType())) { // bool - case Expand: assert(0 && "Cannot expand this yet!"); - case Legal: - Tmp6 = LegalizeOp(Node->getOperand(5)); - break; - case Promote: - Tmp6 = PromoteOp(Node->getOperand(5)); - break; - } - - switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) { - default: assert(0 && "This action not implemented for this operation!"); - case TargetLowering::Custom: - isCustom = true; - // FALLTHROUGH - case TargetLowering::Legal: { - SDOperand Ops[] = { Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6 }; - Result = DAG.UpdateNodeOperands(Result, Ops, 6); - if (isCustom) { - Tmp1 = TLI.LowerOperation(Result, DAG); - if (Tmp1.Val) Result = Tmp1; - } - break; - } - case TargetLowering::Expand: { - // Otherwise, the target does not support this operation. Lower the - // operation to an explicit libcall as appropriate. 
- MVT::ValueType IntPtr = TLI.getPointerTy(); - const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - - const char *FnName = 0; - if (Node->getOpcode() == ISD::MEMSET) { - Entry.Node = Tmp2; Entry.Ty = IntPtrTy; - Args.push_back(Entry); - // Extend the (previously legalized) ubyte argument to be an int value - // for the call. - if (Tmp3.getValueType() > MVT::i32) - Tmp3 = DAG.getNode(ISD::TRUNCATE, MVT::i32, Tmp3); - else - Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Tmp3); - Entry.Node = Tmp3; Entry.Ty = Type::Int32Ty; Entry.isSExt = true; - Args.push_back(Entry); - Entry.Node = Tmp4; Entry.Ty = IntPtrTy; Entry.isSExt = false; - Args.push_back(Entry); - - FnName = "memset"; - } else if (Node->getOpcode() == ISD::MEMCPY || - Node->getOpcode() == ISD::MEMMOVE) { - Entry.Ty = IntPtrTy; - Entry.Node = Tmp2; Args.push_back(Entry); - Entry.Node = Tmp3; Args.push_back(Entry); - Entry.Node = Tmp4; Args.push_back(Entry); - FnName = Node->getOpcode() == ISD::MEMMOVE ? "memmove" : "memcpy"; - } else { - assert(0 && "Unknown op!"); - } - - std::pair CallResult = - TLI.LowerCallTo(Tmp1, Type::VoidTy, - false, false, false, CallingConv::C, false, - DAG.getExternalSymbol(FnName, IntPtr), Args, DAG); - Result = CallResult.second; - break; - } - } - break; - } case ISD::SHL_PARTS: case ISD::SRA_PARTS: diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 6511cff1c6d..380c42220c7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -439,51 +439,6 @@ SDOperand DAGTypeLegalizer::CreateStackStoreLoad(SDOperand Op, return DAG.getLoad(DestVT, Store, FIPtr, NULL, 0); } -/// HandleMemIntrinsic - This handles memcpy/memset/memmove with invalid -/// operands. This promotes or expands the operands as required. 
-SDOperand DAGTypeLegalizer::HandleMemIntrinsic(SDNode *N) { - // The chain and pointer [operands #0 and #1] are always valid types. - SDOperand Chain = N->getOperand(0); - SDOperand Ptr = N->getOperand(1); - SDOperand Op2 = N->getOperand(2); - - // Op #2 is either a value (memset) or a pointer. Promote it if required. - switch (getTypeAction(Op2.getValueType())) { - default: assert(0 && "Unknown action for pointer/value operand"); - case Legal: break; - case Promote: Op2 = GetPromotedOp(Op2); break; - } - - // The length could have any action required. - SDOperand Length = N->getOperand(3); - switch (getTypeAction(Length.getValueType())) { - default: assert(0 && "Unknown action for memop operand"); - case Legal: break; - case Promote: Length = GetPromotedZExtOp(Length); break; - case Expand: - SDOperand Dummy; // discard the high part. - GetExpandedOp(Length, Length, Dummy); - break; - } - - SDOperand Align = N->getOperand(4); - switch (getTypeAction(Align.getValueType())) { - default: assert(0 && "Unknown action for memop operand"); - case Legal: break; - case Promote: Align = GetPromotedZExtOp(Align); break; - } - - SDOperand AlwaysInline = N->getOperand(5); - switch (getTypeAction(AlwaysInline.getValueType())) { - default: assert(0 && "Unknown action for memop operand"); - case Legal: break; - case Promote: AlwaysInline = GetPromotedZExtOp(AlwaysInline); break; - } - - SDOperand Ops[] = { Chain, Ptr, Op2, Length, Align, AlwaysInline }; - return DAG.UpdateNodeOperands(SDOperand(N, 0), Ops, 6); -} - /// JoinIntegers - Build an integer with low bits Lo and high bits Hi. SDOperand DAGTypeLegalizer::JoinIntegers(SDOperand Lo, SDOperand Hi) { MVT::ValueType LVT = Lo.getValueType(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 7d245abed59..5b9879315f8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -165,7 +165,6 @@ private: // Common routines. 
SDOperand BitConvertToInteger(SDOperand Op); SDOperand CreateStackStoreLoad(SDOperand Op, MVT::ValueType DestVT); - SDOperand HandleMemIntrinsic(SDNode *N); SDOperand JoinIntegers(SDOperand Lo, SDOperand Hi); void SplitInteger(SDOperand Op, SDOperand &Lo, SDOperand &Hi); void SplitInteger(SDOperand Op, MVT::ValueType LoVT, MVT::ValueType HiVT, diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp index b872a44fec4..fcde8f32d25 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp @@ -946,9 +946,6 @@ bool DAGTypeLegalizer::ExpandOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = ExpandOperand_STORE(cast(N), OpNo); break; - case ISD::MEMSET: - case ISD::MEMCPY: - case ISD::MEMMOVE: Res = HandleMemIntrinsic(N); break; case ISD::BUILD_VECTOR: Res = ExpandOperand_BUILD_VECTOR(N); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp index b8118eb0392..93c8c605841 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp @@ -447,9 +447,6 @@ bool DAGTypeLegalizer::PromoteOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = PromoteOperand_STORE(cast(N), OpNo); break; - case ISD::MEMSET: - case ISD::MEMCPY: - case ISD::MEMMOVE: Res = HandleMemIntrinsic(N); break; case ISD::BUILD_VECTOR: Res = PromoteOperand_BUILD_VECTOR(N); break; case ISD::INSERT_VECTOR_ELT: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f096c70a3e4..327a8fe8976 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -17,6 +17,7 @@ #include "llvm/Intrinsics.h" #include "llvm/DerivedTypes.h" #include "llvm/Assembly/Writer.h" +#include "llvm/CallingConv.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include 
"llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -2385,28 +2386,357 @@ SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT, return getNode(Opcode, VT, Ops, 5); } -SDOperand SelectionDAG::getMemcpy(SDOperand Chain, SDOperand Dest, - SDOperand Src, SDOperand Size, - SDOperand Align, - SDOperand AlwaysInline) { - SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline }; - return getNode(ISD::MEMCPY, MVT::Other, Ops, 6); +/// getMemsetValue - Vectorized representation of the memset value +/// operand. +static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT, + SelectionDAG &DAG) { + MVT::ValueType CurVT = VT; + if (ConstantSDNode *C = dyn_cast(Value)) { + uint64_t Val = C->getValue() & 255; + unsigned Shift = 8; + while (CurVT != MVT::i8) { + Val = (Val << Shift) | Val; + Shift <<= 1; + CurVT = (MVT::ValueType)((unsigned)CurVT - 1); + } + return DAG.getConstant(Val, VT); + } else { + Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value); + unsigned Shift = 8; + while (CurVT != MVT::i8) { + Value = + DAG.getNode(ISD::OR, VT, + DAG.getNode(ISD::SHL, VT, Value, + DAG.getConstant(Shift, MVT::i8)), Value); + Shift <<= 1; + CurVT = (MVT::ValueType)((unsigned)CurVT - 1); + } + + return Value; + } } -SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dest, - SDOperand Src, SDOperand Size, - SDOperand Align, - SDOperand AlwaysInline) { - SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline }; - return getNode(ISD::MEMMOVE, MVT::Other, Ops, 6); +/// getMemsetStringVal - Similar to getMemsetValue. Except this is only +/// used when a memcpy is turned into a memset when the source is a constant +/// string ptr. 
+static SDOperand getMemsetStringVal(MVT::ValueType VT, + SelectionDAG &DAG, + const TargetLowering &TLI, + std::string &Str, unsigned Offset) { + uint64_t Val = 0; + unsigned MSB = MVT::getSizeInBits(VT) / 8; + if (TLI.isLittleEndian()) + Offset = Offset + MSB - 1; + for (unsigned i = 0; i != MSB; ++i) { + Val = (Val << 8) | (unsigned char)Str[Offset]; + Offset += TLI.isLittleEndian() ? -1 : 1; + } + return DAG.getConstant(Val, VT); } -SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dest, +/// getMemBasePlusOffset - Returns base and offset node for the +static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset, + SelectionDAG &DAG) { + MVT::ValueType VT = Base.getValueType(); + return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT)); +} + +/// MeetsMaxMemopRequirement - Determines if the number of memory ops required +/// to replace the memset / memcpy is below the threshold. It also returns the +/// types of the sequence of memory ops to perform memset / memcpy. 
+static bool MeetsMaxMemopRequirement(std::vector &MemOps, + unsigned Limit, uint64_t Size, + unsigned Align, + const TargetLowering &TLI) { + MVT::ValueType VT; + + if (TLI.allowsUnalignedMemoryAccesses()) { + VT = MVT::i64; + } else { + switch (Align & 7) { + case 0: + VT = MVT::i64; + break; + case 4: + VT = MVT::i32; + break; + case 2: + VT = MVT::i16; + break; + default: + VT = MVT::i8; + break; + } + } + + MVT::ValueType LVT = MVT::i64; + while (!TLI.isTypeLegal(LVT)) + LVT = (MVT::ValueType)((unsigned)LVT - 1); + assert(MVT::isInteger(LVT)); + + if (VT > LVT) + VT = LVT; + + unsigned NumMemOps = 0; + while (Size != 0) { + unsigned VTSize = MVT::getSizeInBits(VT) / 8; + while (VTSize > Size) { + VT = (MVT::ValueType)((unsigned)VT - 1); + VTSize >>= 1; + } + assert(MVT::isInteger(VT)); + + if (++NumMemOps > Limit) + return false; + MemOps.push_back(VT); + Size -= VTSize; + } + + return true; +} + +static SDOperand getMemcpyLoadsAndStores(SelectionDAG &DAG, + SDOperand Chain, SDOperand Dst, + SDOperand Src, uint64_t Size, + unsigned Align, + bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Expand memcpy to a series of store ops if the size operand falls below + // a certain threshold. 
+ std::vector MemOps; + uint64_t Limit = -1; + if (!AlwaysInline) + Limit = TLI.getMaxStoresPerMemcpy(); + if (!MeetsMaxMemopRequirement(MemOps, Limit, Size, Align, TLI)) + return SDOperand(); + + SmallVector OutChains; + + unsigned NumMemOps = MemOps.size(); + unsigned SrcDelta = 0; + GlobalAddressSDNode *G = NULL; + std::string Str; + bool CopyFromStr = false; + + if (Src.getOpcode() == ISD::GlobalAddress) + G = cast(Src); + else if (Src.getOpcode() == ISD::ADD && + Src.getOperand(0).getOpcode() == ISD::GlobalAddress && + Src.getOperand(1).getOpcode() == ISD::Constant) { + G = cast(Src.getOperand(0)); + SrcDelta = cast(Src.getOperand(1))->getValue(); + } + if (G) { + GlobalVariable *GV = dyn_cast(G->getGlobal()); + if (GV && GV->isConstant()) { + Str = GV->getStringValue(false); + if (!Str.empty()) { + CopyFromStr = true; + SrcOff += SrcDelta; + } + } + } + + for (unsigned i = 0; i < NumMemOps; i++) { + MVT::ValueType VT = MemOps[i]; + unsigned VTSize = MVT::getSizeInBits(VT) / 8; + SDOperand Value, Store; + + if (CopyFromStr) { + Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff); + Store = + DAG.getStore(Chain, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstOff); + } else { + Value = DAG.getLoad(VT, Chain, + getMemBasePlusOffset(Src, SrcOff, DAG), + SrcSV, SrcOff, false, Align); + Store = + DAG.getStore(Chain, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstOff, false, Align); + } + OutChains.push_back(Store); + SrcOff += VTSize; + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, MVT::Other, + &OutChains[0], OutChains.size()); +} + +static SDOperand getMemsetStores(SelectionDAG &DAG, + SDOperand Chain, SDOperand Dst, + SDOperand Src, uint64_t Size, + unsigned Align, + Value *DstSV, uint64_t DstOff) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Expand memset to a series of load/store ops if the size operand + // falls below a certain threshold. 
+ std::vector MemOps; + if (!MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(), + Size, Align, TLI)) + return SDOperand(); + + SmallVector OutChains; + + unsigned NumMemOps = MemOps.size(); + for (unsigned i = 0; i < NumMemOps; i++) { + MVT::ValueType VT = MemOps[i]; + unsigned VTSize = MVT::getSizeInBits(VT) / 8; + SDOperand Value = getMemsetValue(Src, VT, DAG); + SDOperand Store = DAG.getStore(Chain, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstOff); + OutChains.push_back(Store); + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, MVT::Other, + &OutChains[0], OutChains.size()); +} + +SDOperand SelectionDAG::getMemcpy(SDOperand Chain, SDOperand Dst, SDOperand Src, SDOperand Size, - SDOperand Align, - SDOperand AlwaysInline) { - SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline }; - return getNode(ISD::MEMSET, MVT::Other, Ops, 6); + unsigned Align, bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff) { + + // Check to see if we should lower the memcpy to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast(Size); + if (ConstantSize) { + // Memcpy with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDOperand Result = + getMemcpyLoadsAndStores(*this, Chain, Dst, Src, ConstantSize->getValue(), + Align, false, DstSV, DstOff, SrcSV, SrcOff); + if (Result.Val) + return Result; + } + + // Then check to see if we should lower the memcpy with target-specific + // code. If the target chooses to do this, this is the next best. + SDOperand Result = + TLI.EmitTargetCodeForMemcpy(*this, Chain, Dst, Src, Size, Align, + AlwaysInline, + DstSV, DstOff, SrcSV, SrcOff); + if (Result.Val) + return Result; + + // If we really need inline code and the target declined to provide it, + // use a (potentially long) sequence of loads and stores. 
+ if (AlwaysInline) { + assert(ConstantSize && "AlwaysInline requires a constant size!"); + return getMemcpyLoadsAndStores(*this, Chain, Dst, Src, + ConstantSize->getValue(), Align, true, + DstSV, DstOff, SrcSV, SrcOff); + } + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + Entry.Node = Size; Args.push_back(Entry); + std::pair CallResult = + TLI.LowerCallTo(Chain, Type::VoidTy, + false, false, false, CallingConv::C, false, + getExternalSymbol("memcpy", TLI.getPointerTy()), + Args, *this); + return CallResult.second; +} + +SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dst, + SDOperand Src, SDOperand Size, + unsigned Align, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff) { + + // TODO: Optimize small memmove cases with simple loads and stores, + // ensuring that all loads precede all stores. This can cause severe + // register pressure, so targets should be careful with the size limit. + + // Then check to see if we should lower the memmove with target-specific + // code. If the target chooses to do this, this is the next best. + SDOperand Result = + TLI.EmitTargetCodeForMemmove(*this, Chain, Dst, Src, Size, Align, + DstSV, DstOff, SrcSV, SrcOff); + if (Result.Val) + return Result; + + // Emit a library call. 
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + Entry.Node = Size; Args.push_back(Entry); + std::pair CallResult = + TLI.LowerCallTo(Chain, Type::VoidTy, + false, false, false, CallingConv::C, false, + getExternalSymbol("memmove", TLI.getPointerTy()), + Args, *this); + return CallResult.second; +} + +SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dst, + SDOperand Src, SDOperand Size, + unsigned Align, + Value *DstSV, uint64_t DstOff) { + + // Check to see if we should lower the memset to stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast(Size); + if (ConstantSize) { + // Memset with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDOperand Result = + getMemsetStores(*this, Chain, Dst, Src, ConstantSize->getValue(), Align, + DstSV, DstOff); + if (Result.Val) + return Result; + } + + // Then check to see if we should lower the memset with target-specific + // code. If the target chooses to do this, this is the next best. + SDOperand Result = + TLI.EmitTargetCodeForMemset(*this, Chain, Dst, Src, Size, Align, + DstSV, DstOff); + if (Result.Val) + return Result; + + // Emit a library call. + const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; Entry.Ty = IntPtrTy; + Args.push_back(Entry); + // Extend or truncate the argument to be an i32 value for the call. 
+ if (Src.getValueType() > MVT::i32) + Src = getNode(ISD::TRUNCATE, MVT::i32, Src); + else + Src = getNode(ISD::ZERO_EXTEND, MVT::i32, Src); + Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true; + Args.push_back(Entry); + Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false; + Args.push_back(Entry); + std::pair CallResult = + TLI.LowerCallTo(Chain, Type::VoidTy, + false, false, false, CallingConv::C, false, + getExternalSymbol("memset", TLI.getPointerTy()), + Args, *this); + return CallResult.second; } SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain, @@ -4009,11 +4339,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STACKRESTORE: return "stackrestore"; case ISD::TRAP: return "trap"; - // Block memory operations. - case ISD::MEMSET: return "memset"; - case ISD::MEMCPY: return "memcpy"; - case ISD::MEMMOVE: return "memmove"; - // Bit manipulation case ISD::BSWAP: return "bswap"; case ISD::CTPOP: return "ctpop"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index cfef9acd4fc..ac5cfd2e91e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -647,8 +647,6 @@ public: void visitVAEnd(CallInst &I); void visitVACopy(CallInst &I); - void visitMemIntrinsic(CallInst &I, unsigned Op); - void visitGetResult(GetResultInst &I); void visitUserOp1(Instruction &I) { @@ -2737,18 +2735,48 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return "_longjmp"+!TLI.usesUnderscoreLongJmp(); break; case Intrinsic::memcpy_i32: - case Intrinsic::memcpy_i64: - visitMemIntrinsic(I, ISD::MEMCPY); + case Intrinsic::memcpy_i64: { + SDOperand Op1 = getValue(I.getOperand(1)); + SDOperand Op2 = getValue(I.getOperand(2)); + SDOperand Op3 = getValue(I.getOperand(3)); + unsigned Align = cast(I.getOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Align, 
false, + I.getOperand(1), 0, I.getOperand(2), 0)); return 0; + } case Intrinsic::memset_i32: - case Intrinsic::memset_i64: - visitMemIntrinsic(I, ISD::MEMSET); + case Intrinsic::memset_i64: { + SDOperand Op1 = getValue(I.getOperand(1)); + SDOperand Op2 = getValue(I.getOperand(2)); + SDOperand Op3 = getValue(I.getOperand(3)); + unsigned Align = cast(I.getOperand(4))->getZExtValue(); + DAG.setRoot(DAG.getMemset(getRoot(), Op1, Op2, Op3, Align, + I.getOperand(1), 0)); return 0; + } case Intrinsic::memmove_i32: - case Intrinsic::memmove_i64: - visitMemIntrinsic(I, ISD::MEMMOVE); + case Intrinsic::memmove_i64: { + SDOperand Op1 = getValue(I.getOperand(1)); + SDOperand Op2 = getValue(I.getOperand(2)); + SDOperand Op3 = getValue(I.getOperand(3)); + unsigned Align = cast(I.getOperand(4))->getZExtValue(); + + // If the source and destination are known to not be aliases, we can + // lower memmove as memcpy. + uint64_t Size = -1ULL; + if (ConstantSDNode *C = dyn_cast(Op3)) + Size = C->getValue(); + if (AA.alias(I.getOperand(1), Size, I.getOperand(2), Size) == + AliasAnalysis::NoAlias) { + DAG.setRoot(DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Align, false, + I.getOperand(1), 0, I.getOperand(2), 0)); + return 0; + } + + DAG.setRoot(DAG.getMemmove(getRoot(), Op1, Op2, Op3, Align, + I.getOperand(1), 0, I.getOperand(2), 0)); return 0; - + } case Intrinsic::dbg_stoppoint: { MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); DbgStopPointInst &SPI = cast(I); @@ -4342,242 +4370,6 @@ SDOperand TargetLowering::CustomPromoteOperation(SDOperand Op, return SDOperand(); } -/// getMemsetValue - Vectorized representation of the memset value -/// operand. 
-static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT, - SelectionDAG &DAG) { - MVT::ValueType CurVT = VT; - if (ConstantSDNode *C = dyn_cast(Value)) { - uint64_t Val = C->getValue() & 255; - unsigned Shift = 8; - while (CurVT != MVT::i8) { - Val = (Val << Shift) | Val; - Shift <<= 1; - CurVT = (MVT::ValueType)((unsigned)CurVT - 1); - } - return DAG.getConstant(Val, VT); - } else { - Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value); - unsigned Shift = 8; - while (CurVT != MVT::i8) { - Value = - DAG.getNode(ISD::OR, VT, - DAG.getNode(ISD::SHL, VT, Value, - DAG.getConstant(Shift, MVT::i8)), Value); - Shift <<= 1; - CurVT = (MVT::ValueType)((unsigned)CurVT - 1); - } - - return Value; - } -} - -/// getMemsetStringVal - Similar to getMemsetValue. Except this is only -/// used when a memcpy is turned into a memset when the source is a constant -/// string ptr. -static SDOperand getMemsetStringVal(MVT::ValueType VT, - SelectionDAG &DAG, TargetLowering &TLI, - std::string &Str, unsigned Offset) { - uint64_t Val = 0; - unsigned MSB = MVT::getSizeInBits(VT) / 8; - if (TLI.isLittleEndian()) - Offset = Offset + MSB - 1; - for (unsigned i = 0; i != MSB; ++i) { - Val = (Val << 8) | (unsigned char)Str[Offset]; - Offset += TLI.isLittleEndian() ? -1 : 1; - } - return DAG.getConstant(Val, VT); -} - -/// getMemBasePlusOffset - Returns base and offset node for the -static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset, - SelectionDAG &DAG, TargetLowering &TLI) { - MVT::ValueType VT = Base.getValueType(); - return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT)); -} - -/// MeetsMaxMemopRequirement - Determines if the number of memory ops required -/// to replace the memset / memcpy is below the threshold. It also returns the -/// types of the sequence of memory ops to perform memset / memcpy. 
-static bool MeetsMaxMemopRequirement(std::vector &MemOps, - unsigned Limit, uint64_t Size, - unsigned Align, TargetLowering &TLI) { - MVT::ValueType VT; - - if (TLI.allowsUnalignedMemoryAccesses()) { - VT = MVT::i64; - } else { - switch (Align & 7) { - case 0: - VT = MVT::i64; - break; - case 4: - VT = MVT::i32; - break; - case 2: - VT = MVT::i16; - break; - default: - VT = MVT::i8; - break; - } - } - - MVT::ValueType LVT = MVT::i64; - while (!TLI.isTypeLegal(LVT)) - LVT = (MVT::ValueType)((unsigned)LVT - 1); - assert(MVT::isInteger(LVT)); - - if (VT > LVT) - VT = LVT; - - unsigned NumMemOps = 0; - while (Size != 0) { - unsigned VTSize = MVT::getSizeInBits(VT) / 8; - while (VTSize > Size) { - VT = (MVT::ValueType)((unsigned)VT - 1); - VTSize >>= 1; - } - assert(MVT::isInteger(VT)); - - if (++NumMemOps > Limit) - return false; - MemOps.push_back(VT); - Size -= VTSize; - } - - return true; -} - -void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned Op) { - SDOperand Op1 = getValue(I.getOperand(1)); - SDOperand Op2 = getValue(I.getOperand(2)); - SDOperand Op3 = getValue(I.getOperand(3)); - SDOperand Op4 = getValue(I.getOperand(4)); - unsigned Align = (unsigned)cast(Op4)->getValue(); - if (Align == 0) Align = 1; - - // If the source and destination are known to not be aliases, we can - // lower memmove as memcpy. - if (Op == ISD::MEMMOVE) { - uint64_t Size = -1ULL; - if (ConstantSDNode *C = dyn_cast(Op3)) - Size = C->getValue(); - if (AA.alias(I.getOperand(1), Size, I.getOperand(2), Size) == - AliasAnalysis::NoAlias) - Op = ISD::MEMCPY; - } - - if (ConstantSDNode *Size = dyn_cast(Op3)) { - std::vector MemOps; - - // Expand memset / memcpy to a series of load / store ops - // if the size operand falls below a certain threshold. - SmallVector OutChains; - switch (Op) { - default: break; // Do nothing for now. 
- case ISD::MEMSET: { - if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(), - Size->getValue(), Align, TLI)) { - unsigned NumMemOps = MemOps.size(); - unsigned Offset = 0; - for (unsigned i = 0; i < NumMemOps; i++) { - MVT::ValueType VT = MemOps[i]; - unsigned VTSize = MVT::getSizeInBits(VT) / 8; - SDOperand Value = getMemsetValue(Op2, VT, DAG); - SDOperand Store = DAG.getStore(getRoot(), Value, - getMemBasePlusOffset(Op1, Offset, DAG, TLI), - I.getOperand(1), Offset); - OutChains.push_back(Store); - Offset += VTSize; - } - } - break; - } - case ISD::MEMCPY: { - if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemcpy(), - Size->getValue(), Align, TLI)) { - unsigned NumMemOps = MemOps.size(); - unsigned SrcOff = 0, DstOff = 0, SrcDelta = 0; - GlobalAddressSDNode *G = NULL; - std::string Str; - bool CopyFromStr = false; - - if (Op2.getOpcode() == ISD::GlobalAddress) - G = cast(Op2); - else if (Op2.getOpcode() == ISD::ADD && - Op2.getOperand(0).getOpcode() == ISD::GlobalAddress && - Op2.getOperand(1).getOpcode() == ISD::Constant) { - G = cast(Op2.getOperand(0)); - SrcDelta = cast(Op2.getOperand(1))->getValue(); - } - if (G) { - GlobalVariable *GV = dyn_cast(G->getGlobal()); - if (GV && GV->isConstant()) { - Str = GV->getStringValue(false); - if (!Str.empty()) { - CopyFromStr = true; - SrcOff += SrcDelta; - } - } - } - - for (unsigned i = 0; i < NumMemOps; i++) { - MVT::ValueType VT = MemOps[i]; - unsigned VTSize = MVT::getSizeInBits(VT) / 8; - SDOperand Value, Chain, Store; - - if (CopyFromStr) { - Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff); - Chain = getRoot(); - Store = - DAG.getStore(Chain, Value, - getMemBasePlusOffset(Op1, DstOff, DAG, TLI), - I.getOperand(1), DstOff); - } else { - Value = DAG.getLoad(VT, getRoot(), - getMemBasePlusOffset(Op2, SrcOff, DAG, TLI), - I.getOperand(2), SrcOff, false, Align); - Chain = Value.getValue(1); - Store = - DAG.getStore(Chain, Value, - getMemBasePlusOffset(Op1, DstOff, DAG, TLI), - 
I.getOperand(1), DstOff, false, Align); - } - OutChains.push_back(Store); - SrcOff += VTSize; - DstOff += VTSize; - } - } - break; - } - } - - if (!OutChains.empty()) { - DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, - &OutChains[0], OutChains.size())); - return; - } - } - - SDOperand AlwaysInline = DAG.getConstant(0, MVT::i1); - SDOperand Node; - switch(Op) { - default: - assert(0 && "Unknown Op"); - case ISD::MEMCPY: - Node = DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline); - break; - case ISD::MEMMOVE: - Node = DAG.getMemmove(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline); - break; - case ISD::MEMSET: - Node = DAG.getMemset(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline); - break; - } - DAG.setRoot(Node); -} - //===----------------------------------------------------------------------===// // SelectionDAGISel code //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index a0894ddebc6..f69f0462a07 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -17,7 +17,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/CallingConv.h" +#include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/StringExtras.h" @@ -234,59 +234,6 @@ TargetLowering::TargetLowering(TargetMachine &tm) TargetLowering::~TargetLowering() {} - -SDOperand TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { - assert(getSubtarget() && "Subtarget not defined"); - SDOperand ChainOp = Op.getOperand(0); - SDOperand DestOp = Op.getOperand(1); - SDOperand SourceOp = Op.getOperand(2); - SDOperand CountOp = Op.getOperand(3); - SDOperand AlignOp = Op.getOperand(4); - SDOperand AlwaysInlineOp = Op.getOperand(5); - - bool AlwaysInline = 
(bool)cast(AlwaysInlineOp)->getValue(); - unsigned Align = (unsigned)cast(AlignOp)->getValue(); - if (Align == 0) Align = 1; - - // If size is unknown, call memcpy. - ConstantSDNode *I = dyn_cast(CountOp); - if (!I) { - assert(!AlwaysInline && "Cannot inline copy of unknown size"); - return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); - } - - // If not DWORD aligned or if size is more than threshold, then call memcpy. - // The libc version is likely to be faster for the following cases. It can - // use the address value and run time information about the CPU. - // With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster - unsigned Size = I->getValue(); - if (AlwaysInline || - (Size <= getSubtarget()->getMaxInlineSizeThreshold() && - (Align & 3) == 0)) - return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG); - return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); -} - - -SDOperand TargetLowering::LowerMEMCPYCall(SDOperand Chain, - SDOperand Dest, - SDOperand Source, - SDOperand Count, - SelectionDAG &DAG) { - MVT::ValueType IntPtr = getPointerTy(); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Ty = getTargetData()->getIntPtrType(); - Entry.Node = Dest; Args.push_back(Entry); - Entry.Node = Source; Args.push_back(Entry); - Entry.Node = Count; Args.push_back(Entry); - std::pair CallResult = - LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C, - false, DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); - return CallResult.second; -} - - /// computeRegisterProperties - Once all of the register classes are added, /// this allows us to compute derived properties we expose. 
void TargetLowering::computeRegisterProperties() { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 7218560cc6c..0095352c415 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -197,11 +197,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - // Expand mem operations genericly. - setOperationAction(ISD::MEMSET , MVT::Other, Expand); - setOperationAction(ISD::MEMCPY , MVT::Other, Custom); - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); - // Use the default implementation. setOperationAction(ISD::VASTART , MVT::Other, Custom); setOperationAction(ISD::VAARG , MVT::Other, Expand); @@ -1246,18 +1241,30 @@ static SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::CNEG, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); } -SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, - SDOperand Dest, - SDOperand Source, - unsigned Size, - unsigned Align, - SelectionDAG &DAG) { +SDOperand +ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff){ // Do repeated 4-byte loads and stores. To be improved. - assert((Align & 3) == 0 && "Expected 4-byte aligned addresses!"); - unsigned BytesLeft = Size & 3; - unsigned NumMemOps = Size >> 2; + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDOperand(); + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit. 
+ ConstantSDNode *ConstantSize = dyn_cast(Size); + if (!ConstantSize) + return SDOperand(); + uint64_t SizeVal = ConstantSize->getValue(); + if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) + return SDOperand(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; unsigned EmittedNumMemOps = 0; - unsigned SrcOff = 0, DstOff = 0; MVT::ValueType VT = MVT::i32; unsigned VTSize = 4; unsigned i = 0; @@ -1272,9 +1279,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { Loads[i] = DAG.getLoad(VT, Chain, - DAG.getNode(ISD::ADD, MVT::i32, Source, + DAG.getNode(ISD::ADD, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - NULL, 0); + SrcSV, SrcOff); TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } @@ -1283,9 +1290,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { TFOps[i] = DAG.getStore(Chain, Loads[i], - DAG.getNode(ISD::ADD, MVT::i32, Dest, + DAG.getNode(ISD::ADD, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), - NULL, 0); + DstSV, DstOff); DstOff += VTSize; } Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &TFOps[0], i); @@ -1309,9 +1316,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, } Loads[i] = DAG.getLoad(VT, Chain, - DAG.getNode(ISD::ADD, MVT::i32, Source, + DAG.getNode(ISD::ADD, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - NULL, 0); + SrcSV, SrcOff); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; @@ -1331,9 +1338,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, } TFOps[i] = DAG.getStore(Chain, Loads[i], - DAG.getNode(ISD::ADD, MVT::i32, Dest, + DAG.getNode(ISD::ADD, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), - NULL, 0); + DstSV, DstOff); ++i; DstOff += VTSize; BytesLeft -= VTSize; @@ -1409,7 +1416,6 @@ SDOperand ARMTargetLowering::LowerOperation(SDOperand 
Op, SelectionDAG &DAG) { case ISD::RETURNADDR: break; case ISD::FRAMEADDR: break; case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); - case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 285a20d23f8..58d8d8c6c86 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -119,8 +119,8 @@ namespace llvm { getRegClassForInlineAsmConstraint(const std::string &Constraint, MVT::ValueType VT) const; - virtual const TargetSubtarget* getSubtarget() { - return static_cast(Subtarget); + virtual const ARMSubtarget* getSubtarget() { + return Subtarget; } private: @@ -143,11 +143,14 @@ namespace llvm { SDOperand LowerGLOBAL_OFFSET_TABLE(SDOperand Op, SelectionDAG &DAG); SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG); SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG); - SDOperand LowerMEMCPYInline(SDOperand Chain, SDOperand Dest, - SDOperand Source, unsigned Size, - unsigned Align, SelectionDAG &DAG); - + SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff); }; } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index c43924b53df..fbc9e579df1 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -62,6 +62,8 @@ protected: /// ARMSubtarget(const Module &M, const std::string &FS, bool thumb); + /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size + /// that still makes it profitable to inline the call. unsigned getMaxInlineSizeThreshold() const { // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb. // Change this once Thumb ldmia / stmia support is added. 
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index d208f59e4f6..91b118029a3 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -87,10 +87,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) setOperationAction(ISD::SDIV , MVT::i64, Custom); setOperationAction(ISD::UDIV , MVT::i64, Custom); - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); - setOperationAction(ISD::MEMSET , MVT::Other, Expand); - setOperationAction(ISD::MEMCPY , MVT::Other, Expand); - // We don't support sin/cos/sqrt/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 29226092688..1cb691882de 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -175,9 +175,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); // SPU has no intrinsics for these particular operations: - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); - setOperationAction(ISD::MEMSET, MVT::Other, Expand); - setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); // PowerPC has no SREM/UREM instructions diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp index 2ec08b60b6f..c53f3b44eba 100644 --- a/lib/Target/IA64/IA64ISelLowering.cpp +++ b/lib/Target/IA64/IA64ISelLowering.cpp @@ -65,9 +65,6 @@ IA64TargetLowering::IA64TargetLowering(TargetMachine &TM) setOperationAction(ISD::UREM , MVT::f32 , Expand); setOperationAction(ISD::UREM , MVT::f64 , Expand); - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); - setOperationAction(ISD::MEMSET , MVT::Other, Expand); - setOperationAction(ISD::MEMCPY , MVT::Other, Expand); 
setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand); setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 5c2e1c0190a..5ea9cdd9c25 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -80,9 +80,6 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); // Mips not supported intrinsics. - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); - setOperationAction(ISD::MEMSET, MVT::Other, Expand); - setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index ddc8e1a7859..e42e9dcba05 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -78,9 +78,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); // PowerPC has no intrinsics for these particular operations - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); - setOperationAction(ISD::MEMSET, MVT::Other, Expand); - setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); // PowerPC has no SREM/UREM instructions @@ -1735,10 +1732,9 @@ static SDOperand CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, unsigned Size) { - SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); - SDOperand SizeNode = DAG.getConstant(Size, MVT::i32); - SDOperand AlwaysInline = DAG.getConstant(0, MVT::i32); - return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline); + SDOperand SizeNode = DAG.getConstant(Size, MVT::i32); + return DAG.getMemcpy(Chain, Dst, Src, 
SizeNode, Flags.getByValAlign(), false, + NULL, 0, NULL, 0); } SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 1d4fe0bc8cb..3d5ad0b7402 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -570,9 +570,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); // SPARC has no intrinsics for these particular operations. - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); - setOperationAction(ISD::MEMSET, MVT::Other, Expand); - setOperationAction(ISD::MEMCPY, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 66384f921c2..9db0288c4e3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -206,7 +206,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::BRCOND , MVT::Other, Custom); setOperationAction(ISD::BR_CC , MVT::Other, Expand); setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); @@ -281,9 +280,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom); setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom); } - // X86 wants to expand memset / memcpy itself. 
- setOperationAction(ISD::MEMSET , MVT::Other, Custom); - setOperationAction(ISD::MEMCPY , MVT::Other, Custom); if (Subtarget->hasSSE1()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); @@ -1113,10 +1109,10 @@ CopyTailCallClobberedArgumentsToVRegs(SDOperand Chain, static SDOperand CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG) { - SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); SDOperand SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); - SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32); - return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline); + return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(), + /*AlwaysInline=*/true, + NULL, 0, NULL, 0); } SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG, @@ -4557,52 +4553,51 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2); } -SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { - SDOperand InFlag(0, 0); - SDOperand Chain = Op.getOperand(0); - unsigned Align = - (unsigned)cast(Op.getOperand(4))->getValue(); - if (Align == 0) Align = 1; +SDOperand +X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + Value *DstSV, uint64_t DstOff) { + ConstantSDNode *ConstantSize = dyn_cast(Size); - ConstantSDNode *I = dyn_cast(Op.getOperand(3)); - // If not DWORD aligned or size is more than the threshold, call memset. - // The libc version is likely to be faster for these cases. It can use the - // address value and run time information about the CPU. - if ((Align & 3) != 0 || - (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) { + /// If not DWORD aligned or size is more than the threshold, call the library. + /// The libc version is likely to be faster for these cases. 
It can use the + /// address value and run time information about the CPU. + if ((Align & 3) != 0 || + !ConstantSize || + ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) { + SDOperand InFlag(0, 0); // Check to see if there is a specialized entry-point for memory zeroing. - ConstantSDNode *V = dyn_cast(Op.getOperand(2)); - const char *bzeroEntry = - V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0; - - MVT::ValueType IntPtr = getPointerTy(); - const Type *IntPtrTy = getTargetData()->getIntPtrType(); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Op.getOperand(1); - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - - if (!bzeroEntry) { - // Extend the unsigned i8 argument to be an int value for the call. - Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); + ConstantSDNode *V = dyn_cast(Src); + if (const char *bzeroEntry = + V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { + MVT::ValueType IntPtr = getPointerTy(); + const Type *IntPtrTy = getTargetData()->getIntPtrType(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + std::pair CallResult = + LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C, + false, DAG.getExternalSymbol(bzeroEntry, IntPtr), + Args, DAG); + return CallResult.second; } - Entry.Node = Op.getOperand(3); - Args.push_back(Entry); - const char *Name = bzeroEntry ? bzeroEntry : "memset"; - std::pair CallResult = - LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C, - false, DAG.getExternalSymbol(Name, IntPtr), Args, DAG); - return CallResult.second; + // Otherwise have the target-independent code call memset. 
+ return SDOperand(); } + uint64_t SizeVal = ConstantSize->getValue(); + SDOperand InFlag(0, 0); MVT::ValueType AVT; SDOperand Count; - ConstantSDNode *ValC = dyn_cast(Op.getOperand(2)); + ConstantSDNode *ValC = dyn_cast(Src); unsigned BytesLeft = 0; bool TwoRepStos = false; if (ValC) { @@ -4630,22 +4625,14 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { default: // Byte aligned AVT = MVT::i8; ValReg = X86::AL; - Count = Op.getOperand(3); + Count = Size; break; } if (AVT > MVT::i8) { - if (I) { - unsigned UBytes = MVT::getSizeInBits(AVT) / 8; - Count = DAG.getIntPtrConstant(I->getValue() / UBytes); - BytesLeft = I->getValue() % UBytes; - } else { - assert(AVT >= MVT::i32 && - "Do not use rep;stos if not at least DWORD aligned"); - Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), - Op.getOperand(3), DAG.getConstant(2, MVT::i8)); - TwoRepStos = true; - } + unsigned UBytes = MVT::getSizeInBits(AVT) / 8; + Count = DAG.getIntPtrConstant(SizeVal / UBytes); + BytesLeft = SizeVal % UBytes; } Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), @@ -4653,8 +4640,8 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { InFlag = Chain.getValue(1); } else { AVT = MVT::i8; - Count = Op.getOperand(3); - Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); + Count = Size; + Chain = DAG.getCopyToReg(Chain, X86::AL, Src, InFlag); InFlag = Chain.getValue(1); } @@ -4662,7 +4649,7 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? 
X86::RDI : X86::EDI, - Op.getOperand(1), InFlag); + Dst, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); @@ -4674,7 +4661,7 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { if (TwoRepStos) { InFlag = Chain.getValue(1); - Count = Op.getOperand(3); + Count = Size; MVT::ValueType CVT = Count.getValueType(); SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); @@ -4688,79 +4675,68 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { Ops.push_back(InFlag); Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); } else if (BytesLeft) { - // Issue stores for the last 1 - 7 bytes. - SDOperand Value; - unsigned Val = ValC->getValue() & 255; - unsigned Offset = I->getValue() - BytesLeft; - SDOperand DstAddr = Op.getOperand(1); - MVT::ValueType AddrVT = DstAddr.getValueType(); - if (BytesLeft >= 4) { - Val = (Val << 8) | Val; - Val = (Val << 16) | Val; - Value = DAG.getConstant(Val, MVT::i32); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, AddrVT, DstAddr, - DAG.getConstant(Offset, AddrVT)), - NULL, 0); - BytesLeft -= 4; - Offset += 4; - } - if (BytesLeft >= 2) { - Value = DAG.getConstant((Val << 8) | Val, MVT::i16); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, AddrVT, DstAddr, - DAG.getConstant(Offset, AddrVT)), - NULL, 0); - BytesLeft -= 2; - Offset += 2; - } - if (BytesLeft == 1) { - Value = DAG.getConstant(Val, MVT::i8); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, AddrVT, DstAddr, - DAG.getConstant(Offset, AddrVT)), - NULL, 0); - } + // Handle the last 1 - 7 bytes. 
+ unsigned Offset = SizeVal - BytesLeft; + MVT::ValueType AddrVT = Dst.getValueType(); + MVT::ValueType SizeVT = Size.getValueType(); + + Chain = DAG.getMemset(Chain, + DAG.getNode(ISD::ADD, AddrVT, Dst, + DAG.getConstant(Offset, AddrVT)), + Src, + DAG.getConstant(BytesLeft, SizeVT), + Align, DstSV, Offset); } + // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. return Chain; } -SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, - SDOperand Dest, - SDOperand Source, - unsigned Size, - unsigned Align, - SelectionDAG &DAG) { +SDOperand +X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff){ + + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast(Size); + if (!ConstantSize) + return SDOperand(); + uint64_t SizeVal = ConstantSize->getValue(); + if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) + return SDOperand(); + + SmallVector Results; + MVT::ValueType AVT; unsigned BytesLeft = 0; - switch (Align & 3) { - case 2: // WORD aligned - AVT = MVT::i16; - break; - case 0: // DWORD aligned - AVT = MVT::i32; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned - AVT = MVT::i64; - break; - default: // Byte aligned - AVT = MVT::i8; - break; - } + if (Align >= 8 && Subtarget->is64Bit()) + AVT = MVT::i64; + else if (Align >= 4) + AVT = MVT::i32; + else if (Align >= 2) + AVT = MVT::i16; + else + AVT = MVT::i8; unsigned UBytes = MVT::getSizeInBits(AVT) / 8; - SDOperand Count = DAG.getIntPtrConstant(Size / UBytes); - BytesLeft = Size % UBytes; + unsigned CountVal = SizeVal / UBytes; + SDOperand Count = DAG.getIntPtrConstant(CountVal); + BytesLeft = SizeVal % UBytes; SDOperand InFlag(0, 0); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? 
X86::RCX : X86::ECX, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, - Dest, InFlag); + Dst, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI, - Source, InFlag); + Src, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); @@ -4768,57 +4744,28 @@ SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, Ops.push_back(Chain); Ops.push_back(DAG.getValueType(AVT)); Ops.push_back(InFlag); - Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); + Results.push_back(DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size())); if (BytesLeft) { - // Issue loads and stores for the last 1 - 7 bytes. - unsigned Offset = Size - BytesLeft; - SDOperand DstAddr = Dest; - MVT::ValueType DstVT = DstAddr.getValueType(); - SDOperand SrcAddr = Source; - MVT::ValueType SrcVT = SrcAddr.getValueType(); - SDOperand Value; - if (BytesLeft >= 4) { - Value = DAG.getLoad(MVT::i32, Chain, - DAG.getNode(ISD::ADD, SrcVT, SrcAddr, - DAG.getConstant(Offset, SrcVT)), - NULL, 0); - Chain = Value.getValue(1); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, DstVT, DstAddr, - DAG.getConstant(Offset, DstVT)), - NULL, 0); - BytesLeft -= 4; - Offset += 4; - } - if (BytesLeft >= 2) { - Value = DAG.getLoad(MVT::i16, Chain, - DAG.getNode(ISD::ADD, SrcVT, SrcAddr, - DAG.getConstant(Offset, SrcVT)), - NULL, 0); - Chain = Value.getValue(1); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, DstVT, DstAddr, - DAG.getConstant(Offset, DstVT)), - NULL, 0); - BytesLeft -= 2; - Offset += 2; - } + // Handle the last 1 - 7 bytes. 
+ unsigned Offset = SizeVal - BytesLeft; + MVT::ValueType DstVT = Dst.getValueType(); + MVT::ValueType SrcVT = Src.getValueType(); + MVT::ValueType SizeVT = Size.getValueType(); - if (BytesLeft == 1) { - Value = DAG.getLoad(MVT::i8, Chain, - DAG.getNode(ISD::ADD, SrcVT, SrcAddr, - DAG.getConstant(Offset, SrcVT)), - NULL, 0); - Chain = Value.getValue(1); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, DstVT, DstAddr, - DAG.getConstant(Offset, DstVT)), - NULL, 0); - } + Results.push_back(DAG.getMemcpy(Chain, + DAG.getNode(ISD::ADD, DstVT, Dst, + DAG.getConstant(Offset, + DstVT)), + DAG.getNode(ISD::ADD, SrcVT, Src, + DAG.getConstant(Offset, + SrcVT)), + DAG.getConstant(BytesLeft, SizeVT), + Align, AlwaysInline, + DstSV, Offset, SrcSV, Offset)); } - return Chain; + return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0], Results.size()); } /// Expand the result of: i64,outchain = READCYCLECOUNTER inchain @@ -5430,8 +5377,6 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::CALL: return LowerCALL(Op, DAG); case ISD::RET: return LowerRET(Op, DAG); case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); - case ISD::MEMSET: return LowerMEMSET(Op, DAG); - case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index d8099506fd6..2abe237ed82 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -441,8 +441,8 @@ namespace llvm { SDOperand Ret, SelectionDAG &DAG) const; - virtual const TargetSubtarget* getSubtarget() { - return static_cast(Subtarget); + virtual const X86Subtarget* getSubtarget() { + return Subtarget; } /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is @@ -512,9 +512,6 @@ namespace llvm { SDOperand 
LowerSELECT(SDOperand Op, SelectionDAG &DAG); SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG); SDOperand LowerMEMSET(SDOperand Op, SelectionDAG &DAG); - SDOperand LowerMEMCPYInline(SDOperand Dest, SDOperand Source, - SDOperand Chain, unsigned Size, unsigned Align, - SelectionDAG &DAG); SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG); SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG); SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG); @@ -535,6 +532,19 @@ namespace llvm { SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG); SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG); SDNode *ExpandATOMIC_LCS(SDNode *N, SelectionDAG &DAG); + + SDOperand EmitTargetCodeForMemset(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + Value *DstSV, uint64_t DstOff); + SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + bool AlwaysInline, + Value *DstSV, uint64_t DstOff, + Value *SrcSV, uint64_t SrcOff); }; } diff --git a/test/CodeGen/X86/2004-02-12-Memcpy.llx b/test/CodeGen/X86/2004-02-12-Memcpy.llx index 151c5a5e849..59364c1f6d6 100644 --- a/test/CodeGen/X86/2004-02-12-Memcpy.llx +++ b/test/CodeGen/X86/2004-02-12-Memcpy.llx @@ -1,5 +1,4 @@ -; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1 -; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep memcpy | count 2 +; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 3 @A = global [32 x i32] zeroinitializer @B = global [32 x i32] zeroinitializer diff --git a/test/CodeGen/X86/byval2.ll b/test/CodeGen/X86/byval2.ll index f438160bdaa..f85c8ffbe4f 100644 --- a/test/CodeGen/X86/byval2.ll +++ b/test/CodeGen/X86/byval2.ll @@ -1,7 +1,9 @@ ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2 ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2 -%struct.s = type { 
i64, i64, i64 } +%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64, + i64, i64, i64, i64, i64, i64, i64, i64, + i64 } define void @g(i64 %a, i64 %b, i64 %c) { entry: diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll index b3794eccb46..074bab4c0a9 100644 --- a/test/CodeGen/X86/byval3.ll +++ b/test/CodeGen/X86/byval3.ll @@ -1,7 +1,11 @@ ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2 ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2 -%struct.s = type { i32, i32, i32, i32, i32, i32 } +%struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32, + i32, i32, i32, i32, i32, i32, i32, i32, + i32, i32, i32, i32, i32, i32, i32, i32, + i32, i32, i32, i32, i32, i32, i32, i32, + i32 } define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) { entry: diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll index 591749f768e..d2fa9e289e7 100644 --- a/test/CodeGen/X86/byval4.ll +++ b/test/CodeGen/X86/byval4.ll @@ -1,7 +1,15 @@ ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsw | count 2 ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2 -%struct.s = type { i16, i16, i16, i16, i16, i16 } +%struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16 } define void @g(i16 signext %a1, i16 signext %a2, i16 signext %a3, diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll index 4965d166666..fd9c197bbfd 100644 --- a/test/CodeGen/X86/byval5.ll +++ b/test/CodeGen/X86/byval5.ll @@ -1,7 +1,23 @@ ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsb | count 2 ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2 -%struct.s = type { i8, i8, i8, i8, i8, i8 } +%struct.s = 
type { i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8 } define void @g(i8 signext %a1, i8 signext %a2, i8 signext %a3, diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll index 4199bf062e7..fcbc59b838a 100644 --- a/test/CodeGen/X86/byval7.ll +++ b/test/CodeGen/X86/byval7.ll @@ -1,6 +1,7 @@ ; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep add | grep 16 - %struct.S = type { <2 x i64> } + %struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, + <2 x i64> } define i32 @main() nounwind { entry: diff --git a/test/CodeGen/X86/small-byval-memcpy.ll b/test/CodeGen/X86/small-byval-memcpy.ll new file mode 100644 index 00000000000..dedd948c297 --- /dev/null +++ b/test/CodeGen/X86/small-byval-memcpy.ll @@ -0,0 +1,22 @@ +; RUN: llvm-as < %s | llc | not grep movs + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin8" + +define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %z) nounwind { +entry: + %iz = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=3] + %tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1 ; [#uses=1] + %tmp2 = load x86_fp80* %tmp1, align 16 ; [#uses=1] + %tmp3 = sub x86_fp80 0xK80000000000000000000, %tmp2 ; [#uses=1] + %tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1 ; [#uses=1] + %real = getelementptr { x86_fp80, 
x86_fp80 }* %iz, i32 0, i32 0 ; [#uses=1] + %tmp6 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0 ; [#uses=1] + %tmp7 = load x86_fp80* %tmp6, align 16 ; [#uses=1] + store x86_fp80 %tmp3, x86_fp80* %real, align 16 + store x86_fp80 %tmp7, x86_fp80* %tmp4, align 16 + call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %iz ) nounwind + ret void +} + +declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval align 4 ) nounwind diff --git a/test/CodeGen/X86/variable-sized-darwin-bzero.ll b/test/CodeGen/X86/variable-sized-darwin-bzero.ll new file mode 100644 index 00000000000..b0cdf496d5f --- /dev/null +++ b/test/CodeGen/X86/variable-sized-darwin-bzero.ll @@ -0,0 +1,8 @@ +; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero + +declare void @llvm.memset.i64(i8*, i8, i64, i32) + +define void @foo(i8* %p, i64 %n) { + call void @llvm.memset.i64(i8* %p, i8 0, i64 %n, i32 4) + ret void +}