diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h index cc88dae9fa6..8c3f8962683 100644 --- a/include/llvm/Target/TargetData.h +++ b/include/llvm/Target/TargetData.h @@ -193,9 +193,7 @@ public: /// getTypeStoreSize - Return the maximum number of bytes that may be /// overwritten by storing the specified type. For example, returns 5 /// for i36 and 10 for x86_fp80. - uint64_t getTypeStoreSize(const Type *Ty) const { - return (getTypeSizeInBits(Ty)+7)/8; - } + uint64_t getTypeStoreSize(const Type *Ty) const; /// getTypeStoreSizeInBits - Return the maximum number of bits that may be /// overwritten by storing the specified type; always a multiple of 8. For @@ -208,10 +206,7 @@ public: /// of the specified type, including alignment padding. This is the amount /// that alloca reserves for this type. For example, returns 12 or 16 for /// x86_fp80, depending on alignment. - uint64_t getTypeAllocSize(const Type* Ty) const { - // Round up to the next alignment boundary. - return RoundUpAlignment(getTypeStoreSize(Ty), getABITypeAlignment(Ty)); - } + uint64_t getTypeAllocSize(const Type* Ty) const; /// getTypeAllocSizeInBits - Return the offset in bits between successive /// objects of the specified type, including alignment padding; always a diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e9321dad8cb..87dc0ac935f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -660,7 +660,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3); // Add the offset to the index. 
- unsigned EltSize = EltVT.getSizeInBits()/8; + unsigned EltSize = TLI.getTargetData()-> + getTypeAllocSize(EltVT.getTypeForEVT(*DAG.getContext())); Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT)); SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); // Store the scalar value. @@ -1512,8 +1513,9 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { false, false, 0); // Add the offset to the index. - unsigned EltSize = - Vec.getValueType().getVectorElementType().getSizeInBits()/8; + unsigned EltSize = TLI.getTargetData()->getTypeAllocSize( + Vec.getValueType().getVectorElementType().getTypeForEVT(*DAG.getContext())); + Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, Idx.getValueType())); @@ -1548,7 +1550,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // Emit a store of each element to the stack slot. SmallVector Stores; - unsigned TypeByteSize = EltVT.getSizeInBits() / 8; + unsigned TypeByteSize = TLI.getTargetData()-> + getTypeAllocSize(EltVT.getTypeForEVT(*DAG.getContext())); // Store (in the right endianness) the elements to memory. for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { // Ignore undef elements. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 0d929f12c95..2c364dc34a0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -966,7 +966,8 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index); // Calculate the element offset and add it to the pointer. - unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. 
+ unsigned EltSize = TLI.getTargetData()-> + getTypeAllocSize(EltVT.getTypeForEVT(*DAG.getContext())); Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, DAG.getConstant(EltSize, Index.getValueType())); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8363c3af212..1ac04fb2261 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -715,7 +715,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, false, false, 0); // Increment the pointer to the other part. - unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + unsigned IncrementSize = TLI.getTargetData()-> + getTypeAllocSize(Lo.getValueType().getTypeForEVT(*DAG.getContext())); StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getIntPtrConstant(IncrementSize)); @@ -757,7 +758,8 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment); - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + unsigned IncrementSize = TLI.getTargetData()-> + getTypeAllocSize(LoMemVT.getTypeForEVT(*DAG.getContext())); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); SVOffset += IncrementSize; @@ -1121,7 +1123,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + unsigned IncrementSize = TLI.getTargetData()-> + getTypeAllocSize(LoMemVT.getTypeForEVT(*DAG.getContext())); if (isTruncating) Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, @@ -2182,7 +2185,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector& LdChain, unsigned Offset = 0; while (LdWidth > 0) { - 
unsigned Increment = NewVTWidth / 8; + unsigned Increment = TLI.getTargetData()-> + getTypeAllocSize(NewVT.getTypeForEVT(*DAG.getContext())); Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Increment)); @@ -2279,7 +2283,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector& LdChain, // Load each element and widen unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector Ops(WidenNumElts); - unsigned Increment = LdEltVT.getSizeInBits() / 8; + unsigned Increment = TLI.getTargetData()-> + getTypeAllocSize(LdEltVT.getTypeForEVT(*DAG.getContext())); Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, LdEltVT, isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[0].getValue(1)); @@ -2331,7 +2336,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector& StChain, // Find the largest vector type we can store with EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT); unsigned NewVTWidth = NewVT.getSizeInBits(); - unsigned Increment = NewVTWidth / 8; + unsigned Increment = TLI.getTargetData()-> + getTypeAllocSize(NewVT.getTypeForEVT(*DAG.getContext())); if (NewVT.isVector()) { unsigned NumVTElts = NewVT.getVectorNumElements(); do { @@ -2399,7 +2405,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector& StChain, // the store. 
EVT StEltVT = StVT.getVectorElementType(); EVT ValEltVT = ValVT.getVectorElementType(); - unsigned Increment = ValEltVT.getSizeInBits() / 8; + unsigned Increment = TLI.getTargetData()-> + getTypeAllocSize(ValEltVT.getTypeForEVT(*DAG.getContext())); unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getIntPtrConstant(0)); diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 9a168087aed..2a90d7f6c15 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -455,7 +455,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { return getPointerSizeInBits(); case Type::ArrayTyID: { const ArrayType *ATy = cast<ArrayType>(Ty); - return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements(); + return getTypeSizeInBits(ATy->getElementType())*ATy->getNumElements(); } case Type::StructTyID: // Get the layout annotation... which is lazily created on demand. @@ -484,6 +484,47 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { return 0; } +/// getTypeStoreSize - Return the maximum number of bytes that may be +/// overwritten by storing the specified type. For example, returns 5 +/// for i36 and 10 for x86_fp80. +uint64_t TargetData::getTypeStoreSize(const Type *Ty) const { + // Arrays and vectors are allocated as sequences of elements. 
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + if (ATy->getNumElements() == 0) + return 0; + const Type *ElementType = ATy->getElementType(); + return getTypeAllocSize(ElementType) * (ATy->getNumElements() - 1) + + getTypeStoreSize(ElementType); + } + if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { + const Type *ElementType = VTy->getElementType(); + return getTypeAllocSize(ElementType) * (VTy->getNumElements() - 1) + + getTypeStoreSize(ElementType); + } + + return (getTypeSizeInBits(Ty)+7)/8; +} + +/// getTypeAllocSize - Return the offset in bytes between successive objects +/// of the specified type, including alignment padding. This is the amount +/// that alloca reserves for this type. For example, returns 12 or 16 for +/// x86_fp80, depending on alignment. +uint64_t TargetData::getTypeAllocSize(const Type* Ty) const { + // Arrays and vectors are allocated as sequences of elements. + // Note that this means that things like vectors-of-i1 are not bit-packed + // in memory (except on a hypothetical bit-addressable machine). If + // someone builds hardware with native vector-of-i1 stores and the idiom + // of bitcasting vectors to integers in order to bitpack them for storage + // isn't sufficient, TargetData may need a new "size" concept. + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) + return getTypeAllocSize(ATy->getElementType()) * ATy->getNumElements(); + if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + return getTypeAllocSize(VTy->getElementType()) * VTy->getNumElements(); + + // Round up to the next alignment boundary. + return RoundUpAlignment(getTypeStoreSize(Ty), getABITypeAlignment(Ty)); +} + /*! \param abi_or_pref Flag that determines which alignment is returned. true returns the ABI alignment, false returns the preferred alignment. 
diff --git a/test/CodeGen/X86/vector-of-i1.ll b/test/CodeGen/X86/vector-of-i1.ll
new file mode 100644
index 00000000000..7bbcf8ded7d
--- /dev/null
+++ b/test/CodeGen/X86/vector-of-i1.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; Vectors of i1 are stored with each element having a
+; different address. Since the address unit on x86 is 8 bits,
+; that means each i1 value takes 8 bits of storage.
+
+; Storing a constant <8 x i1> is expected to emit one byte store per
+; element: element k of the constant is written to offset k from %p.
+; CHECK: store:
+; CHECK: movb $1, 7(%rdi)
+; CHECK: movb $1, 6(%rdi)
+; CHECK: movb $0, 5(%rdi)
+; CHECK: movb $0, 4(%rdi)
+; CHECK: movb $1, 3(%rdi)
+; CHECK: movb $0, 2(%rdi)
+; CHECK: movb $1, 1(%rdi)
+; CHECK: movb $0, (%rdi)
+define void @store(<8 x i1>* %p) nounwind {
+  store <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1>, <8 x i1>* %p
+  ret void
+}
+
+; Extracting at a run-time index: the expected output reads the vector
+; back with byte loads from the individual element offsets.
+; CHECK: variable_extract:
+; CHECK: movb 7(%rdi),
+; CHECK: movb 6(%rdi),
+; CHECK: movb 5(%rdi),
+define i32 @variable_extract(<8 x i1>* %p, i32 %n) nounwind {
+  %t = load <8 x i1>* %p
+  %s = extractelement <8 x i1> %t, i32 %n
+  %e = zext i1 %s to i32
+  ret i32 %e
+}
+
+; Extracting at the constant index 3: the expected output contains a
+; zero-extending byte load from offset 3.
+; CHECK: constant_extract:
+; CHECK: movzbl 3(%rdi), %eax
+define i32 @constant_extract(<8 x i1>* %p, i32 %n) nounwind {
+  %t = load <8 x i1>* %p
+  %s = extractelement <8 x i1> %t, i32 3
+  %e = zext i1 %s to i32
+  ret i32 %e
+}