diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9340e7f0a55..a23032bb3be 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6727,7 +6727,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::aarch64_neon_ld4r: {
     Info.opc = ISD::INTRINSIC_W_CHAIN;
     // Conservatively set memVT to the entire set of vectors loaded.
-    uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
+    uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
     Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
     Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
     Info.offset = 0;
@@ -6753,7 +6753,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
       Type *ArgTy = I.getArgOperand(ArgI)->getType();
       if (!ArgTy->isVectorTy())
        break;
-      NumElts += DL.getTypeAllocSize(ArgTy) / 8;
+      NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
     }
     Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
     Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
@@ -6996,7 +6996,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
   const DataLayout &DL = LI->getModule()->getDataLayout();
 
   VectorType *VecTy = Shuffles[0]->getType();
-  unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
+  unsigned VecSize = DL.getTypeSizeInBits(VecTy);
 
   // Skip if we do not have NEON and skip illegal vector types.
   if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128))
@@ -7082,7 +7082,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
   VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
 
   const DataLayout &DL = SI->getModule()->getDataLayout();
-  unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
+  unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
 
   // Skip if we do not have NEON and skip illegal vector types.
   if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128))
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index e803ef949b9..9af0e644478 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -448,7 +448,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
     unsigned NumElts = VecTy->getVectorNumElements();
     Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
-    unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
+    unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
 
     // ldN/stN only support legal vector types of size 64 or 128 in bits.
     if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 23f7bd0f4c8..cc9656aa0b4 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -11739,7 +11739,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.opc = ISD::INTRINSIC_W_CHAIN;
     // Conservatively set memVT to the entire set of vectors loaded.
     auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
-    uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
+    uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
     Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
@@ -11765,7 +11765,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
       Type *ArgTy = I.getArgOperand(ArgI)->getType();
       if (!ArgTy->isVectorTy())
        break;
-      NumElts += DL.getTypeAllocSize(ArgTy) / 8;
+      NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
     }
     Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
     Info.ptrVal = I.getArgOperand(0);
@@ -12108,8 +12108,8 @@ bool ARMTargetLowering::lowerInterleavedLoad(
   Type *EltTy = VecTy->getVectorElementType();
 
   const DataLayout &DL = LI->getModule()->getDataLayout();
-  unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
-  bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
+  unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+  bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
 
   // Skip if we do not have NEON and skip illegal vector types and vector types
   // with i64/f64 elements (vldN doesn't support i64/f64 elements).
@@ -12198,8 +12198,8 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
   VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
 
   const DataLayout &DL = SI->getModule()->getDataLayout();
-  unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
-  bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
+  unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
+  bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
 
   // Skip if we do not have NEON and skip illegal vector types and vector types
   // with i64/f64 elements (vstN doesn't support i64/f64 elements).
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 582a057e923..c1520119ef2 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -478,12 +478,12 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   assert(isa<VectorType>(VecTy) && "Expect a vector type");
 
   // vldN/vstN doesn't support vector types of i64/f64 element.
-  bool EltIs64Bits = DL.getTypeAllocSizeInBits(VecTy->getScalarType()) == 64;
+  bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
 
   if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
     unsigned NumElts = VecTy->getVectorNumElements();
     Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
-    unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
+    unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
 
     // vldN/vstN only support legal vector types of size 64 or 128 in bits.
     if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
diff --git a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
index 545aeda8860..1bc2a3ccb1c 100644
--- a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
+++ b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON
-; RUN: llc -march=aarch64 -mattr=-neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NONEON
+; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON
+; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true -mattr=-neon < %s | FileCheck %s -check-prefix=NONEON
 
 ; NEON-LABEL: load_factor2:
 ; NEON: ld2 { v0.8b, v1.8b }, [x0]
@@ -232,3 +232,39 @@ define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <
   store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
   ret void
 }
+
+; Check that we do something sane with illegal types.
+
+; NEON-LABEL: load_illegal_factor2:
+; NEON: BB#0:
+; NEON-NEXT: ldr q[[V:[0-9]+]], [x0]
+; NEON-NEXT: uzp1 v0.4s, v[[V]].4s, v{{.*}}.4s
+; NEON-NEXT: ret
+; NONEON-LABEL: load_illegal_factor2:
+; NONEON: BB#0:
+; NONEON-NEXT: ldr s0, [x0]
+; NONEON-NEXT: ldr s1, [x0, #8]
+; NONEON-NEXT: ret
+define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind {
+  %tmp1 = load <3 x float>, <3 x float>* %p, align 16
+  %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+  ret <3 x float> %tmp2
+}
+
+; NEON-LABEL: store_illegal_factor2:
+; NEON: BB#0:
+; NEON-NEXT: uzp1 v0.4s, v0.4s, v{{.*}}.4s
+; NEON-NEXT: st1 { v0.d }[0], [x0]
+; NEON-NEXT: ret
+; NONEON-LABEL: store_illegal_factor2:
+; NONEON: BB#0:
+; NONEON-NEXT: fmov w[[ELT2:[0-9]+]], s2
+; NONEON-NEXT: fmov w[[RES:[0-9]+]], s0
+; NONEON-NEXT: bfi x[[RES]], x[[ELT2]], #32, #32
+; NONEON-NEXT: str x[[RES]], [x0]
+; NONEON-NEXT: ret
+define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
+  %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+  store <3 x float> %tmp1, <3 x float>* %p, align 16
+  ret void
+}
diff --git a/test/CodeGen/ARM/arm-interleaved-accesses.ll b/test/CodeGen/ARM/arm-interleaved-accesses.ll
index c3aa2d6b4da..002e71f6d9b 100644
--- a/test/CodeGen/ARM/arm-interleaved-accesses.ll
+++ b/test/CodeGen/ARM/arm-interleaved-accesses.ll
@@ -264,3 +264,43 @@ define void @store_address_space(<2 x i32>* %A, <2 x i32>* %B, <4 x i32> addrspa
   store <4 x i32> %interleaved, <4 x i32> addrspace(1)* %C
   ret void
 }
+
+; Check that we do something sane with illegal types.
+
+; NEON-LABEL: load_illegal_factor2:
+; NEON: BB#0:
+; NEON-NEXT: vld1.64 {d16, d17}, [r0:128]
+; NEON-NEXT: vuzp.32 q8, {{.*}}
+; NEON-NEXT: vmov r0, r1, d16
+; NEON-NEXT: vmov r2, r3, {{.*}}
+; NEON-NEXT: mov pc, lr
+; NONEON-LABEL: load_illegal_factor2:
+; NONEON: BB#0:
+; NONEON-NEXT: ldr [[ELT0:r[0-9]+]], [r0]
+; NONEON-NEXT: ldr r1, [r0, #8]
+; NONEON-NEXT: mov r0, [[ELT0]]
+; NONEON-NEXT: mov pc, lr
+define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind {
+  %tmp1 = load <3 x float>, <3 x float>* %p, align 16
+  %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+  ret <3 x float> %tmp2
+}
+
+; This lowering isn't great, but it's at least correct.
+
+; NEON-LABEL: store_illegal_factor2:
+; NEON: BB#0:
+; NEON-NEXT: vldr d17, [sp]
+; NEON-NEXT: vmov d16, r2, r3
+; NEON-NEXT: vuzp.32 q8, {{.*}}
+; NEON-NEXT: vstr d16, [r0]
+; NEON-NEXT: mov pc, lr
+; NONEON-LABEL: store_illegal_factor2:
+; NONEON: BB#0:
+; NONEON-NEXT: stm r0, {r1, r3}
+; NONEON-NEXT: mov pc, lr
+define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
+  %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+  store <3 x float> %tmp1, <3 x float>* %p, align 16
+  ret void
+}