Try to lower memset/memcpy/memmove to vector instructions on ARM where the alignment permits.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143582 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Lang Hames 2011-11-02 22:52:45 +00:00
parent d69568723d
commit 1a1d1fcc0b
3 changed files with 54 additions and 1 deletions

View File

@ -8127,6 +8127,34 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
}
}
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
unsigned AlignCheck) {
return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
(DstAlign == 0 || DstAlign % AlignCheck == 0));
}
EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool NonScalarIntSafe,
bool MemcpyStrSrc,
MachineFunction &MF) const {
const Function *F = MF.getFunction();
// See if we can use NEON instructions for this...
if (NonScalarIntSafe &&
!F->hasFnAttr(Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
return MVT::v4i32;
} else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
return MVT::v2i32;
}
}
// Let the target-independent logic figure it out.
return MVT::Other;
}
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;

View File

@ -266,9 +266,14 @@ namespace llvm {
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses. of the specified type.
/// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
virtual EVT getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool NonScalarIntSafe,
bool MemcpyStrSrc,
MachineFunction &MF) const;
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;

View File

@ -0,0 +1,20 @@
; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s
; Should trigger a NEON store.
; CHECK: vstr.64
define void @f_0_12(i8* nocapture %c) nounwind optsize {
entry:
call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
ret void
}
; Trigger multiple NEON stores.
; CHECK: vstmia
; CHECK-NEXT: vstmia
define void @f_0_40(i8* nocapture %c) nounwind optsize {
entry:
call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false)
ret void
}
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind