mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-10 22:46:25 +00:00
[AArch64] Teach AArch64TargetLowering::getOptimalMemOpType to consider alignment
restrictions when choosing a type for small-memcpy inlining in SelectionDAGBuilder. This ensures that the loads and stores output for the memcpy won't be further expanded during legalization, which would cause the total number of instructions for the memcpy to exceed (often significantly) the inlining thresholds. <rdar://problem/17829180> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234462 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
23295f613b
commit
174f04eefb
@ -6664,7 +6664,17 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
|
||||
(allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
|
||||
return MVT::f128;
|
||||
|
||||
return Size >= 8 ? MVT::i64 : MVT::i32;
|
||||
if (Size >= 8 &&
|
||||
(memOpAlign(SrcAlign, DstAlign, 8) ||
|
||||
(allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
|
||||
return MVT::i64;
|
||||
|
||||
if (Size >= 4 &&
|
||||
(memOpAlign(SrcAlign, DstAlign, 4) ||
|
||||
(allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
|
||||
return MVT::i32;
|
||||
|
||||
return MVT::Other;
|
||||
}
|
||||
|
||||
// 12-bit optionally shifted immediates are legal for adds.
|
||||
|
14
test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
Normal file
14
test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
Normal file
@ -0,0 +1,14 @@
|
||||
; RUN: llc -march=arm64 -aarch64-strict-align < %s | FileCheck %s
|
||||
|
||||
; Small (16-bytes here) unaligned memcpys should stay memcpy calls if
|
||||
; strict-alignment is turned on.
|
||||
define void @t0(i8* %out, i8* %in) {
|
||||
; CHECK-LABEL: t0:
|
||||
; CHECK: orr w2, wzr, #0x10
|
||||
; CHECK-NEXT: bl _memcpy
|
||||
entry:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i32 1, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
|
Loading…
x
Reference in New Issue
Block a user