llvm-mirror/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
Commit 45428262f7 by Evandro Menezes
[AArch64] Fix PR32384: bump up the number of stores per memset and memcpy

As suggested in https://bugs.llvm.org/show_bug.cgi?id=32384#c1, this change
makes the inlining of `memset()` and `memcpy()` more aggressive when
compiling for speed.  The tuning remains the same when optimizing for size.

Patch by: Sebastian Pop <s.pop@samsung.com>
          Evandro Menezes <e.menezes@samsung.com>

Differential revision: https://reviews.llvm.org/D45098

llvm-svn: 333429
2018-05-29 15:58:50 +00:00
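As a rough sketch of the behavior being tuned (illustrative only, not part of the test file below; the name @sketch_memset and the 256-byte size are made up for the example): with the raised store-count limits, llc is willing to expand a larger fixed-size memset() or memcpy() into an inline sequence of stores when compiling for speed, while the previous, more conservative limits continue to apply when optimizing for size.

; Illustrative sketch, not part of the checked-in test below.
define void @sketch_memset(i8* align 8 %p) {
entry:
  ; Whether this becomes a short run of stores or a call to memset depends on
  ; the per-target store-count limit; 256 bytes is an example size, not the
  ; exact cutoff before or after the patch.
  call void @llvm.memset.p0i8.i64(i8* align 8 %p, i8 0, i64 256, i1 false)
  ret void
}
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1)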

; RUN: llc -mtriple=arm64-apple-ios -mattr=+strict-align < %s | FileCheck %s

; Small (16 bytes here) unaligned memcpy() should be a function call if
; strict-alignment is turned on.
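; With no alignment known on either pointer, +strict-align leaves only
; byte-sized accesses, and a 16-byte byte-by-byte expansion is over the inline
; limit, so the length (0x10 == 16) is materialized into w2 and memcpy is
; called instead.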
define void @t0(i8* %out, i8* %in) {
; CHECK-LABEL: t0:
; CHECK: orr w2, wzr, #0x10
; CHECK-NEXT: bl _memcpy
entry:
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i1 false)
  ret void
}

; Small (16 bytes here) aligned memcpy() should be inlined even if
; strict-alignment is turned on.
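; With align 8 known on both pointers, the copy can stay inline even under
; +strict-align: a single ldp/stp pair moves all 16 bytes, as the CHECK lines
; below verify.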
define void @t1(i8* align 8 %out, i8* align 8 %in) {
; CHECK-LABEL: t1:
; CHECK: ldp x{{[0-9]+}}, x{{[0-9]+}}, [x1]
; CHECK-NEXT: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0]
entry:
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %out, i8* align 8 %in, i64 16, i1 false)
  ret void
}

; Tiny (4 bytes here) unaligned memcpy() should be inlined with byte sized
; loads and stores if strict-alignment is turned on.
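; Only byte-sized accesses are safe here, but four loads and four stores are
; cheap enough that the copy is still expanded inline rather than turned into
; a libcall.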
define void @t2(i8* %out, i8* %in) {
; CHECK-LABEL: t2:
; CHECK: ldrb w{{[0-9]+}}, [x1, #3]
; CHECK-NEXT: ldrb w{{[0-9]+}}, [x1, #2]
; CHECK-NEXT: ldrb w{{[0-9]+}}, [x1, #1]
; CHECK-NEXT: ldrb w{{[0-9]+}}, [x1]
; CHECK-NEXT: strb w{{[0-9]+}}, [x0, #3]
; CHECK-NEXT: strb w{{[0-9]+}}, [x0, #2]
; CHECK-NEXT: strb w{{[0-9]+}}, [x0, #1]
; CHECK-NEXT: strb w{{[0-9]+}}, [x0]
entry:
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 4, i1 false)
  ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)