llvm/test/CodeGen/ARM/memcpy-ldm-stm.ll
Scott Douglass ab6273be70 [ARM] Modify codegen for memcpy intrinsic to prefer LDM/STM.
We were previously codegen'ing memcpy as regular load/store operations and
hoping that the register allocator would allocate registers in ascending order
so that we could apply an LDM/STM combine after register allocation. According
to the commit that first introduced this code (r37179), we planned to teach the
register allocator to allocate the registers in ascending order. This never got
implemented, and up to now we've been stuck with very poor codegen.

A much simpler approach for achieving better codegen is to create MEMCPY pseudo
instructions, attach scratch virtual registers to them and then, post register
allocation, expand the MEMCPYs into LDM/STM pairs using the scratch registers.
The register allocator will have picked arbitrary registers which we sort when
expanding the MEMCPY. This approach also avoids the need to repeatedly calculate
offsets which ultimately ought to be eliminated pre-RA in order to decrease
register pressure.

Fixes PR9199 and PR23768.

[This is based on Peter Collingbourne's r238473 which was reverted.]

Differential Revision: http://reviews.llvm.org/D13239

Change-Id: I727543c2e94136e0f80b8e22d5642d7b9ee5b458
Author: Peter Collingbourne <peter@pcc.me.uk>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249322 91177308-0d34-0410-b5e6-96231b3b80d8
2015-10-05 14:49:54 +00:00

95 lines
3.6 KiB
LLVM

; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | \
; RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECKV6
; RUN: llc -mtriple=thumbv6m-eabi -O=0 -verify-machineinstrs %s -o - | \
; RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECKV6
; RUN: llc -mtriple=thumbv7a-eabi -mattr=-neon -verify-machineinstrs %s -o - | \
; RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECKV7
; RUN: llc -mtriple=armv7a-eabi -mattr=-neon -verify-machineinstrs %s -o - | \
; RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECKV7
@d = external global [64 x i32]
@s = external global [64 x i32]
; Function Attrs: nounwind
define void @t1() #0 {
entry:
; CHECK-LABEL: t1:
; CHECKV6: ldr [[LB:r[0-7]]],
; CHECKV6-NEXT: ldr [[SB:r[0-7]]],
; We use '[rl0-9]+' to allow 'r0'..'r12', 'lr'
; CHECKV7: movt [[LB:[rl0-9]+]], :upper16:d
; CHECKV7-NEXT: movt [[SB:[rl0-9]+]], :upper16:s
; CHECK-NEXT: ldm{{(\.w)?}} [[LB]]!,
; CHECK-NEXT: stm{{(\.w)?}} [[SB]]!,
; Think of the monstrosity '{{\[}}[[LB]]]' as '[ [[LB]] ]' without the spaces.
; CHECK-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]]]
; CHECK-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]]]
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 17, i32 4, i1 false)
ret void
}
; Function Attrs: nounwind
define void @t2() #0 {
entry:
; CHECK-LABEL: t2:
; CHECKV6: ldr [[LB:r[0-7]]],
; CHECKV6-NEXT: ldr [[SB:r[0-7]]],
; CHECKV7: movt [[LB:[rl0-9]+]], :upper16:d
; CHECKV7-NEXT: movt [[SB:[rl0-9]+]], :upper16:s
; CHECK-NEXT: ldm{{(\.w)?}} [[LB]]!,
; CHECK-NEXT: stm{{(\.w)?}} [[SB]]!,
; CHECK-NEXT: ldrh{{(\.w)?}} {{.*}}, {{\[}}[[LB]]]
; CHECK-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #2]
; CHECK-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #2]
; CHECK-NEXT: strh{{(\.w)?}} {{.*}}, {{\[}}[[SB]]]
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false)
ret void
}
; PR23768
%struct.T = type { i8, i64, i8 }
@copy = external global %struct.T, align 8
@etest = external global %struct.T, align 8
define void @t3() {
call void @llvm.memcpy.p0i8.p0i8.i32(
i8* getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0),
i8* getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0),
i32 24, i32 8, i1 false)
call void @llvm.memcpy.p0i8.p0i8.i32(
i8* getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0),
i8* getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0),
i32 24, i32 8, i1 false)
ret void
}
%struct.S = type { [12 x i32] }
; CHECK-LABEL: test3
define void @test3(%struct.S* %d, %struct.S* %s) #0 {
%1 = bitcast %struct.S* %d to i8*
%2 = bitcast %struct.S* %s to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 48, i32 4, i1 false)
; 3 ldm/stm pairs in v6; 2 in v7
; CHECK: ldm{{(\.w)?}} {{[rl0-9]+!?}}, [[REGLIST1:{.*}]]
; CHECK: stm{{(\.w)?}} {{[rl0-9]+!?}}, [[REGLIST1]]
; CHECK: ldm{{(\.w)?}} {{[rl0-9]+!?}}, [[REGLIST2:{.*}]]
; CHECK: stm{{(\.w)?}} {{[rl0-9]+!?}}, [[REGLIST2]]
; CHECKV6: ldm {{r[0-7]!?}}, [[REGLIST3:{.*}]]
; CHECKV6: stm {{r[0-7]!?}}, [[REGLIST3]]
; CHECKV7-NOT: ldm
; CHECKV7-NOT: stm
%arrayidx = getelementptr inbounds %struct.S, %struct.S* %s, i32 0, i32 0, i32 1
tail call void @g(i32* %arrayidx) #3
ret void
}
declare void @g(i32*)
; Set "no-frame-pointer-elim" to increase register pressure
attributes #0 = { "no-frame-pointer-elim"="true" }
; Function Attrs: nounwind
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1