Mirror of https://github.com/RPCSX/llvm.git (synced 2024-12-16 00:16:50 +00:00)
ab6273be70
We were previously codegen'ing memcpy as regular load/store operations and hoping that the register allocator would allocate registers in ascending order so that we could apply an LDM/STM combine after register allocation. According to the commit that first introduced this code (r37179), we planned to teach the register allocator to allocate the registers in ascending order. This never got implemented, and up to now we've been stuck with very poor codegen.

A much simpler approach for achieving better codegen is to create MEMCPY pseudo instructions, attach scratch virtual registers to them and then, post register allocation, expand the MEMCPYs into LDM/STM pairs using the scratch registers. The register allocator will have picked arbitrary registers, which we sort when expanding the MEMCPY. This approach also avoids the need to repeatedly calculate offsets, which ultimately ought to be eliminated pre-RA in order to decrease register pressure.

Fixes PR9199 and PR23768.

[This is based on Peter Collingbourne's r238473, which was reverted.]

Differential Revision: http://reviews.llvm.org/D13239

Change-Id: I727543c2e94136e0f80b8e22d5642d7b9ee5b458
Author: Peter Collingbourne <peter@pcc.me.uk>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249322 91177308-0d34-0410-b5e6-96231b3b80d8
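To make the approach concrete, here is a minimal standalone sketch of the post-register-allocation expansion step described above. It is illustrative only: it does not use the real LLVM MachineInstr/MachineFunction API, and the register numbers, the helper name expandMemcpy, and the 4-register grouping are assumptions invented for the example. The point it demonstrates is that, because the allocator may hand back scratch registers in any order, the expansion simply sorts them before building the ascending LDM/STM register lists.

// A minimal standalone sketch (NOT the actual LLVM/ARM backend code) of the
// post-RA expansion idea described above: the allocator assigns arbitrary
// physical registers to the MEMCPY pseudo's scratch operands, and the
// expansion sorts them because LDM/STM register lists must be ascending.
// Register numbers, names, and the grouping size are assumptions made up
// for this example; registers are modelled as plain integers.
#include <algorithm>
#include <cstdio>
#include <vector>

using Reg = unsigned; // hypothetical encoding: smaller number == lower reg

// Emit textual ldm/stm pairs copying `Words` 32-bit words from the address
// in `Src` to the address in `Dst`, using whichever scratch registers the
// allocator happened to pick (assumes at least one scratch register).
static void expandMemcpy(Reg Src, Reg Dst, std::vector<Reg> Scratch,
                         unsigned Words) {
  // LDM/STM register lists must be in ascending register order, so sort the
  // arbitrarily-allocated scratch registers before using them.
  std::sort(Scratch.begin(), Scratch.end());
  for (unsigned Done = 0; Done < Words;) {
    unsigned N = std::min(Words - Done, (unsigned)Scratch.size());
    Done += N;
    // Writeback (!) on every transfer except the last, as in the test below.
    const char *WB = Done < Words ? "!" : "";
    std::printf("ldm r%u%s, {", Src, WB);
    for (unsigned I = 0; I != N; ++I)
      std::printf("r%u%s", Scratch[I], I + 1 == N ? "}\n" : ", ");
    std::printf("stm r%u%s, {", Dst, WB);
    for (unsigned I = 0; I != N; ++I)
      std::printf("r%u%s", Scratch[I], I + 1 == N ? "}\n" : ", ");
  }
}

int main() {
  // Copy 28 bytes (7 words) with scratch registers r5, r2, r7, r3: prints a
  // 4-register ldm/stm pair with writeback, then a final 3-register pair.
  expandMemcpy(/*Src=*/0, /*Dst=*/1, {5, 2, 7, 3}, /*Words=*/7);
  return 0;
}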
94 lines
3.8 KiB
LLVM
; RUN: llc -mtriple armv7a-none-eabi -mattr=-neon < %s -verify-machineinstrs -o - | FileCheck %s

; Thumb1 (thumbv6m) is tested in tests/Thumb

@a = external global i32*
@b = external global i32*

; Function Attrs: nounwind
define void @foo24() #0 {
entry:
; CHECK-LABEL: foo24:
; We use '[rl0-9]+' to allow 'r0'..'r12', 'lr'
; CHECK: movt [[LB:[rl0-9]+]], :upper16:b
; CHECK: movt [[SB:[rl0-9]+]], :upper16:a
; CHECK: add [[NLB:[rl0-9]+]], [[LB]], #4
; CHECK: add [[NSB:[rl0-9]+]], [[SB]], #4
; CHECK-NEXT: ldm [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]], [[R5:[rl0-9]+]], [[R6:[rl0-9]+]]}
; CHECK-NEXT: stm [[NSB]], {[[R1]], [[R2]], [[R3]], [[R4]], [[R5]], [[R6]]}
  %0 = load i32*, i32** @a, align 4
  %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
  %1 = bitcast i32* %arrayidx to i8*
  %2 = load i32*, i32** @b, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
  %3 = bitcast i32* %arrayidx1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 24, i32 4, i1 false)
  ret void
}

define void @foo28() #0 {
entry:
; CHECK-LABEL: foo28:
; CHECK: movt [[LB:[rl0-9]+]], :upper16:b
; CHECK: movt [[SB:[rl0-9]+]], :upper16:a
; CHECK: add [[NLB:[rl0-9]+]], [[LB]], #4
; CHECK: add [[NSB:[rl0-9]+]], [[SB]], #4
; CHECK-NEXT: ldm [[NLB]]!, {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]]}
; CHECK-NEXT: stm [[NSB]]!, {[[R1]], [[R2]], [[R3]]}
; CHECK-NEXT: ldm [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]}
; CHECK-NEXT: stm [[NSB]], {[[R1]], [[R2]], [[R3]], [[R4]]}
  %0 = load i32*, i32** @a, align 4
  %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
  %1 = bitcast i32* %arrayidx to i8*
  %2 = load i32*, i32** @b, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
  %3 = bitcast i32* %arrayidx1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false)
  ret void
}

define void @foo32() #0 {
entry:
; CHECK-LABEL: foo32:
; CHECK: movt [[LB:[rl0-9]+]], :upper16:b
; CHECK: movt [[SB:[rl0-9]+]], :upper16:a
; CHECK: add [[NLB:[rl0-9]+]], [[LB]], #4
; CHECK: add [[NSB:[rl0-9]+]], [[SB]], #4
; CHECK-NEXT: ldm [[NLB]]!, {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]}
; CHECK-NEXT: stm [[NSB]]!, {[[R1]], [[R2]], [[R3]], [[R4]]}
; CHECK-NEXT: ldm [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]}
; CHECK-NEXT: stm [[NSB]], {[[R1]], [[R2]], [[R3]], [[R4]]}
  %0 = load i32*, i32** @a, align 4
  %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
  %1 = bitcast i32* %arrayidx to i8*
  %2 = load i32*, i32** @b, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
  %3 = bitcast i32* %arrayidx1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 32, i32 4, i1 false)
  ret void
}

define void @foo36() #0 {
entry:
; CHECK-LABEL: foo36:
; CHECK: movt [[LB:[rl0-9]+]], :upper16:b
; CHECK: movt [[SB:[rl0-9]+]], :upper16:a
; CHECK: add [[NLB:[rl0-9]+]], [[LB]], #4
; CHECK: add [[NSB:[rl0-9]+]], [[SB]], #4
; CHECK-NEXT: ldm [[NLB]]!, {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]}
; CHECK-NEXT: stm [[NSB]]!, {[[R1]], [[R2]], [[R3]], [[R4]]}
; CHECK-NEXT: ldm [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]], [[R5:[rl0-9]+]]}
; CHECK-NEXT: stm [[NSB]], {[[R1]], [[R2]], [[R3]], [[R4]], [[R5]]}
  %0 = load i32*, i32** @a, align 4
  %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
  %1 = bitcast i32* %arrayidx to i8*
  %2 = load i32*, i32** @b, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
  %3 = bitcast i32* %arrayidx1 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 36, i32 4, i1 false)
  ret void
}

; Function Attrs: nounwind
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1