llvm/test/CodeGen/Thumb/stm-merge.ll
Moritz Roth d84561bf69 ARM: Fix and re-enable load/store optimizer for Thumb1.
In a previous iteration of the pass, we would try to compensate for
writeback by updating later instructions and/or inserting a SUBS to
reset the base register if necessary.
Since such a SUBS sets the condition flags it's not generally safe to do
this. For now, only merge LDR/STRs if there is no writeback to the base
register (LDM that loads into the base register) or the base register is
killed by one of the merged instructions. These cases are clear wins
both in terms of instruction count and performance.

Also add three new test cases, and update the existing ones accordingly.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215729 91177308-0d34-0410-b5e6-96231b3b80d8
2014-08-15 17:00:30 +00:00

41 lines
1.6 KiB
LLVM

; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s
target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv6m--linux-gnueabi"
@d = internal unnamed_addr global i32 0, align 4
@c = internal global i32* null, align 4
@e = internal unnamed_addr global i32* null, align 4
; Function Attrs: nounwind optsize
define void @fn1() #0 {
entry:
; CHECK-LABEL: fn1:
; CHECK: stm r[[BASE:[0-9]]]!, {{.*}}
; CHECK-NOT: {{.*}} r[[BASE]]
; CHECK: ldr r[[BASE]], {{.*}}
%g = alloca i32, align 4
%h = alloca i32, align 4
store i32 1, i32* %g, align 4
store i32 0, i32* %h, align 4
%.pr = load i32* @d, align 4
%cmp11 = icmp slt i32 %.pr, 1
br i1 %cmp11, label %for.inc.lr.ph, label %for.body5
for.inc.lr.ph: ; preds = %entry
store i32 1, i32* @d, align 4
br label %for.body5
for.body5: ; preds = %entry, %for.inc.lr.ph, %for.body5
%f.010 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.inc.lr.ph ], [ 0, %entry ]
store volatile i32* %g, i32** @c, align 4
%inc7 = add nsw i32 %f.010, 1
%exitcond = icmp eq i32 %inc7, 2
br i1 %exitcond, label %for.end8, label %for.body5
for.end8: ; preds = %for.body5
store i32* %h, i32** @e, align 4
ret void
}
attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }