llvm/test/CodeGen/Thumb2/thumb2-spill-q.ll
Bob Wilson 7d24705f65 Change register allocation order for ARM VFP and NEON registers to put the
callee-saved registers at the end of the lists.  Also prefer to avoid using
the low registers that are in register subclasses required by certain
instructions, so that those registers will more likely be available when needed.
This change makes a huge improvement in spilling in some cases.  Thanks to
Jakob for helping me realize the problem.

Most of this patch is fixing the testsuite.  There are quite a few places
where we're checking for specific registers.  I changed those to wildcards
in places where that doesn't weaken the tests.  The spill-q.ll and
thumb2-spill-q.ll tests stopped spilling with this change, so I added a bunch
of live values to force spills on those tests.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116055 91177308-0d34-0410-b5e6-96231b3b80d8
2010-10-08 06:15:13 +00:00

89 lines
5.1 KiB
LLVM

; RUN: llc < %s -mtriple=thumbv7-elf -mattr=+neon | FileCheck %s
; PR4789
%bar = type { float, float, float }
%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
%foo = type { <4 x float> }
%quux = type { i32 (...)**, %baz*, i32 }
%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
define void @aaa(%quuz* %this, i8* %block) {
; CHECK: aaa:
; CHECK: bic r4, r4, #15
; CHECK: vst1.64 {{.*}}[{{.*}}, :128]
; CHECK: vld1.64 {{.*}}[{{.*}}, :128]
entry:
%0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
store float 6.300000e+01, float* undef, align 4
%1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
store float 0.000000e+00, float* undef, align 4
%2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
%ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
%ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
%ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
%ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
%ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
%ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
%ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
%ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
%ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
%ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
%val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
br label %bb4
bb4: ; preds = %bb193, %entry
%besterror.0.2264 = phi <4 x float> [ undef, %entry ], [ %besterror.0.0, %bb193 ] ; <<4 x float>> [#uses=2]
%part0.0.0261 = phi <4 x float> [ zeroinitializer, %entry ], [ %23, %bb193 ] ; <<4 x float>> [#uses=2]
%3 = fmul <4 x float> zeroinitializer, %0 ; <<4 x float>> [#uses=2]
%4 = fadd <4 x float> %3, %part0.0.0261 ; <<4 x float>> [#uses=1]
%5 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
%6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
%7 = fmul <4 x float> %1, undef ; <<4 x float>> [#uses=1]
%8 = fadd <4 x float> %7, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01> ; <<4 x float>> [#uses=1]
%9 = fptosi <4 x float> %8 to <4 x i32> ; <<4 x i32>> [#uses=1]
%10 = sitofp <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1]
%11 = fmul <4 x float> %10, %2 ; <<4 x float>> [#uses=1]
%12 = fmul <4 x float> undef, %6 ; <<4 x float>> [#uses=1]
%13 = fmul <4 x float> %11, %4 ; <<4 x float>> [#uses=1]
%14 = fsub <4 x float> %12, %13 ; <<4 x float>> [#uses=1]
%15 = fsub <4 x float> %14, undef ; <<4 x float>> [#uses=1]
%16 = fmul <4 x float> %15, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> ; <<4 x float>> [#uses=1]
%17 = fadd <4 x float> %16, undef ; <<4 x float>> [#uses=1]
%18 = fmul <4 x float> %17, %val173 ; <<4 x float>> [#uses=1]
%19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1]
%20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
%tmp1 = fadd <4 x float> %20, %ld3
%tmp2 = fadd <4 x float> %tmp1, %ld4
%tmp3 = fadd <4 x float> %tmp2, %ld5
%tmp4 = fadd <4 x float> %tmp3, %ld6
%tmp5 = fadd <4 x float> %tmp4, %ld7
%tmp6 = fadd <4 x float> %tmp5, %ld8
%tmp7 = fadd <4 x float> %tmp6, %ld9
%tmp8 = fadd <4 x float> %tmp7, %ld10
%tmp9 = fadd <4 x float> %tmp8, %ld11
%21 = fadd <4 x float> %tmp9, %ld12
%22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
%tmp = extractelement <4 x i1> %22, i32 0
br i1 %tmp, label %bb193, label %bb186
bb186: ; preds = %bb4
br label %bb193
bb193: ; preds = %bb186, %bb4
%besterror.0.0 = phi <4 x float> [ %21, %bb186 ], [ %besterror.0.2264, %bb4 ] ; <<4 x float>> [#uses=1]
%23 = fadd <4 x float> %part0.0.0261, zeroinitializer ; <<4 x float>> [#uses=1]
br label %bb4
}