llvm/test/CodeGen/ARM/saxpy10-a9.ll
Andrew Trick 6c9712fecb New machine model for cortex-a9. Schedule for resources and latency.
Schedule more conservatively to account for stalls on floating point
resources and latency. Use the AGU resource to model latency stalls
since it's shared between FP and LD/ST instructions. This might not be
completely accurate but should work well in practice.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198125 91177308-0d34-0410-b5e6-96231b3b80d8
2013-12-28 21:57:05 +00:00

136 lines
4.4 KiB
LLVM

; RUN: llc < %s -march=arm -mtriple=thumbv7-apple-ios7.0.0 -float-abi=hard -mcpu=cortex-a9 -misched-postra -misched-bench -scheditins=false | FileCheck %s
;
; Test MI-Sched suppory latency based stalls on in in-order pipeline
; using the new machine model.
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
; Don't be too strict with the top of the schedule, but most of it
; should be nicely pipelined.
;
; CHECK: saxpy10:
; CHECK: vldr
; CHECK: vldr
; CHECK: vldr
; CHECK: vldr
; CHECK: vldr
; CHECK: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vldr
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vmul
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vmul
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vmov
; CHECK-NEXT: bx
;
; This accumulates a sum rather than storing each result.
define float @saxpy10(float* nocapture readonly %data1, float* nocapture readonly %data2, float %a) {
entry:
%0 = load float* %data1, align 4
%mul = fmul float %0, %a
%1 = load float* %data2, align 4
%add = fadd float %mul, %1
%add2 = fadd float %add, 0.000000e+00
%arrayidx.1 = getelementptr inbounds float* %data1, i32 1
%2 = load float* %arrayidx.1, align 4
%mul.1 = fmul float %2, %a
%arrayidx1.1 = getelementptr inbounds float* %data2, i32 1
%3 = load float* %arrayidx1.1, align 4
%add.1 = fadd float %mul.1, %3
%add2.1 = fadd float %add2, %add.1
%arrayidx.2 = getelementptr inbounds float* %data1, i32 2
%4 = load float* %arrayidx.2, align 4
%mul.2 = fmul float %4, %a
%arrayidx1.2 = getelementptr inbounds float* %data2, i32 2
%5 = load float* %arrayidx1.2, align 4
%add.2 = fadd float %mul.2, %5
%add2.2 = fadd float %add2.1, %add.2
%arrayidx.3 = getelementptr inbounds float* %data1, i32 3
%6 = load float* %arrayidx.3, align 4
%mul.3 = fmul float %6, %a
%arrayidx1.3 = getelementptr inbounds float* %data2, i32 3
%7 = load float* %arrayidx1.3, align 4
%add.3 = fadd float %mul.3, %7
%add2.3 = fadd float %add2.2, %add.3
%arrayidx.4 = getelementptr inbounds float* %data1, i32 4
%8 = load float* %arrayidx.4, align 4
%mul.4 = fmul float %8, %a
%arrayidx1.4 = getelementptr inbounds float* %data2, i32 4
%9 = load float* %arrayidx1.4, align 4
%add.4 = fadd float %mul.4, %9
%add2.4 = fadd float %add2.3, %add.4
%arrayidx.5 = getelementptr inbounds float* %data1, i32 5
%10 = load float* %arrayidx.5, align 4
%mul.5 = fmul float %10, %a
%arrayidx1.5 = getelementptr inbounds float* %data2, i32 5
%11 = load float* %arrayidx1.5, align 4
%add.5 = fadd float %mul.5, %11
%add2.5 = fadd float %add2.4, %add.5
%arrayidx.6 = getelementptr inbounds float* %data1, i32 6
%12 = load float* %arrayidx.6, align 4
%mul.6 = fmul float %12, %a
%arrayidx1.6 = getelementptr inbounds float* %data2, i32 6
%13 = load float* %arrayidx1.6, align 4
%add.6 = fadd float %mul.6, %13
%add2.6 = fadd float %add2.5, %add.6
%arrayidx.7 = getelementptr inbounds float* %data1, i32 7
%14 = load float* %arrayidx.7, align 4
%mul.7 = fmul float %14, %a
%arrayidx1.7 = getelementptr inbounds float* %data2, i32 7
%15 = load float* %arrayidx1.7, align 4
%add.7 = fadd float %mul.7, %15
%add2.7 = fadd float %add2.6, %add.7
%arrayidx.8 = getelementptr inbounds float* %data1, i32 8
%16 = load float* %arrayidx.8, align 4
%mul.8 = fmul float %16, %a
%arrayidx1.8 = getelementptr inbounds float* %data2, i32 8
%17 = load float* %arrayidx1.8, align 4
%add.8 = fadd float %mul.8, %17
%add2.8 = fadd float %add2.7, %add.8
%arrayidx.9 = getelementptr inbounds float* %data1, i32 9
%18 = load float* %arrayidx.9, align 4
%mul.9 = fmul float %18, %a
%arrayidx1.9 = getelementptr inbounds float* %data2, i32 9
%19 = load float* %arrayidx1.9, align 4
%add.9 = fadd float %mul.9, %19
%add2.9 = fadd float %add2.8, %add.9
ret float %add2.9
}