llvm/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
Hal Finkel f606a6ed99 [PowerPC] Enable interleaved-access vectorization
This adds a basic cost model for interleaved-access vectorization (and a better
default for shuffles), and enables interleaved-access vectorization by default.
The relevant difference from the default cost model for interleaved-access
vectorization, is that on PPC, the shuffles that end up being used are *much*
cheaper than modeling the process with insert/extract pairs (which are
quite expensive, especially on older cores).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246824 91177308-0d34-0410-b5e6-96231b3b80d8
2015-09-04 00:10:41 +00:00

31 lines
1.1 KiB
LLVM

; RUN: opt -S -basicaa -loop-vectorize < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; Function Attrs: nounwind
define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
entry:
br label %for.body
; CHECK-LABEL: @foo
; CHECK: <2 x double>
for.cond.cleanup: ; preds = %for.body
ret void
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = shl nsw i64 %indvars.iv, 1
%arrayidx = getelementptr inbounds double, double* %b, i64 %0
%1 = load double, double* %arrayidx, align 8
%add = fadd double %1, 1.000000e+00
%arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
store double %add, double* %arrayidx2, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1600
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
attributes #0 = { nounwind "target-cpu"="pwr8" }