llvm-capstone/polly/test/ScheduleOptimizer/mat_mul_pattern_data_layout.ll
Tobias Grosser 4556c9b8fe [ScopInfo] Simplify new access functions under domain context
Summary:
We do not keep domain constraints on access functions when building the
scop. Hence, for consistency reasons, it makes also sense to not include
them when storing a new access function. This change results in simpler
access functions that make output easier to read.

This patch also helps to make DeLICMed memory accesses to be understood by
our matrix multiplication pattern matching pass. Further changes to the
matrix multiplication pattern matching are needed for this to work, so the
corresponding test case will be added in a future commit.

Reviewers: Meinersbur, bollu, gareevroman, efriedma, huihuiz, sebpop, simbuerg

Subscribers: pollydev, llvm-commits

Tags: #polly

Differential Revision: https://reviews.llvm.org/D35237

llvm-svn: 308215
2017-07-17 20:47:10 +00:00

102 lines
5.1 KiB
LLVM

; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true \
; RUN: -polly-target-throughput-vector-fma=1 \
; RUN: -polly-target-latency-vector-fma=8 \
; RUN: -polly-target-1st-cache-level-associativity=8 \
; RUN: -polly-target-2nd-cache-level-associativity=8 \
; RUN: -polly-target-1st-cache-level-size=32768 \
; RUN: -polly-target-2nd-cache-level-size=262144 \
; RUN: -polly-optimized-scops \
; RUN: -polly-target-vector-register-bitwidth=256 \
; RUN: < %s 2>&1 | FileCheck %s
;
; /* C := alpha*A*B + beta*C */
; for (i = 0; i < _PB_NI; i++)
; for (j = 0; j < _PB_NJ; j++)
; {
; C[i][j] *= beta;
; for (k = 0; k < _PB_NK; ++k)
; C[i][j] += alpha * A[i][k] * B[k][j];
; }
;
; CHECK: double Packed_B[ { [] -> [(256)] } ][ { [] -> [(256)] } ][ { [] -> [(8)] } ];
; CHECK-NEXT: double Packed_A[ { [] -> [(24)] } ][ { [] -> [(256)] } ][ { [] -> [(4)] } ]; // Element size 8
;
; CHECK: { Stmt_Copy_0[i0, i1, i2] -> MemRef_arg6[i0, i2] };
; CHECK-NEXT: new: { Stmt_Copy_0[i0, i1, i2] -> Packed_A[o0, o1, o2] : 256*floor((-i2 + o1)/256) = -i2 + o1 and 96*floor((-i0 + 4o0 + o2)/96) = -i0 + 4o0 + o2 and 0 <= o1 <= 255 and o2 >= 0 and -4o0 <= o2 <= 95 - 4o0 and o2 <= 3 };
;
; CHECK: { Stmt_Copy_0[i0, i1, i2] -> MemRef_arg7[i2, i1] };
; CHECK-NEXT: new: { Stmt_Copy_0[i0, i1, i2] -> Packed_B[o0, o1, i1 - 8o0] : 256*floor((-i2 + o1)/256) = -i2 + o1 and -7 + i1 <= 8o0 <= i1 and 0 <= o1 <= 255 };
;
; CHECK: CopyStmt_0
; CHECK-NEXT: Domain :=
; CHECK-NEXT: { CopyStmt_0[i0, i1, i2] : 0 <= i0 <= 1055 and 0 <= i1 <= 1055 and 0 <= i2 <= 1023 };
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: ;
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: null;
; CHECK-NEXT: new: { CopyStmt_0[i0, i1, i2] -> Packed_B[o0, o1, i1 - 8o0] : 256*floor((-i2 + o1)/256) = -i2 + o1 and -7 + i1 <= 8o0 <= i1 and 0 <= o1 <= 255 };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: null;
; CHECK-NEXT: new: { CopyStmt_0[i0, i1, i2] -> MemRef_arg7[i2, i1] };
; CHECK-NEXT: CopyStmt_1
; CHECK-NEXT: Domain :=
; CHECK-NEXT: { CopyStmt_1[i0, i1, i2] : 0 <= i0 <= 1055 and 0 <= i1 <= 1055 and 0 <= i2 <= 1023 };
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: ;
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: null;
; CHECK-NEXT: new: { CopyStmt_1[i0, i1, i2] -> Packed_A[o0, o1, o2] : 256*floor((-i2 + o1)/256) = -i2 + o1 and 96*floor((-i0 + 4o0 + o2)/96) = -i0 + 4o0 + o2 and 0 <= o1 <= 255 and o2 >= 0 and -4o0 <= o2 <= 95 - 4o0 and o2 <= 3 };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: null;
; CHECK-NEXT: new: { CopyStmt_1[i0, i1, i2] -> MemRef_arg6[i0, i2] };
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
define internal void @kernel_gemm(i32 %arg, i32 %arg1, i32 %arg2, double %arg3, double %arg4, [1056 x double]* %arg5, [1024 x double]* %arg6, [1056 x double]* %arg7) #0 {
bb:
br label %bb8
bb8: ; preds = %bb29, %bb
%tmp = phi i64 [ 0, %bb ], [ %tmp30, %bb29 ]
br label %bb9
bb9: ; preds = %bb26, %bb8
%tmp10 = phi i64 [ 0, %bb8 ], [ %tmp27, %bb26 ]
%tmp11 = getelementptr inbounds [1056 x double], [1056 x double]* %arg5, i64 %tmp, i64 %tmp10
%tmp12 = load double, double* %tmp11, align 8
%tmp13 = fmul double %tmp12, %arg4
store double %tmp13, double* %tmp11, align 8
br label %Copy_0
Copy_0: ; preds = %Copy_0, %bb9
%tmp15 = phi i64 [ 0, %bb9 ], [ %tmp24, %Copy_0 ]
%tmp16 = getelementptr inbounds [1024 x double], [1024 x double]* %arg6, i64 %tmp, i64 %tmp15
%tmp17 = load double, double* %tmp16, align 8
%tmp18 = fmul double %tmp17, %arg3
%tmp19 = getelementptr inbounds [1056 x double], [1056 x double]* %arg7, i64 %tmp15, i64 %tmp10
%tmp20 = load double, double* %tmp19, align 8
%tmp21 = fmul double %tmp18, %tmp20
%tmp22 = load double, double* %tmp11, align 8
%tmp23 = fadd double %tmp22, %tmp21
store double %tmp23, double* %tmp11, align 8
%tmp24 = add nuw nsw i64 %tmp15, 1
%tmp25 = icmp ne i64 %tmp24, 1024
br i1 %tmp25, label %Copy_0, label %bb26
bb26: ; preds = %Copy_0
%tmp27 = add nuw nsw i64 %tmp10, 1
%tmp28 = icmp ne i64 %tmp27, 1056
br i1 %tmp28, label %bb9, label %bb29
bb29: ; preds = %bb26
%tmp30 = add nuw nsw i64 %tmp, 1
%tmp31 = icmp ne i64 %tmp30, 1056
br i1 %tmp31, label %bb8, label %bb32
bb32: ; preds = %bb29
ret void
}
attributes #0 = { nounwind uwtable "target-cpu"="x86-64" "target-features"="+aes,+avx,+cmov,+cx16,+fxsr,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" }