NFC: Rename the 'for' operation in the AffineOps dialect to 'affine.for' and set the namespace of the AffineOps dialect to 'affine'.

PiperOrigin-RevId: 240165792
This commit is contained in:
River Riddle 2019-03-25 10:14:34 -07:00 committed by jpienaar
parent 8f5fa56623
commit 832567b379
70 changed files with 1304 additions and 1294 deletions

View File

@ -56,11 +56,11 @@ class EdscTest(unittest.TestCase):
code = str(fun)
# TODO(zinenko,ntv): use FileCheck for these tests
self.assertIn(
' "for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)} : () -> () {\n',
' "affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)} : () -> () {\n',
code)
self.assertIn(" ^bb1(%i0: index):", code)
self.assertIn(
' "for"(%c42, %2) {lower_bound: (d0) -> (d0), step: 2 : index, upper_bound: (d0) -> (d0)} : (index, index) -> () {\n',
' "affine.for"(%c42, %2) {lower_bound: (d0) -> (d0), step: 2 : index, upper_bound: (d0) -> (d0)} : (index, index) -> () {\n',
code)
self.assertIn(" ^bb2(%i1: index):", code)
self.assertIn(
@ -76,19 +76,19 @@ class EdscTest(unittest.TestCase):
code = str(fun)
self.assertIn(
' "for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (5)} : () -> () {\n',
' "affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (5)} : () -> () {\n',
code)
self.assertIn(" ^bb1(%i0: index):", code)
self.assertIn(
' "for"() {lower_bound: () -> (1), step: 3 : index, upper_bound: () -> (15)} : () -> () {\n',
' "affine.for"() {lower_bound: () -> (1), step: 3 : index, upper_bound: () -> (15)} : () -> () {\n',
code)
self.assertIn(" ^bb2(%i1: index):", code)
self.assertIn(
' "for"() {lower_bound: () -> (2), step: 5 : index, upper_bound: () -> (25)} : () -> () {\n',
' "affine.for"() {lower_bound: () -> (2), step: 5 : index, upper_bound: () -> (25)} : () -> () {\n',
code)
self.assertIn(" ^bb3(%i2: index):", code)
self.assertIn(
' "for"() {lower_bound: () -> (3), step: 7 : index, upper_bound: () -> (35)} : () -> () {\n',
' "affine.for"() {lower_bound: () -> (3), step: 7 : index, upper_bound: () -> (35)} : () -> () {\n',
code)
self.assertIn(" ^bb4(%i3: index):", code)
self.assertIn(
@ -342,10 +342,10 @@ class EdscTest(unittest.TestCase):
code = str(fun)
self.assertIn(
'"for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (10)}',
'"affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (10)}',
code)
self.assertIn(
'"for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)}',
'"affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)}',
code)
self.assertIn("%0 = load %arg0[%i0, %i1] : memref<10x42xf32>", code)
self.assertIn("%1 = addf %0, %cst : f32", code)
@ -367,7 +367,7 @@ class EdscTest(unittest.TestCase):
code = str(fun)
self.assertIn(
'"for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (32)} : () -> ()',
'"affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (32)} : () -> ()',
code)
self.assertIn("%0 = load %arg0[%i0, %i2] : memref<32x32xf32>", code)
self.assertIn("%1 = load %arg1[%i2, %i1] : memref<32x32xf32>", code)

View File

@ -15,7 +15,7 @@ loops and if instructions), the result of a
[`affine.apply` operation](#'affine.apply'-operation) that recursively takes as
arguments any symbolic identifiers. Dimensions may be bound not only to anything
that a symbol is bound to, but also to induction variables of enclosing
[`for` operations](#'for'-operation), and the result of an
[`affine.for` operations](#'affine.for'-operation), and the result of an
[`affine.apply` operation](#'affine.apply'-operation) (which recursively may use
other dimensions and symbols).
@ -47,12 +47,12 @@ Example:
%2 = affine.apply (i)[s0] -> (i+s0) (%42)[%n]
```
#### 'for' operation {#'for'-operation}
#### 'affine.for' operation {#'affine.for'-operation}
Syntax:
``` {.ebnf}
operation ::= `for` ssa-id `=` lower-bound `to` upper-bound
operation ::= `affine.for` ssa-id `=` lower-bound `to` upper-bound
(`step` integer-literal)? `{` inst* `}`
lower-bound ::= `max`? affine-map dim-and-symbol-use-list | shorthand-bound
@ -60,17 +60,17 @@ upper-bound ::= `min`? affine-map dim-and-symbol-use-list | shorthand-bound
shorthand-bound ::= ssa-id | `-`? integer-literal
```
The `for` operation represents an affine loop nest, defining an SSA value for
its induction variable. This SSA value always has type
The `affine.for` operation represents an affine loop nest, defining an SSA value
for its induction variable. This SSA value always has type
[`index`](LangRef.md#index-type), which is the size of the machine word.
The `for` operation executes its body a number of times iterating from a lower
bound to an upper bound by a stride. The stride, represented by `step`, is a
positive constant integer which defaults to "1" if not present. The lower and
The `affine.for` operation executes its body a number of times iterating from a
lower bound to an upper bound by a stride. The stride, represented by `step`, is
a positive constant integer which defaults to "1" if not present. The lower and
upper bounds specify a half-open range: the range includes the lower bound but
does not include the upper bound.
The lower and upper bounds of a `for` operation are represented as an
The lower and upper bounds of an `affine.for` operation are represented as an
application of an affine mapping to a list of SSA values passed to the map. The
[same restrictions](#restrictions-on-dimensions-and-symbols) hold for these SSA
values as for all bindings of SSA values to dimensions and symbols.
@ -94,8 +94,8 @@ Example showing reverse iteration of the inner loop:
func @simple_example(%A: memref<?x?xf32>, %B: memref<?x?xf32>) {
%N = dim %A, 0 : memref<?x?xf32>
for %i = 0 to %N step 1 {
for %j = 0 to %N { // implicitly steps by 1
affine.for %i = 0 to %N step 1 {
affine.for %j = 0 to %N { // implicitly steps by 1
%0 = affine.apply #map57(%j)[%N]
%tmp = call @F1(%A, %i, %0) : (memref<?x?xf32>, index, index)->(f32)
call @F2(%tmp, %B, %i, %0) : (f32, memref<?x?xf32>, index, index)->()
@ -130,8 +130,8 @@ Example:
#set = (d0, d1)[s0]: (d0 - 10 >= 0, s0 - d0 - 9 >= 0,
d1 - 10 >= 0, s0 - d1 - 9 >= 0)
func @reduced_domain_example(%A, %X, %N) : (memref<10xi32>, i32, i32) {
for %i = 0 to %N {
for %j = 0 to %N {
affine.for %i = 0 to %N {
affine.for %j = 0 to %N {
%0 = affine.apply #map42(%j)
%tmp = call @S1(%X, %i, %0)
affine.if #set(%i, %j)[%N] {

View File

@ -23,8 +23,8 @@ Examples:
// pad with %f0 to handle the boundary case:
%f0 = constant 0.0f : f32
for %i0 = 0 to %0 {
for %i1 = 0 to %1 step 256 {
for %i2 = 0 to %2 step 32 {
affine.for %i1 = 0 to %1 step 256 {
affine.for %i2 = 0 to %2 step 32 {
%v = vector_transfer_read %A, %i0, %i1, %i2, %f0
{permutation_map: (d0, d1, d2) -> (d2, d1)} :
(memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32>
@ -34,7 +34,7 @@ for %i0 = 0 to %0 {
// vector<128xf32>. The underlying implementation will require a 1-D vector
// broadcast:
for %i0 = 0 to %0 {
for %i1 = 0 to %1 {
affine.for %i1 = 0 to %1 {
%3 = vector_transfer_read %A, %i0, %i1
{permutation_map: (d0, d1) -> (0)} :
(memref<?x?xf32>, index, index) -> vector<128xf32>
@ -81,8 +81,8 @@ A notional lowering of vector_transfer_read could generate code resembling:
%tmp = alloc() : vector<3x4x5xf32>
%view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
for %i = 0 to 3 {
for %j = 0 to 4 {
for %k = 0 to 5 {
affine.for %j = 0 to 4 {
affine.for %k = 0 to 5 {
%a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32>
store %tmp[%i, %j, %k] : vector<3x4x5xf32>
}}}
@ -102,7 +102,7 @@ lowered code would resemble:
%tmp = alloc() : vector<3x4x5xf32>
%view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
for %i = 0 to 3 {
for %k = 0 to 5 {
affine.for %k = 0 to 5 {
%a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32>
store %tmp[%i, 0, %k] : vector<3x4x5xf32>
}}
@ -130,9 +130,9 @@ Examples:
```mlir {.mlir}
// write vector<16x32x64xf32> into the slice `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`:
for %i0 = 0 to %0 {
for %i1 = 0 to %1 step 32 {
for %i2 = 0 to %2 step 64 {
for %i3 = 0 to %3 step 16 {
affine.for %i1 = 0 to %1 step 32 {
affine.for %i2 = 0 to %2 step 64 {
affine.for %i3 = 0 to %3 step 16 {
%val = `ssa-value` : vector<16x32x64xf32>
vector_transfer_write %val, %A, %i0, %i1, %i2, %i3
{permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} :

View File

@ -40,10 +40,10 @@ which means that values are defined before use and have scope defined by their
dominance relations. Operations may produce zero or more results, and each is a
distinct SSA value with its own type defined by the [type system](#type-system).
MLIR incorporates polyhedral compiler concepts, including `for` and `affine.if`
operations defined by the [affine dialect](Dialects/Affine.md), which model
affine loops and affine conditionals. It also includes affine maps integrated
into the type system - they are key to the representation of data and
MLIR incorporates polyhedral compiler concepts, including `affine.for` and
`affine.if` operations defined by the [affine dialect](Dialects/Affine.md),
which model affine loops and affine conditionals. It also includes affine maps
integrated into the type system - they are key to the representation of data and
[MemRefs](#memref-type), which are the representation for tensors in addressable
memory. MLIR also supports a first-class Tensor type allowing it to concisely
represent operations on N-dimensional arrays.
@ -99,10 +99,10 @@ func @multiply(%A: memref<100x?xf32>, %B: memref<?x50xf32>)
%C = alloc() : memref<100x50xf32>
// Multiplication loop nest.
for %i = 0 to 100 {
for %j = 0 to 50 {
affine.for %i = 0 to 100 {
affine.for %j = 0 to 50 {
store 0 to %C[%i, %j] : memref<100x50xf32>
for %k = 0 to %n {
affine.for %k = 0 to %n {
%a_v = load %A[%i, %k] : memref<100x?xf32>
%b_v = load %B[%k, %j] : memref<?x50xf32>
%prod = mulf %a_v, %b_v : f32
@ -1697,8 +1697,8 @@ The arity of indices is the rank of the memref (i.e., if the memref loaded from
is of rank 3, then 3 indices are required for the load following the memref
identifier).
In an `affine.if` or `for` body, the indices of a load are restricted to SSA
values bound to surrounding loop induction variables,
In an `affine.if` or `affine.for` body, the indices of a load are restricted to
SSA values bound to surrounding loop induction variables,
[symbols](#dimensions-and-symbols), results of a
[`constant` operation](#'constant'-operation), or the result of an
`affine.apply` operation that can in turn take as arguments all of the
@ -1719,10 +1719,10 @@ Example:
**Context:** The `load` and `store` instructions are specifically crafted to
fully resolve a reference to an element of a memref, and (in affine `affine.if`
and `for` instructions) the compiler can follow use-def chains (e.g. through
[`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) operations) to
precisely analyze references at compile-time using polyhedral techniques. This
is possible because of the
and `affine.for` instructions) the compiler can follow use-def chains (e.g.
through [`affine.apply`](Dialects/Affine.md#'affine.apply'-operation)
operations) to precisely analyze references at compile-time using polyhedral
techniques. This is possible because of the
[restrictions on dimensions and symbols](Dialects/Affine.md#restrictions-on-dimensions-and-symbols)
in these contexts.
@ -1755,10 +1755,11 @@ store %100, %A[%1, 1023] : memref<4x?xf32, #layout, hbm>
**Context:** The `load` and `store` instructions are specifically crafted to
fully resolve a reference to an element of a memref, and (in polyhedral
`affine.if` and `for` instructions) the compiler can follow use-def chains (e.g.
through [`affine.apply`](Dialects/Affine.md#'affine.apply'-operation)
operations) to precisely analyze references at compile-time using polyhedral
techniques. This is possible because of the
`affine.if` and `affine.for` instructions) the compiler can follow use-def
chains (e.g. through
[`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) operations) to
precisely analyze references at compile-time using polyhedral techniques. This
is possible because of the
[restrictions on dimensions and symbols](Dialect/Affine.md#restrictions-on-dimensions-and-symbols)
in these contexts.

View File

@ -39,9 +39,9 @@ These restrictions may be lifted in the future.
### Output IR
Functions with `for` and `affine.if` instructions eliminated. These functions
may contain operations from the Standard dialect in addition to those already
present before the pass.
Functions with `affine.for` and `affine.if` instructions eliminated. These
functions may contain operations from the Standard dialect in addition to those
already present before the pass.
### Invariants
@ -95,10 +95,10 @@ Input
```mlir
func @loop_nest_tiled() -> memref<256x1024xf32> {
%0 = alloc() : memref<256x1024xf32>
for %i0 = 0 to 256 step 32 {
for %i1 = 0 to 1024 step 32 {
for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
affine.for %i0 = 0 to 256 step 32 {
affine.for %i1 = 0 to 1024 step 32 {
affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
%1 = load %0[%i2, %i3] : memref<256x1024xf32>
}
}
@ -119,16 +119,16 @@ func @loop_nest_tiled() -> memref<256x1024xf32> {
%c32 = constant 32 : index
%c0 = constant 0 : index
%0 = alloc() : memref<256x1024xf32>
for %i0 = 0 to 256 step 32 {
for %i1 = 0 to 1024 step 32 {
affine.for %i0 = 0 to 256 step 32 {
affine.for %i1 = 0 to 1024 step 32 {
%1 = affine.apply #map1(%i0)
%2 = affine.apply #map1(%i1)
%3 = alloc() : memref<32x32xf32, 1>
%4 = alloc() : memref<1xi32>
dma_start %0[%1, %2], %3[%c0, %c0], %c1024, %4[%c0], %c1024, %c32 : memref<256x1024xf32>, memref<32x32xf32, 1>, memref<1xi32>
dma_wait %4[%c0], %c1024 : memref<1xi32>
for %i2 = #map1(%i0) to #map2(%i0) {
for %i3 = #map1(%i1) to #map2(%i1) {
affine.for %i2 = #map1(%i0) to #map2(%i0) {
affine.for %i3 = #map1(%i1) to #map2(%i1) {
%5 = affine.apply #map3(%i0, %i2)
%6 = affine.apply #map3(%i1, %i3)
%7 = load %3[%5, %6] : memref<32x32xf32, 1>
@ -194,8 +194,8 @@ Input
func @store_load_affine_apply() -> memref<10x10xf32> {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10x10xf32>
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%t0 = affine.apply (d0, d1) -> (d1 + 1)(%i0, %i1)
%t1 = affine.apply (d0, d1) -> (d0)(%i0, %i1)
%idx0 = affine.apply (d0, d1) -> (d1) (%t0, %t1)
@ -217,8 +217,8 @@ Output
func @store_load_affine_apply() -> memref<10x10xf32> {
%cst = constant 7.000000e+00 : f32
%0 = alloc() : memref<10x10xf32>
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%3 = affine.apply #map1(%1, %2)
%4 = affine.apply #map2(%1, %2)
store %cst, %0[%3, %4] : memref<10x10xf32>
@ -258,7 +258,7 @@ Input
%2 = alloc() : memref<1xf32>
%c0 = constant 0 : index
%c128 = constant 128 : index
for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
dma_start %0[%i0], %1[%i0], %c128, %2[%c0] : memref<256xf32>, memref<32xf32, 1>, memref<1xf32>
dma_wait %2[%c0], %c128 : memref<1xf32>
%3 = load %1[%i0] : memref<32xf32, 1>
@ -282,7 +282,7 @@ Output
%1 = alloc() : memref<2x32xf32, 1>
%2 = alloc() : memref<2x1xf32>
dma_start %0[%c0], %1[%c0, %c0], %c128, %2[%c0, %c0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
for %i0 = 1 to 8 {
affine.for %i0 = 1 to 8 {
%3 = affine.apply #map2(%i0)
%4 = affine.apply #map2(%i0)
dma_start %0[%i0], %1[%3, %i0], %c128, %2[%4, %c0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>

View File

@ -150,8 +150,8 @@ func bar(%A : memref<8x?xf32, #lmap>) {
// dynamically using dim instruction.
%N = dim %A, 1 : memref<8x?xf32, #lmap>
for %i = 0 to 8 {
for %j = 0 to %N {
affine.for %i = 0 to 8 {
affine.for %j = 0 to %N {
// A[i,j] += 1
%s1 = load %A [%i, %j] : memref<8x?xf32, #lmap>
%s2 = add %s1, 1
@ -548,7 +548,7 @@ nested in an outer function that using affine loops.
func @search(memref<?x?xi32 %A, <?xi32> %S, i32 %key) {
%ni = dim %A, 0 : memref<?x?xi32>
// This loop can be parallelized
for %i = 0 to %ni {
affine.for %i = 0 to %ni {
call @search_body (%A, %S, %i) : (memref<?x?xi32>, memref<?xi32>, i32)
}
return
@ -582,9 +582,9 @@ func @search_body(%A: memref<?x?xi32>, %S: memref<?xi32>, %key: i32) {
As per the [MLIR spec](LangRef.md), the restrictions on dimensions and symbol
identifiers to be used with the affine.apply instruction only apply to accesses
inside `for` and `affine.if` instructions. However, an analysis of accesses
inside the called function (`@search_body`) is necessary to determine if the
`%i` loop could be parallelized: such function access analysis is calling
inside `affine.for` and `affine.if` instructions. However, an analysis of
accesses inside the called function (`@search_body`) is necessary to determine
if the `%i` loop could be parallelized: such function access analysis is calling
context sensitive.
### Non-affine loop bounds {#non-affine-loop-bounds}
@ -604,8 +604,8 @@ for (i=0; i <N; i++)
```mlir {.mlir}
func @outer_nest(%n) : (i32) {
for %i = 0 to %n {
for %j = 0 to %n {
affine.for %i = 0 to %n {
affine.for %j = 0 to %n {
call @inner_nest(%i, %j, %n)
}
}
@ -620,8 +620,8 @@ func @inner_nest(%i: i32, %j: i32, %n: i32) {
}
func @inner_nest2(%m, %n) -> i32 {
for %k = 0 to %m {
for %l = 0 to %n {
affine.for %k = 0 to %m {
affine.for %l = 0 to %n {
...
}
}
@ -663,13 +663,13 @@ in a dilated convolution.
func @conv2d(memref<16x1024x1024x3xf32, #lm0, vmem> %input,
memref<5x5x3x32xf32, #lm0, vmem> %kernel,
memref<16x512x512x32xf32, #lm0, vmem> %output) {
for %b = 0 to %batch {
for %oh = 0 to %output_height {
for %ow = 0 to %output_width {
for %of = 0 to %output_feature {
for %kh = 0 to %kernel_height {
for %kw = 0 to %kernel_width {
for %if = 0 to %input_feature {
affine.for %b = 0 to %batch {
affine.for %oh = 0 to %output_height {
affine.for %ow = 0 to %output_width {
affine.for %of = 0 to %output_feature {
affine.for %kh = 0 to %kernel_height {
affine.for %kw = 0 to %kernel_width {
affine.for %if = 0 to %input_feature {
// Calculate input indices.
%1_0 = affine.apply #map1_0 (%0#1, %0#2, %0#4, %0#5)
[%h_stride, %w_stride, %h_kernel_dilation, %w_kernel_dilation,
@ -913,10 +913,10 @@ func @dma_hbm_to_vmem(memref<1024 x f32, #layout_map0, hbm> %a,
representation. 2(b) requires no change, but impacts how cost models look at
index and layout maps.
### `affine.if` and `for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"}
### `affine.if` and `affine.for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"}
We considered providing a representation for SSA values that are live out of
`if/else` conditional bodies and loop carried in `for` loops. We
`if/else` conditional bodies and loop carried in `affine.for` loops. We
ultimately abandoned this approach due to its complexity. In the current design
of MLIR, scalar variables cannot escape for loops or if instructions. In
situations, where escaping is necessary, we use zero-dimensional tensors and
@ -948,7 +948,7 @@ Example:
// Return sum of elements in 1-dimensional mref A
func int32 @sum(%A : memref<?xi32>, %N : i32) -> (i32) {
%init = 0
%result = for %i = 0 to N with %tmp(%init) {
%result = affine.for %i = 0 to N with %tmp(%init) {
%value = load %A[%i]
%sum = %value + %tmp
yield %sum
@ -978,7 +978,7 @@ Example:
// Compute sum of half of the array
func int32 @sum_half(%A, %N) {
%s0 = 0
%s1 = for %i = 1 ... N step 1 with %s2 (%s0) {
%s1 = affine.for %i = 1 ... N step 1 with %s2 (%s0) {
%s3 = if (%i >= %N / 2) {
%v0 = load %A[%i]
%s4 = %s2 + %v0

View File

@ -184,8 +184,8 @@ Our simple example above would be represented as:
```mlir
mlfunc @simple_example(... %N) {
for %i = 0 ... %N step 1 {
for %j = 0 ... %N step 1 {
affine.for %i = 0 ... %N step 1 {
affine.for %j = 0 ... %N step 1 {
// identity noop in this case, but can exist in general.
%0,%1 = affine.apply #57(%i, %j)
@ -203,8 +203,8 @@ The example with the reduced domain would be represented with an if instruction:
```mlir
mlfunc @reduced_domain_example(... %N) {
for %i = 0 ... %N step 1 {
for %j = 0 ... %N step 1 {
affine.for %i = 0 ... %N step 1 {
affine.for %j = 0 ... %N step 1 {
// identity noop in this case, but can exist in general.
%0,%1 = affinecall #57(%i, %j)
@ -233,8 +233,8 @@ that transformations call into):
```mlir
mlfunc @skewed_domain_example(... %N) {
for %t1 = 0 ... 2*N-2 step 1 {
for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 {
affine.for %t1 = 0 ... 2*N-2 step 1 {
affine.for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 {
(%i, %j) = (%t1-%t2, %t2)
...
}
@ -373,7 +373,7 @@ mlfunc's (if we support them) will also have to have domains.
### Lack of redundancy in IR
The traditional form has multiple encodings for the same sorts of behavior: you
end up having bits on `for` loops to specify whether codegen should use
end up having bits on `affine.for` loops to specify whether codegen should use
"atomic/separate" policies, unroll loops, etc. Instructions can be split or can
generate multiple copies of their instruction because of overlapping domains,
etc.

View File

@ -88,15 +88,15 @@ public:
MLIRContext *context);
};
/// The "for" instruction represents an affine loop nest, defining an SSA value
/// for its induction variable. The induction variable is represented as a
/// The "affine.for" instruction represents an affine loop nest, defining an SSA
/// value for its induction variable. The induction variable is represented as a
/// BlockArgument to the entry block of the body. The body and induction
/// variable can be created automatically for new "for" ops with 'createBody'.
/// This SSA value always has type index, which is the size of the machine word.
/// The stride, represented by step, is a positive constant integer which
/// defaults to "1" if not present. The lower and upper bounds specify a
/// half-open range: the range includes the lower bound but does not include the
/// upper bound.
/// variable can be created automatically for new "affine.for" ops with
/// 'createBody'. This SSA value always has type index, which is the size of the
/// machine word. The stride, represented by step, is a positive constant
/// integer which defaults to "1" if not present. The lower and upper bounds
/// specify a half-open range: the range includes the lower bound but does not
/// include the upper bound.
///
/// The lower and upper bounds of a for operation are represented as an
/// application of an affine mapping to a list of SSA values passed to the map.
@ -108,7 +108,7 @@ public:
///
/// Example:
///
/// for %i = 1 to 10 {
/// affine.for %i = 1 to 10 {
/// ...
/// }
///
@ -131,7 +131,7 @@ public:
static void getCanonicalizationPatterns(OwningRewritePatternList &results,
MLIRContext *context);
static StringRef getOperationName() { return "for"; }
static StringRef getOperationName() { return "affine.for"; }
static StringRef getStepAttrName() { return "step"; }
static StringRef getLowerBoundAttrName() { return "lower_bound"; }
static StringRef getUpperBoundAttrName() { return "upper_bound"; }
@ -268,10 +268,10 @@ public:
operand_range getOperands() { return {operand_begin(), operand_end()}; }
private:
// 'for' instruction that contains this bound.
// 'affine.for' instruction that contains this bound.
AffineForOp inst;
// Start and end positions of this affine bound operands in the list of
// the containing 'for' instruction operands.
// the containing 'affine.for' instruction operands.
unsigned opStart, opEnd;
// Affine map for this bound.
AffineMap map;

View File

@ -376,14 +376,15 @@ public:
AffineExpr toAffineExpr(unsigned idx, MLIRContext *context);
/// Adds constraints (lower and upper bounds) for the specified 'for'
/// Adds constraints (lower and upper bounds) for the specified 'affine.for'
/// instruction's Value using IR information stored in its bound maps. The
/// right identifier is first looked up using forOp's Value. Asserts if the
/// Value corresponding to the 'for' instruction isn't found in the constraint
/// system. Returns failure for the yet unimplemented/unsupported cases. Any
/// new identifiers that are found in the bound operands of the 'for'
/// instruction are added as trailing identifiers (either dimensional or
/// symbolic depending on whether the operand is a valid ML Function symbol).
/// Value corresponding to the 'affine.for' instruction isn't found in the
/// constraint system. Returns failure for the yet unimplemented/unsupported
/// cases. Any new identifiers that are found in the bound operands of the
/// 'affine.for' instruction are added as trailing identifiers (either
/// dimensional or symbolic depending on whether the operand is a valid ML
/// Function symbol).
// TODO(bondhugula): add support for non-unit strides.
LogicalResult addAffineForOpDomain(AffineForOp forOp);

View File

@ -45,7 +45,7 @@ class Instruction;
class Value;
/// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
/// the outermost 'for' instruction to the innermost one.
/// the outermost 'affine.for' instruction to the innermost one.
// TODO(bondhugula): handle 'affine.if' inst's.
void getLoopIVs(Instruction &inst, SmallVectorImpl<AffineForOp> *loops);
@ -113,8 +113,8 @@ AffineForOp insertBackwardComputationSlice(Instruction *srcOpInst,
/// surrounding such op's.
// For example, the memref region for a load operation at loop depth = 1:
//
// for %i = 0 to 32 {
// for %ii = %i to (d0) -> (d0 + 8) (%i) {
// affine.for %i = 0 to 32 {
// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
// load %A[%ii]
// }
// }
@ -146,8 +146,8 @@ struct MemRefRegion {
/// For example, the memref region for this operation at loopDepth = 1 will
/// be:
///
/// for %i = 0 to 32 {
/// for %ii = %i to (d0) -> (d0 + 8) (%i) {
/// affine.for %i = 0 to 32 {
/// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
/// load %A[%ii]
/// }
/// }

View File

@ -76,9 +76,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
/// The following MLIR snippet:
///
/// ```mlir
/// for %i3 = 0 to %0 {
/// for %i4 = 0 to %1 {
/// for %i5 = 0 to %2 {
/// affine.for %i3 = 0 to %0 {
/// affine.for %i4 = 0 to %1 {
/// affine.for %i5 = 0 to %2 {
/// %a5 = load %arg0[%i4, %i5, %i3] : memref<?x?x?xf32>
/// }}}
/// ```
@ -86,9 +86,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
/// may vectorize with {permutation_map: (d0, d1, d2) -> (d2, d1)} into:
///
/// ```mlir
/// for %i3 = 0 to %0 step 32 {
/// for %i4 = 0 to %1 {
/// for %i5 = 0 to %2 step 256 {
/// affine.for %i3 = 0 to %0 step 32 {
/// affine.for %i4 = 0 to %1 {
/// affine.for %i5 = 0 to %2 step 256 {
/// %4 = vector_transfer_read %arg0, %i4, %i5, %i3
/// {permutation_map: (d0, d1, d2) -> (d2, d1)} :
/// (memref<?x?x?xf32>, index, index) -> vector<32x256xf32>
@ -103,7 +103,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
///
/// ```mlir
/// %cst0 = constant 0 : index
/// for %i0 = 0 to %0 {
/// affine.for %i0 = 0 to %0 {
/// %a0 = load %arg0[%cst0, %cst0] : memref<?x?xf32>
/// }
/// ```
@ -111,7 +111,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
/// may vectorize with {permutation_map: (d0) -> (0)} into:
///
/// ```mlir
/// for %i0 = 0 to %0 step 128 {
/// affine.for %i0 = 0 to %0 step 128 {
/// %3 = vector_transfer_read %arg0, %c0_0, %c0_0
/// {permutation_map: (d0, d1) -> (0)} :
/// (memref<?x?xf32>, index, index) -> vector<128xf32>

View File

@ -341,7 +341,8 @@ protected:
/// (e.g. vectorValue = load(vectorView, zero)).
///
/// Only ExprKind::StmtBlockLikeExpr have `enclosedStmts`, these comprise:
/// 1. `For`-loops for which the `lhs` binds to the induction variable, `rhs`
/// 1. `affine.for`-loops for which the `lhs` binds to the induction variable,
/// `rhs`
/// binds to an Expr of kind `ExprKind::For` with lower-bound, upper-bound and
/// step respectively.
// TODO(zinenko): this StmtBlockLikeExpr should be retired in favor of Expr
@ -647,7 +648,7 @@ Stmt For(llvm::ArrayRef<Expr> indices, llvm::ArrayRef<Expr> lbs,
llvm::ArrayRef<Expr> ubs, llvm::ArrayRef<Expr> steps,
llvm::ArrayRef<Stmt> enclosedStmts);
/// Define a 'for' loop from with multi-valued bounds.
/// Define an 'affine.for' loop with multi-valued bounds.
///
/// for max(lbs...) to min(ubs...) {}
///

View File

@ -73,9 +73,10 @@ void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
SmallVectorImpl<Value *> *operands,
FuncBuilder *builder);
/// Skew the instructions in the body of a 'for' instruction with the specified
/// instruction-wise shifts. The shifts are with respect to the original
/// execution order, and are multiplied by the loop 'step' before being applied.
/// Skew the instructions in the body of an 'affine.for' instruction with the
/// specified instruction-wise shifts. The shifts are with respect to the
/// original execution order, and are multiplied by the loop 'step' before being
/// applied.
LLVM_NODISCARD
LogicalResult instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
bool unrollPrologueEpilogue = false);

View File

@ -95,14 +95,14 @@ Instruction *createComposedAffineApplyOp(FuncBuilder *builder, Location loc,
///
/// Before
///
/// for %i = 0 to #map(%N)
/// affine.for %i = 0 to #map(%N)
/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
/// send %A[%idx], ...
/// %v = "compute"(%idx, ...)
///
/// After
///
/// for %i = 0 to #map(%N)
/// affine.for %i = 0 to #map(%N)
/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
/// send %A[%idx], ...
/// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)

View File

@ -36,7 +36,7 @@ using llvm::dbgs;
//===----------------------------------------------------------------------===//
AffineOpsDialect::AffineOpsDialect(MLIRContext *context)
: Dialect(/*namePrefix=*/"", context) {
: Dialect(/*namePrefix=*/"affine", context) {
addOperations<AffineApplyOp, AffineForOp, AffineIfOp>();
}
@ -69,7 +69,7 @@ bool mlir::isValidDim(Value *value) {
return isTopLevelSymbol(dimOp->getOperand());
return false;
}
// This value is a block argument (which also includes 'for' loop IVs).
// This value is a block argument (which also includes 'affine.for' loop IVs).
return true;
}
@ -969,7 +969,7 @@ static void printBound(AffineBound bound, const char *prefix, OpAsmPrinter *p) {
}
void AffineForOp::print(OpAsmPrinter *p) {
*p << "for ";
*p << "affine.for ";
p->printOperand(getBody()->getArgument(0));
*p << " = ";
printBound(getLowerBound(), "max", p);

View File

@ -708,8 +708,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
// For example, given the following MLIR code with with "source" and
// "destination" accesses to the same memref labeled, and symbols %M, %N, %K:
//
// for %i0 = 0 to 100 {
// for %i1 = 0 to 50 {
// affine.for %i0 = 0 to 100 {
// affine.for %i1 = 0 to 50 {
// %a0 = affine.apply
// (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
// // Source memref access.
@ -717,8 +717,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
// }
// }
//
// for %i2 = 0 to 100 {
// for %i3 = 0 to 50 {
// affine.for %i2 = 0 to 100 {
// affine.for %i3 = 0 to 50 {
// %a1 = affine.apply
// (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
// // Destination memref access.

View File

@ -15,7 +15,7 @@
// limitations under the License.
// =============================================================================
//
// This file implements a pass to detect parallel affine 'for' ops.
// This file implements a pass to detect parallel affine 'affine.for' ops.
//
//===----------------------------------------------------------------------===//
@ -40,7 +40,8 @@ FunctionPassBase *mlir::createParallelismDetectionTestPass() {
return new TestParallelismDetection();
}
// Walks the function and emits a note for all 'for' ops detected as parallel.
// Walks the function and emits a note for all 'affine.for' ops detected as
// parallel.
void TestParallelismDetection::runOnFunction() {
Function *f = getFunction();
FuncBuilder b(f);

View File

@ -38,11 +38,11 @@ using namespace mlir;
using llvm::SmallDenseMap;
/// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
/// the outermost 'for' instruction to the innermost one.
/// the outermost 'affine.for' instruction to the innermost one.
void mlir::getLoopIVs(Instruction &inst, SmallVectorImpl<AffineForOp> *loops) {
auto *currInst = inst.getParentInst();
AffineForOp currAffineForOp;
// Traverse up the hierarchy collecing all 'for' instruction while
// Traverse up the hierarchy collecting all 'affine.for' instructions while
// skipping over 'affine.if' instructions.
while (currInst && ((currAffineForOp = currInst->dyn_cast<AffineForOp>()) ||
currInst->isa<AffineIfOp>())) {
@ -162,8 +162,8 @@ LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
// For example, the memref region for this load operation at loopDepth = 1 will
// be as below:
//
// for %i = 0 to 32 {
// for %ii = %i to (d0) -> (d0 + 8) (%i) {
// affine.for %i = 0 to 32 {
// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
// load %A[%ii]
// }
// }
@ -683,7 +683,7 @@ static Optional<int64_t> getMemoryFootprintBytes(Block &block,
int memorySpace) {
SmallDenseMap<Value *, std::unique_ptr<MemRefRegion>, 4> regions;
// Walk this 'for' instruction to gather all memory regions.
// Walk this 'affine.for' instruction to gather all memory regions.
bool error = false;
block.walk(start, end, [&](Instruction *opInst) {
if (!opInst->isa<LoadOp>() && !opInst->isa<StoreOp>()) {

View File

@ -146,7 +146,8 @@ Value *mlir::edsc::MLIREmitter::emitExpr(Expr e) {
if (auto expr = e.dyn_cast<StmtBlockLikeExpr>()) {
if (expr.getKind() == ExprKind::For) {
auto exprGroups = expr.getAllArgumentGroups();
assert(exprGroups.size() == 3 && "expected 3 expr groups in `for`");
assert(exprGroups.size() == 3 &&
"expected 3 expr groups in `affine.for`");
assert(!exprGroups[0].empty() && "expected at least one lower bound");
assert(!exprGroups[1].empty() && "expected at least one upper bound");
assert(exprGroups[2].size() == 1 &&

View File

@ -526,8 +526,8 @@ Stmt mlir::edsc::For(llvm::ArrayRef<Expr> idxs, llvm::ArrayRef<MaxExpr> lbs,
Stmt mlir::edsc::MaxMinFor(const Bindable &idx, ArrayRef<Expr> lbs,
ArrayRef<Expr> ubs, Expr step,
ArrayRef<Stmt> enclosedStmts) {
assert(!lbs.empty() && "'for' loop must have lower bounds");
assert(!ubs.empty() && "'for' loop must have upper bounds");
assert(!lbs.empty() && "'affine.for' loop must have lower bounds");
assert(!ubs.empty() && "'affine.for' loop must have upper bounds");
// Use a null expression as a sentinel between lower and upper bound
// expressions in the list of children.
@ -964,7 +964,7 @@ void mlir::edsc::Expr::print(raw_ostream &os) const {
} else if (auto stmtLikeExpr = this->dyn_cast<StmtBlockLikeExpr>()) {
switch (stmtLikeExpr.getKind()) {
// We only print the lb, ub and step here, which are the StmtBlockLike
// part of the `for` StmtBlockLikeExpr.
// part of the `affine.for` StmtBlockLikeExpr.
case ExprKind::For: {
auto exprGroups = stmtLikeExpr.getAllArgumentGroups();
assert(exprGroups.size() == 3 &&

View File

@ -343,7 +343,7 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
auto fastMemRefType = top.getMemRefType(
fastBufferShape, memRefType.getElementType(), {}, fastMemorySpace);
// Create the fast memory space buffer just before the 'for'
// Create the fast memory space buffer just before the 'affine.for'
// instruction.
fastMemRef = prologue.create<AllocOp>(loc, fastMemRefType)->getResult();
// Record it.
@ -472,7 +472,7 @@ bool DmaGeneration::runOnBlock(Block *block) {
// approach is conservative in some cases at the moment, we do a check later
// and report an error with location info.
// TODO(bondhugula): An 'affine.if' instruction is being treated similar to an
// operation instruction. 'affine.if''s could have 'for's in them;
// operation instruction. 'affine.if''s could have 'affine.for's in them;
// treat them separately.
// Get to the first load, store, or for op.
@ -494,7 +494,7 @@ bool DmaGeneration::runOnBlock(Block *block) {
fastMemCapacityBytes);
};
// If the memory footprint of the 'for' loop is higher than fast
// If the memory footprint of the 'affine.for' loop is higher than fast
// memory capacity (when provided), we recurse to DMA at an inner level
// until we find a depth at which footprint fits in fast mem capacity. If
// the footprint can't be calculated, we assume for now it fits. Recurse
@ -507,7 +507,7 @@ bool DmaGeneration::runOnBlock(Block *block) {
runOnBlock(/*begin=*/curBegin, /*end=*/it);
// Recurse onto the body of this loop.
runOnBlock(forOp->getBody());
// The next region starts right after the 'for' instruction.
// The next region starts right after the 'affine.for' instruction.
curBegin = std::next(it);
} else {
// We have enough capacity, i.e., DMAs will be computed for the portion
@ -698,7 +698,8 @@ uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) {
[&](const SmallMapVector<Value *, std::unique_ptr<MemRefRegion>, 4>
&regions) {
for (const auto &regionEntry : regions) {
// For each region, hoist DMA transfer past all invariant 'for's.
// For each region, hoist DMA transfer past all invariant
// 'affine.for's.
Block::iterator dmaPlacementReadStart, dmaPlacementWriteStart;
Block *dmaPlacementBlock;
findHighestBlockForPlacement(

View File

@ -641,7 +641,8 @@ bool MemRefDependenceGraph::init(Function *f) {
// all loads and store accesses it contains.
LoopNestStateCollector collector;
collector.collect(&inst);
// Return false if a non 'for' region was found (not currently supported).
// Return false if a non 'affine.for' region was found (not currently
// supported).
if (collector.hasNonForRegion)
return false;
Node node(nextNodeId++, &inst);

View File

@ -258,7 +258,8 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef<AffineForOp> band,
// Returns all maximal outermost perfect loop nests to tile.
static void getTileableBands(Function *f,
std::vector<SmallVector<AffineForOp, 6>> *bands) {
// Get maximal perfect nest of 'for' insts starting from root (inclusive).
// Get maximal perfect nest of 'affine.for' insts starting from root
// (inclusive).
auto getMaximalPerfectLoopNest = [&](AffineForOp root) {
SmallVector<AffineForOp, 6> band;
AffineForOp currInst = root;

View File

@ -158,8 +158,8 @@ void LoopUnroll::runOnFunction() {
}
}
/// Unrolls a 'for' inst. Returns success if the loop was unrolled, failure
/// otherwise. The default unroll factor is 4.
/// Unrolls an 'affine.for' inst. Returns success if the loop was unrolled,
/// failure otherwise. The default unroll factor is 4.
LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
// Use the function callback if one was provided.
if (getUnrollFactor) {

View File

@ -96,7 +96,7 @@ void LoopUnrollAndJam::runOnFunction() {
runOnAffineForOp(forOp);
}
/// Unroll and jam a 'for' inst. Default unroll jam factor is
/// Unroll and jam an 'affine.for' inst. Default unroll jam factor is
/// kDefaultUnrollJamFactor. Return failure if nothing was done.
LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) {
// Unroll and jam by the factor that was passed if any.

View File

@ -276,7 +276,7 @@ static Value *buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate,
return value;
}
// Convert a "for" loop to a flow of blocks. Return `false` on success.
// Convert an "affine.for" loop to a flow of blocks. Return `false` on success.
//
// Create an SESE region for the loop (including its body) and append it to the
// end of the current region. The loop region consists of the initialization
@ -323,8 +323,9 @@ bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) {
auto loc = forOp->getLoc();
auto *forInst = forOp->getInstruction();
// Start by splitting the block containing the 'for' into two parts. The part
// before will get the init code, the part after will be the end point.
// Start by splitting the block containing the 'affine.for' into two parts.
// The part before will get the init code, the part after will be the end
// point.
auto *initBlock = forInst->getBlock();
auto *endBlock = initBlock->splitBlock(forInst);

View File

@ -56,9 +56,9 @@
/// // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into
/// // vector<32x256xf32> and pad with %f0 to handle the boundary case:
/// %f0 = constant 0.0f : f32
/// for %i0 = 0 to %0 {
/// for %i1 = 0 to %1 step 256 {
/// for %i2 = 0 to %2 step 32 {
/// affine.for %i0 = 0 to %0 {
/// affine.for %i1 = 0 to %1 step 256 {
/// affine.for %i2 = 0 to %2 step 32 {
/// %v = vector_transfer_read %A, %i0, %i1, %i2, %f0
/// {permutation_map: (d0, d1, d2) -> (d2, d1)} :
/// (memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32>
@ -70,8 +70,8 @@
/// abstraction):
///
/// ```mlir {.mlir}
/// for %d2 = 0 to 256 {
/// for %d1 = 0 to 32 {
/// affine.for %d2 = 0 to 256 {
/// affine.for %d1 = 0 to 32 {
/// %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32
/// %tmp[%d2, %d1] = %s
/// }

View File

@ -100,10 +100,10 @@
/// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
/// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
/// %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> :
/// vector<4x4x4xf32> for %i0 = 0 to %M step 4 {
/// for %i1 = 0 to %N step 4 {
/// for %i2 = 0 to %O {
/// for %i3 = 0 to %P step 4 {
/// vector<4x4x4xf32> affine.for %i0 = 0 to %M step 4 {
/// affine.for %i1 = 0 to %N step 4 {
/// affine.for %i2 = 0 to %O {
/// affine.for %i3 = 0 to %P step 4 {
/// vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3
/// {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} :
/// vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>,
@ -119,10 +119,10 @@
/// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
/// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
/// %f1 = constant splat<vector<4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
/// for %i0 = 0 to %arg0 step 4 {
/// for %i1 = 0 to %arg1 step 4 {
/// for %i2 = 0 to %arg2 {
/// for %i3 = 0 to %arg3 step 4 {
/// affine.for %i0 = 0 to %arg0 step 4 {
/// affine.for %i1 = 0 to %arg1 step 4 {
/// affine.for %i2 = 0 to %arg2 {
/// affine.for %i3 = 0 to %arg3 step 4 {
/// %1 = affine.apply (d0, d1, d2, d3) -> (d0, d1, d2, d3)
/// (%i0, %i1, %i2, %i3)
/// vector_transfer_write f1, %0, %1#0, %1#1, %1#2, %1#3
@ -286,10 +286,10 @@ static Value *substitute(Value *v, VectorType hwVectorType,
/// super-vectorization has been applied:
///
/// ```mlir
/// for %i0 = 0 to %M {
/// for %i1 = 0 to %N step 3 {
/// for %i2 = 0 to %O {
/// for %i3 = 0 to %P step 32 {
/// affine.for %i0 = 0 to %M {
/// affine.for %i1 = 0 to %N step 3 {
/// affine.for %i2 = 0 to %O {
/// affine.for %i3 = 0 to %P step 32 {
/// %r = vector_transfer_read(%A, map(%i..)#0, map(%i..)#1, map(%i..)#2)
/// -> vector<3x32xf32>
/// ...

View File

@ -19,7 +19,7 @@
// potentially getting rid of intermediate memref's entirely.
// TODO(mlir-team): In the future, similar techniques could be used to eliminate
// dead memref store's and perform more complex forwarding when support for
// SSA scalars live out of 'for'/'affine.if' statements is available.
// SSA scalars live out of 'affine.for'/'affine.if' statements is available.
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/AffineAnalysis.h"
@ -55,7 +55,7 @@ namespace {
//
// (* A dependence being satisfied at a block: a dependence that is satisfied by
// virtue of the destination instruction appearing textually / lexically after
// the source instruction within the body of a 'for' instruction; thus, a
// the source instruction within the body of an 'affine.for' instruction; thus, a
// dependence is always either satisfied by a loop or by a block).
//
// The above conditions are simple to check, sufficient, and powerful for most
@ -139,8 +139,8 @@ void MemRefDataFlowOpt::forwardStoreToLoad(LoadOp loadOp) {
// Check if this store is a candidate for forwarding; we only forward if
// the dependence from the store is carried by the *body* of innermost
// common surrounding loop. As an example this filters out cases like:
// for %i0
// for %i1
// affine.for %i0
// affine.for %i1
// %idx = affine.apply (d0) -> (d0 + 1) (%i0)
// store %A[%idx]
// load %A[%i0]

View File

@ -66,11 +66,11 @@ static unsigned getTagMemRefPos(Instruction &dmaInst) {
return 0;
}
/// Doubles the buffer of the supplied memref on the specified 'for' instruction
/// by adding a leading dimension of size two to the memref. Replaces all uses
/// of the old memref by the new one while indexing the newly added dimension by
/// the loop IV of the specified 'for' instruction modulo 2. Returns false if
/// such a replacement cannot be performed.
/// Doubles the buffer of the supplied memref on the specified 'affine.for'
/// instruction by adding a leading dimension of size two to the memref.
/// Replaces all uses of the old memref by the new one while indexing the newly
/// added dimension by the loop IV of the specified 'affine.for' instruction
/// modulo 2. Returns false if such a replacement cannot be performed.
static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
auto *forBody = forOp->getBody();
FuncBuilder bInner(forBody, forBody->begin());
@ -104,7 +104,7 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
dynamicDimCount++));
}
// Create and place the alloc right before the 'for' instruction.
// Create and place the alloc right before the 'affine.for' instruction.
Value *newMemRef =
bOuter.create<AllocOp>(forInst->getLoc(), newMemRefType, allocOperands);
@ -139,9 +139,9 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
/// Returns success if the IR is in a valid state.
void PipelineDataTransfer::runOnFunction() {
// Do a post order walk so that inner loop DMAs are processed first. This is
// necessary since 'for' instructions nested within would otherwise become
// invalid (erased) when the outer loop is pipelined (the pipelined one gets
// deleted and replaced by a prologue, a new steady-state loop and an
// necessary since 'affine.for' instructions nested within would otherwise
// become invalid (erased) when the outer loop is pipelined (the pipelined one
// gets deleted and replaced by a prologue, a new steady-state loop and an
// epilogue).
forOps.clear();
getFunction()->walkPostOrder<AffineForOp>(

View File

@ -71,7 +71,7 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
auto lb = b->create<AffineApplyOp>(forOp->getLoc(), lbMap, lbOperands);
// For each upper bound expr, get the range.
// Eg: for %i = lb to min (ub1, ub2),
// Eg: affine.for %i = lb to min (ub1, ub2),
// where tripCountExprs yield (tr1, tr2), we create affine.apply's:
// lb + tr1 - tr1 % ufactor, lb + tr2 - tr2 % ufactor; the results of all
// these affine.apply's make up the cleanup loop lower bound.
@ -161,8 +161,8 @@ void mlir::promoteSingleIterationLoops(Function *f) {
[](AffineForOp forOp) { promoteIfSingleIteration(forOp); });
}
/// Generates a 'for' inst with the specified lower and upper bounds while
/// generating the right IV remappings for the shifted instructions. The
/// Generates an 'affine.for' inst with the specified lower and upper bounds
/// while generating the right IV remappings for the shifted instructions. The
/// instruction blocks that go into the loop are specified in instGroupQueue
/// starting from the specified offset, and in that order; the first element of
/// the pair specifies the shift applied to that group of instructions; note
@ -216,10 +216,10 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
return loopChunk;
}
/// Skew the instructions in the body of a 'for' instruction with the specified
/// instruction-wise shifts. The shifts are with respect to the original
/// execution order, and are multiplied by the loop 'step' before being applied.
/// A shift of zero for each instruction will lead to no change.
/// Skew the instructions in the body of an 'affine.for' instruction with the
/// specified instruction-wise shifts. The shifts are with respect to the
/// original execution order, and are multiplied by the loop 'step' before being
/// applied. A shift of zero for each instruction will lead to no change.
// The skewing of instructions with respect to one another can be used for
// example to allow overlap of asynchronous operations (such as DMA
// communication) with computation, or just relative shifting of instructions
@ -267,7 +267,7 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
// An array of instruction groups sorted by shift amount; each group has all
// instructions with the same shift in the order in which they appear in the
// body of the 'for' inst.
// body of the 'affine.for' inst.
std::vector<std::vector<Instruction *>> sortedInstGroups(maxShift + 1);
unsigned pos = 0;
for (auto &inst : *forOp->getBody()) {
@ -499,7 +499,7 @@ void mlir::sinkLoop(AffineForOp forOp, unsigned loopDepth) {
// bounds, the resulting IR resembles:
//
// ```mlir
// for %i = max (`iv, ...) to min (`iv` + `offset`) {
// affine.for %i = max (`iv, ...) to min (`iv` + `offset`) {
// ...
// }
// ```

View File

@ -199,14 +199,14 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef,
///
/// Before
///
/// for %i = 0 to #map(%N)
/// affine.for %i = 0 to #map(%N)
/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
/// "send"(%idx, %A, ...)
/// "compute"(%idx)
///
/// After
///
/// for %i = 0 to #map(%N)
/// affine.for %i = 0 to #map(%N)
/// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
/// "send"(%idx, %A, ...)
/// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)

View File

@ -113,7 +113,7 @@ using namespace mlir;
///
/// At a high level, a vectorized load in a loop will resemble:
/// ```mlir
/// for %i = ? to ? step ? {
/// affine.for %i = ? to ? step ? {
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
/// vector<128xf32>
/// }
@ -309,7 +309,7 @@ using namespace mlir;
/// ```mlir
/// mlfunc @fill(%A : memref<128xf32>) -> () {
/// %f1 = constant 1.0 : f32
/// for %i0 = 0 to 32 {
/// affine.for %i0 = 0 to 32 {
/// store %f1, %A[%i0] : memref<128xf32, 0>
/// }
/// return
@ -322,7 +322,7 @@ using namespace mlir;
/// is still subject to exploratory tradeoffs. In particular, say we want to
/// vectorize by a factor 128, we want to transform the following input:
/// ```mlir
/// for %i = %M to %N {
/// affine.for %i = %M to %N {
/// %a = load A[%i] : memref<?xf32>
/// }
/// ```
@ -331,8 +331,8 @@ using namespace mlir;
/// memory promotion etc) say after stripmining (and potentially unrolling in
/// the case of LLVM's SLP vectorizer):
/// ```mlir
/// for %i = floor(%M, 128) to ceil(%N, 128) {
/// for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) {
/// affine.for %i = floor(%M, 128) to ceil(%N, 128) {
/// affine.for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) {
/// %a = load A[%ii] : memref<?xf32>
/// }
/// }
@ -341,7 +341,7 @@ using namespace mlir;
/// Instead, we seek to vectorize early and freeze vector types before
/// scheduling, so we want to generate a pattern that resembles:
/// ```mlir
/// for %i = ? to ? step ? {
/// affine.for %i = ? to ? step ? {
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
/// vector<128xf32>
/// }
@ -362,7 +362,7 @@ using namespace mlir;
/// For the simple strawman example above, vectorizing for a 1-D vector
/// abstraction of size 128 returns code similar to:
/// ```mlir
/// for %i = %M to %N step 128 {
/// affine.for %i = %M to %N step 128 {
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
/// vector<128xf32>
/// }
@ -391,20 +391,20 @@ using namespace mlir;
/// %C = alloc (%M, %N) : memref<?x?xf32, 0>
/// %f1 = constant 1.0 : f32
/// %f2 = constant 2.0 : f32
/// for %i0 = 0 to %M {
/// for %i1 = 0 to %N {
/// affine.for %i0 = 0 to %M {
/// affine.for %i1 = 0 to %N {
/// // non-scoped %f1
/// store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
/// }
/// }
/// for %i2 = 0 to %M {
/// for %i3 = 0 to %N {
/// affine.for %i2 = 0 to %M {
/// affine.for %i3 = 0 to %N {
/// // non-scoped %f2
/// store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
/// }
/// }
/// for %i4 = 0 to %M {
/// for %i5 = 0 to %N {
/// affine.for %i4 = 0 to %M {
/// affine.for %i5 = 0 to %N {
/// %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
/// %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
/// %s5 = addf %a5, %b5 : f32
@ -438,24 +438,24 @@ using namespace mlir;
/// %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
/// %cst = constant 1.0 : f32
/// %cst_0 = constant 2.0 : f32
/// for %i0 = 0 to %arg0 {
/// for %i1 = 0 to %arg1 step 256 {
/// affine.for %i0 = 0 to %arg0 {
/// affine.for %i1 = 0 to %arg1 step 256 {
/// %cst_1 = constant splat<vector<256xf32>, 1.0> :
/// vector<256xf32>
/// "vector_transfer_write"(%cst_1, %0, %i0, %i1) :
/// (vector<256xf32>, memref<?x?xf32>, index, index) -> ()
/// }
/// }
/// for %i2 = 0 to %arg0 {
/// for %i3 = 0 to %arg1 step 256 {
/// affine.for %i2 = 0 to %arg0 {
/// affine.for %i3 = 0 to %arg1 step 256 {
/// %cst_2 = constant splat<vector<256xf32>, 2.0> :
/// vector<256xf32>
/// "vector_transfer_write"(%cst_2, %1, %i2, %i3) :
/// (vector<256xf32>, memref<?x?xf32>, index, index) -> ()
/// }
/// }
/// for %i4 = 0 to %arg0 {
/// for %i5 = 0 to %arg1 step 256 {
/// affine.for %i4 = 0 to %arg0 {
/// affine.for %i5 = 0 to %arg1 step 256 {
/// %3 = "vector_transfer_read"(%0, %i4, %i5) :
/// (memref<?x?xf32>, index, index) -> vector<256xf32>
/// %4 = "vector_transfer_read"(%1, %i4, %i5) :
@ -494,24 +494,24 @@ using namespace mlir;
/// %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
/// %cst = constant 1.0 : f32
/// %cst_0 = constant 2.0 : f32
/// for %i0 = 0 to %arg0 step 32 {
/// for %i1 = 0 to %arg1 step 256 {
/// affine.for %i0 = 0 to %arg0 step 32 {
/// affine.for %i1 = 0 to %arg1 step 256 {
/// %cst_1 = constant splat<vector<32x256xf32>, 1.0> :
/// vector<32x256xf32>
/// "vector_transfer_write"(%cst_1, %0, %i0, %i1) :
/// (vector<32x256xf32>, memref<?x?xf32>, index, index) -> ()
/// }
/// }
/// for %i2 = 0 to %arg0 step 32 {
/// for %i3 = 0 to %arg1 step 256 {
/// affine.for %i2 = 0 to %arg0 step 32 {
/// affine.for %i3 = 0 to %arg1 step 256 {
/// %cst_2 = constant splat<vector<32x256xf32>, 2.0> :
/// vector<32x256xf32>
/// "vector_transfer_write"(%cst_2, %1, %i2, %i3) :
/// (vector<32x256xf32>, memref<?x?xf32>, index, index) -> ()
/// }
/// }
/// for %i4 = 0 to %arg0 step 32 {
/// for %i5 = 0 to %arg1 step 256 {
/// affine.for %i4 = 0 to %arg0 step 32 {
/// affine.for %i5 = 0 to %arg1 step 256 {
/// %3 = "vector_transfer_read"(%0, %i4, %i5) :
/// (memref<?x?xf32>, index, index) -> vector<32x256xf32>
/// %4 = "vector_transfer_read"(%1, %i4, %i5) :

View File

@ -47,7 +47,7 @@
func @compose_affine_maps_1dto2d_no_symbols() {
%0 = alloc() : memref<4x4xf32>
for %i0 = 0 to 15 {
affine.for %i0 = 0 to 15 {
// Test load[%x, %x]
%x0 = affine.apply (d0) -> (d0 - 1) (%i0)
@ -93,7 +93,7 @@ func @compose_affine_maps_1dto2d_no_symbols() {
func @compose_affine_maps_1dto2d_with_symbols() {
%0 = alloc() : memref<4x4xf32>
for %i0 = 0 to 15 {
affine.for %i0 = 0 to 15 {
// Test load[%x0, %x0] with symbol %c4
%c4 = constant 4 : index
%x0 = affine.apply (d0)[s0] -> (d0 - s0) (%i0)[%c4]
@ -134,13 +134,13 @@ func @compose_affine_maps_2d_tile() {
%c4 = constant 4 : index
%c8 = constant 8 : index
for %i0 = 0 to 3 {
affine.for %i0 = 0 to 3 {
%x0 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i0)[%c4]
for %i1 = 0 to 3 {
affine.for %i1 = 0 to 3 {
%x1 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i1)[%c8]
for %i2 = 0 to 3 {
affine.for %i2 = 0 to 3 {
%x2 = affine.apply (d0)[s0] -> (d0 mod s0) (%i2)[%c4]
for %i3 = 0 to 3 {
affine.for %i3 = 0 to 3 {
%x3 = affine.apply (d0)[s0] -> (d0 mod s0) (%i3)[%c8]
%x40 = affine.apply (d0, d1, d2, d3)[s0, s1] ->
@ -166,9 +166,9 @@ func @compose_affine_maps_dependent_loads() {
%0 = alloc() : memref<16x32xf32>
%1 = alloc() : memref<16x32xf32>
for %i0 = 0 to 3 {
for %i1 = 0 to 3 {
for %i2 = 0 to 3 {
affine.for %i0 = 0 to 3 {
affine.for %i1 = 0 to 3 {
affine.for %i2 = 0 to 3 {
%c3 = constant 3 : index
%c7 = constant 7 : index
@ -212,7 +212,7 @@ func @compose_affine_maps_dependent_loads() {
func @compose_affine_maps_diamond_dependency() {
%0 = alloc() : memref<4x4xf32>
for %i0 = 0 to 15 {
affine.for %i0 = 0 to 15 {
%a = affine.apply (d0) -> (d0 - 1) (%i0)
%b = affine.apply (d0) -> (d0 + 7) (%a)
%c = affine.apply (d0) -> (d0 * 4) (%a)
@ -232,8 +232,8 @@ func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) {
%c9 = constant 9 : index
%1 = alloc() : memref<100x100xf32, 1>
%2 = alloc() : memref<1xi32>
for %i0 = 0 to 100 {
for %i1 = 0 to 100 {
affine.for %i0 = 0 to 100 {
affine.for %i1 = 0 to 100 {
%3 = affine.apply (d0, d1)[s0, s1] -> (d1 + s0 + s1)
(%i0, %i1)[%arg1, %c9]
%4 = affine.apply (d0, d1, d3) -> (d3 - (d0 + d1))
@ -253,7 +253,7 @@ func @trivial_maps() {
%0 = alloc() : memref<10xf32>
%c0 = constant 0 : index
%cst = constant 0.000000e+00 : f32
for %i1 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%1 = affine.apply ()[s0] -> (s0)()[%c0]
store %cst, %0[%1] : memref<10xf32>
%2 = load %0[%c0] : memref<10xf32>
@ -380,7 +380,7 @@ func @mix_dims_and_symbols_g(%M: index, %N: index) -> (index, index, index) {
// CHECK-LABEL: func @symbolic_semi_affine(%arg0: index, %arg1: index, %arg2: memref<?xf32>) {
func @symbolic_semi_affine(%M: index, %N: index, %A: memref<?xf32>) {
%f1 = constant 1.0 : f32
for %i0 = 1 to 100 {
affine.for %i0 = 1 to 100 {
%1 = affine.apply ()[s0] -> (s0 + 1) ()[%M]
%2 = affine.apply (d0)[s0] -> (d0 floordiv s0) (%i0)[%1]
// CHECK-DAG: {{.*}} = affine.apply [[symbolic_semi_affine]](%i0)[%arg0]
@ -404,20 +404,20 @@ func @constant_fold_bounds(%N : index) {
%c3 = affine.apply (d0, d1) -> (d0 + d1) (%c1, %c2)
%l = "foo"() : () -> index
// CHECK: for %i0 = 5 to 7 {
for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) {
// CHECK: affine.for %i0 = 5 to 7 {
affine.for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) {
"foo"(%i, %c3) : (index, index) -> ()
}
// Bound takes a non-constant argument but can still be folded.
// CHECK: for %i1 = 1 to 7 {
for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) {
// CHECK: affine.for %i1 = 1 to 7 {
affine.for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) {
"foo"(%j, %c3) : (index, index) -> ()
}
// None of the bounds can be folded.
// CHECK: for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] {
for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] {
// CHECK: affine.for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] {
affine.for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] {
"foo"(%k, %c3) : (index, index) -> ()
}
return

View File

@ -5,7 +5,7 @@
#map = (d0)[s0] -> (d0 + s0)
func @affine_apply_invalid_dim(%arg : index) {
for %n0 = 0 to 7 {
affine.for %n0 = 0 to 7 {
%dim = addi %arg, %arg : index
// expected-error@+1 {{operand cannot be used as a dimension id}}
@ -19,7 +19,7 @@ func @affine_apply_invalid_dim(%arg : index) {
#map0 = (d0)[s0] -> (d0 + s0)
func @affine_apply_invalid_sym() {
for %i0 = 0 to 7 {
affine.for %i0 = 0 to 7 {
// expected-error@+1 {{operand cannot be used as a symbol}}
%0 = affine.apply #map0(%i0)[%i0]
}
@ -31,11 +31,11 @@ func @affine_apply_invalid_sym() {
#map = (d0)[s0] -> (d0 + s0)
func @affine_for_lower_bound_invalid_dim(%arg : index) {
for %n0 = 0 to 7 {
affine.for %n0 = 0 to 7 {
%dim = addi %arg, %arg : index
// expected-error@+1 {{operand cannot be used as a dimension id}}
for %n1 = 0 to #map(%dim)[%arg] {
affine.for %n1 = 0 to #map(%dim)[%arg] {
}
}
return
@ -46,11 +46,11 @@ func @affine_for_lower_bound_invalid_dim(%arg : index) {
#map = (d0)[s0] -> (d0 + s0)
func @affine_for_upper_bound_invalid_dim(%arg : index) {
for %n0 = 0 to 7 {
affine.for %n0 = 0 to 7 {
%dim = addi %arg, %arg : index
// expected-error@+1 {{operand cannot be used as a dimension id}}
for %n1 = #map(%dim)[%arg] to 7 {
affine.for %n1 = #map(%dim)[%arg] to 7 {
}
}
return
@ -61,9 +61,9 @@ func @affine_for_upper_bound_invalid_dim(%arg : index) {
#map0 = (d0)[s0] -> (d0 + s0)
func @affine_for_lower_bound_invalid_sym() {
for %i0 = 0 to 7 {
affine.for %i0 = 0 to 7 {
// expected-error@+1 {{operand cannot be used as a symbol}}
for %n0 = #map0(%i0)[%i0] to 7 {
affine.for %n0 = #map0(%i0)[%i0] to 7 {
}
}
return
@ -74,9 +74,9 @@ func @affine_for_lower_bound_invalid_sym() {
#map0 = (d0)[s0] -> (d0 + s0)
func @affine_for_upper_bound_invalid_sym() {
for %i0 = 0 to 7 {
affine.for %i0 = 0 to 7 {
// expected-error@+1 {{operand cannot be used as a symbol}}
for %n0 = 0 to #map0(%i0)[%i0] {
affine.for %n0 = 0 to #map0(%i0)[%i0] {
}
}
return
@ -87,7 +87,7 @@ func @affine_for_upper_bound_invalid_sym() {
#set0 = (i)[N] : (i >= 0, N - i >= 0)
func @affine_if_invalid_dim(%arg : index) {
for %n0 = 0 to 7 {
affine.for %n0 = 0 to 7 {
%dim = addi %arg, %arg : index
// expected-error@+1 {{operand cannot be used as a dimension id}}
@ -101,7 +101,7 @@ func @affine_if_invalid_dim(%arg : index) {
#set0 = (i)[N] : (i >= 0, N - i >= 0)
func @affine_if_invalid_sym() {
for %i0 = 0 to 7 {
affine.for %i0 = 0 to 7 {
// expected-error@+1 {{operand cannot be used as a symbol}}
affine.if #set0(%i0)[%i0] {}
}
@ -113,7 +113,7 @@ func @affine_if_invalid_sym() {
#set0 = (i)[N] : (i >= 0, N - i >= 0)
func @affine_if_invalid_dimop_dim(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
for %n0 = 0 to 7 {
affine.for %n0 = 0 to 7 {
%0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
%dim = dim %0, 0 : memref<?x?x?x?xf32>

View File

@ -2,9 +2,9 @@
// Check that the attributes for the affine operations are round-tripped.
func @attributes() {
// CHECK: for %i
// CHECK: affine.for %i
// CHECK-NEXT: } {some_attr: true}
for %i = 0 to 10 {
affine.for %i = 0 to 10 {
} {some_attr: true}
// CHECK: if

View File

@ -143,7 +143,7 @@ TEST_FUNC(cond_branch) {
f->print(llvm::outs());
}
// Inject a EDSC-constructed `for` loop with bounds coming from function
// Inject a EDSC-constructed `affine.for` loop with bounds coming from function
// arguments.
TEST_FUNC(dynamic_for_func_args) {
auto indexType = IndexType::get(&globalContext());
@ -164,7 +164,7 @@ TEST_FUNC(dynamic_for_func_args) {
// clang-format off
// CHECK-LABEL: func @dynamic_for_func_args(%arg0: index, %arg1: index) {
// CHECK: for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 {
// CHECK: affine.for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 {
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 * 3)()[%arg0]
// CHECK: {{.*}} = affine.apply ()[s0, s1] -> (s1 + s0 * 3)()[%arg0, %arg1]
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 + 3)()[%arg0]
@ -172,7 +172,7 @@ TEST_FUNC(dynamic_for_func_args) {
f->print(llvm::outs());
}
// Inject a EDSC-constructed `for` loop with non-constant bounds that are
// Inject a EDSC-constructed `affine.for` loop with non-constant bounds that are
// obtained from AffineApplyOp (also constructed using EDSC operator
// overloads).
TEST_FUNC(dynamic_for) {
@ -200,12 +200,12 @@ TEST_FUNC(dynamic_for) {
// CHECK-LABEL: func @dynamic_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
// CHECK: %0 = affine.apply ()[s0, s1] -> (s0 - s1)()[%arg0, %arg1]
// CHECK-NEXT: %1 = affine.apply ()[s0, s1] -> (s0 + s1)()[%arg2, %arg3]
// CHECK-NEXT: for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 {
// CHECK-NEXT: affine.for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 {
// clang-format on
f->print(llvm::outs());
}
// Inject a EDSC-constructed empty `for` loop with max/min bounds that
// Inject a EDSC-constructed empty `affine.for` loop with max/min bounds that
// corresponds to
//
// for max(%arg0, %arg1) to (%arg2, %arg3) step 1
@ -234,7 +234,7 @@ TEST_FUNC(max_min_for) {
// clang-format off
// CHECK-LABEL: func @max_min_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
// CHECK: for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) {
// CHECK: affine.for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) {
// clang-format on
f->print(llvm::outs());
}
@ -334,7 +334,7 @@ TEST_FUNC(assignments_1) {
// clang-format off
// CHECK-LABEL: func @assignments(%arg0: memref<4xf32>, %arg1: memref<4xf32>, %arg2: memref<4xf32>) {
// CHECK: for %[[iv:.*]] = 0 to 4 {
// CHECK: affine.for %[[iv:.*]] = 0 to 4 {
// CHECK: %[[a:.*]] = load %arg0[%[[iv]]] : memref<4xf32>
// CHECK: %[[b:.*]] = load %arg1[%[[iv]]] : memref<4xf32>
// CHECK: %[[tmp:.*]] = mulf %[[a]], %[[b]] : f32
@ -348,7 +348,7 @@ TEST_FUNC(assignments_2) {
// clang-format off
// CHECK-LABEL: func @assignments(%arg0: memref<?xf32>, %arg1: memref<?xf32>, %arg2: memref<?xf32>) {
// CHECK: for %[[iv:.*]] = {{.*}} to {{.*}} {
// CHECK: affine.for %[[iv:.*]] = {{.*}} to {{.*}} {
// CHECK: %[[a:.*]] = load %arg0[%[[iv]]] : memref<?xf32>
// CHECK: %[[b:.*]] = load %arg1[%[[iv]]] : memref<?xf32>
// CHECK: %[[tmp:.*]] = mulf %[[a]], %[[b]] : f32
@ -405,13 +405,13 @@ TEST_FUNC(tile_2d) {
// CHECK: %[[M:[0-9]+]] = dim %arg0, 0 : memref<?x?x?xf32>
// CHECK-NEXT: %[[N:[0-9]+]] = dim %arg0, 1 : memref<?x?x?xf32>
// CHECK-NEXT: %[[P:[0-9]+]] = dim %arg0, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[M]]) step 512 {
// CHECK-NEXT: for %i1 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[N]]) step 1024 {
// CHECK-NEXT: for %i2 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) {
// CHECK-NEXT: for %i3 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] step 16 {
// CHECK-NEXT: for %i4 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] step 32 {
// CHECK-NEXT: for %i5 = max (d0, d1)[s0] -> (s0, d0, d1)(%i1, %i4)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 1024, d1 + 32)(%i1, %i4)[%[[N]]] {
// CHECK-NEXT: for %i6 = max (d0, d1)[s0] -> (s0, d0, d1)(%i0, %i3)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 512, d1 + 16)(%i0, %i3)[%[[M]]] {
// CHECK: affine.for %i0 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[M]]) step 512 {
// CHECK-NEXT: affine.for %i1 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[N]]) step 1024 {
// CHECK-NEXT: affine.for %i2 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) {
// CHECK-NEXT: affine.for %i3 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] step 16 {
// CHECK-NEXT: affine.for %i4 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] step 32 {
// CHECK-NEXT: affine.for %i5 = max (d0, d1)[s0] -> (s0, d0, d1)(%i1, %i4)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 1024, d1 + 32)(%i1, %i4)[%[[N]]] {
// CHECK-NEXT: affine.for %i6 = max (d0, d1)[s0] -> (s0, d0, d1)(%i0, %i3)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 512, d1 + 16)(%i0, %i3)[%[[M]]] {
// CHECK-NEXT: {{.*}} = load {{.*}}[%i6, %i5, %i2] : memref<?x?x?xf32>
// CHECK-NEXT: {{.*}} = load {{.*}}[%i6, %i5, %i2] : memref<?x?x?xf32>
// CHECK-NEXT: {{.*}} = addf {{.*}}, {{.*}} : f32
@ -421,9 +421,9 @@ TEST_FUNC(tile_2d) {
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i7 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) {
// CHECK-NEXT: for %i8 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] {
// CHECK-NEXT: for %i9 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] {
// CHECK-NEXT: affine.for %i7 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) {
// CHECK-NEXT: affine.for %i8 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] {
// CHECK-NEXT: affine.for %i9 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] {
// CHECK-NEXT: {{.*}} = load {{.*}}[%i8, %i9, %i7] : memref<?x?x?xf32>
// CHECK-NEXT: {{.*}} = load {{.*}}[%i8, %i9, %i7] : memref<?x?x?xf32>
// CHECK-NEXT: {{.*}}= addf {{.*}}, {{.*}} : f32

View File

@ -80,11 +80,11 @@ TEST_FUNC(builder_dynamic_for_func_args) {
// clang-format off
// CHECK-LABEL: func @builder_dynamic_for_func_args(%arg0: index, %arg1: index) {
// CHECK: for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 {
// CHECK: affine.for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 {
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 * 3)()[%arg0]
// CHECK: {{.*}} = affine.apply ()[s0, s1] -> (s1 + s0 * 3)()[%arg0, %arg1]
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 + 3)()[%arg0]
// CHECK: for %i1 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 2 {
// CHECK: affine.for %i1 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 2 {
// CHECK: {{.*}} = affine.apply (d0, d1) -> ((d0 + d1 * 3) floordiv 32)(%i0, %i1)
// CHECK: {{.*}} = affine.apply (d0, d1) -> (((d0 + d1 * 3) floordiv 32) * 31)(%i0, %i1)
// CHECK: {{.*}} = affine.apply (d0, d1) -> ((((d0 + d1 * 3) floordiv 32) * 31) ceildiv 32)(%i0, %i1)
@ -119,7 +119,7 @@ TEST_FUNC(builder_dynamic_for) {
// CHECK-LABEL: func @builder_dynamic_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
// CHECK: %0 = affine.apply ()[s0, s1] -> (s0 - s1)()[%arg0, %arg1]
// CHECK-NEXT: %1 = affine.apply ()[s0, s1] -> (s0 + s1)()[%arg2, %arg3]
// CHECK-NEXT: for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 {
// CHECK-NEXT: affine.for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 {
// clang-format on
f->print(llvm::outs());
}
@ -140,7 +140,7 @@ TEST_FUNC(builder_max_min_for) {
// clang-format off
// CHECK-LABEL: func @builder_max_min_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
// CHECK: for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) {
// CHECK: affine.for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) {
// CHECK: return
// clang-format on
f->print(llvm::outs());
@ -344,16 +344,16 @@ TEST_FUNC(builder_helpers) {
});
// CHECK-LABEL: @builder_helpers
// CHECK: for %i0 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
// CHECK-NEXT: for %i1 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
// CHECK-NEXT: for %i2 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
// CHECK: affine.for %i0 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
// CHECK-NEXT: affine.for %i1 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
// CHECK-NEXT: affine.for %i2 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
// CHECK-NEXT: [[a:%.*]] = load %arg0[%i0, %i1, %i2] : memref<?x?x?xf32>
// CHECK-NEXT: [[b:%.*]] = addf {{.*}}, [[a]] : f32
// CHECK-NEXT: [[c:%.*]] = load %arg1[%i0, %i1, %i2] : memref<?x?x?xf32>
// CHECK-NEXT: [[d:%.*]] = addf [[b]], [[c]] : f32
// CHECK-NEXT: store [[d]], %arg2[%i0, %i1, %i2] : memref<?x?x?xf32>
// CHECK-NEXT: }
// CHECK-NEXT: for %i3 = (d0) -> (d0)(%c0_1) to (d0) -> (d0)(%2) {
// CHECK-NEXT: affine.for %i3 = (d0) -> (d0)(%c0_1) to (d0) -> (d0)(%2) {
// CHECK-NEXT: [[a:%.*]] = load %arg1[%i0, %i1, %i3] : memref<?x?x?xf32>
// CHECK-NEXT: [[b:%.*]] = load %arg0[%i0, %i1, %i3] : memref<?x?x?xf32>
// CHECK-NEXT: [[c:%.*]] = addf [[b]], [[a]] : f32
@ -392,8 +392,8 @@ TEST_FUNC(custom_ops) {
});
// CHECK-LABEL: @custom_ops
// CHECK: for %i0 {{.*}}
// CHECK: for %i1 {{.*}}
// CHECK: affine.for %i0 {{.*}}
// CHECK: affine.for %i1 {{.*}}
// CHECK: {{.*}} = "my_custom_op"{{.*}} : (index, index) -> index
// CHECK: "my_custom_inst_0"{{.*}} : (index, index) -> ()
// CHECK: [[TWO:%[a-z0-9]+]] = "my_custom_inst_2"{{.*}} : (index, index) -> (index, index)

View File

@ -204,24 +204,24 @@ func @illegaltype(i0) // expected-error {{invalid integer width}}
// -----
func @malformed_for_percent() {
for i = 1 to 10 { // expected-error {{expected SSA operand}}
affine.for i = 1 to 10 { // expected-error {{expected SSA operand}}
// -----
func @malformed_for_equal() {
for %i 1 to 10 { // expected-error {{expected '='}}
affine.for %i 1 to 10 { // expected-error {{expected '='}}
// -----
func @malformed_for_to() {
for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}}
affine.for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}}
}
}
// -----
func @incomplete_for() {
for %i = 1 to 10 step 2
affine.for %i = 1 to 10 step 2
} // expected-error {{expected '{' to begin a region}}
// -----
@ -230,19 +230,19 @@ func @incomplete_for() {
func @reference_to_iv_in_bound() {
// expected-error@+1 {{operand use before it's defined}}
for %i0 = #map0(%i0) to 10 {
affine.for %i0 = #map0(%i0) to 10 {
}
}
// -----
func @nonconstant_step(%1 : i32) {
for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}}
affine.for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}}
// -----
func @for_negative_stride() {
for %i = 1 to 10 step -1
affine.for %i = 1 to 10 step -1
} // expected-error@-1 {{expected step to be representable as a positive signed integer}}
// -----
@ -254,7 +254,7 @@ func @non_instruction() {
// -----
func @invalid_if_conditional2() {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
affine.if (i)[N] : (i >= ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
}
}
@ -262,7 +262,7 @@ func @invalid_if_conditional2() {
// -----
func @invalid_if_conditional3() {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
affine.if (i)[N] : (i == 1) // expected-error {{expected '0' after '=='}}
}
}
@ -270,7 +270,7 @@ func @invalid_if_conditional3() {
// -----
func @invalid_if_conditional4() {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
affine.if (i)[N] : (i >= 2) // expected-error {{expected '0' after '>='}}
}
}
@ -278,7 +278,7 @@ func @invalid_if_conditional4() {
// -----
func @invalid_if_conditional5() {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
affine.if (i)[N] : (i <= 0 ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
}
}
@ -286,7 +286,7 @@ func @invalid_if_conditional5() {
// -----
func @invalid_if_conditional6() {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
affine.if (i) : (i) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
}
}
@ -294,7 +294,7 @@ func @invalid_if_conditional6() {
// -----
// TODO (support affine.if (1)?
func @invalid_if_conditional7() {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
affine.if (i) : (1) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
}
}
@ -440,8 +440,8 @@ func @undef() {
// -----
func @duplicate_induction_var() {
for %i = 1 to 10 { // expected-error {{previously defined here}}
for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}}
affine.for %i = 1 to 10 { // expected-error {{previously defined here}}
affine.for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}}
}
}
return
@ -450,7 +450,7 @@ func @duplicate_induction_var() {
// -----
func @dominance_failure() {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
}
"xxx"(%i) : (index)->() // expected-error {{operand #0 does not dominate this use}}
return
@ -477,7 +477,7 @@ func @return_type_mismatch() -> i32 {
// -----
func @return_inside_loop() {
for %i = 1 to 100 {
affine.for %i = 1 to 100 {
// expected-error@-1 {{op expects body block to not have a terminator}}
return
}
@ -522,7 +522,7 @@ func @referer() {
#map1 = (i)[j] -> (i+j)
func @bound_symbol_mismatch(%N : index) {
for %i = #map1(%N) to 100 {
affine.for %i = #map1(%N) to 100 {
// expected-error@-1 {{symbol operand count and integer set symbol count must match}}
}
return
@ -533,7 +533,7 @@ func @bound_symbol_mismatch(%N : index) {
#map1 = (i)[j] -> (i+j)
func @bound_dim_mismatch(%N : index) {
for %i = #map1(%N, %N)[%N] to 100 {
affine.for %i = #map1(%N, %N)[%N] to 100 {
// expected-error@-1 {{dim operand count and integer set dim count must match}}
}
return
@ -542,7 +542,7 @@ func @bound_dim_mismatch(%N : index) {
// -----
func @large_bound() {
for %i = 1 to 9223372036854775810 {
affine.for %i = 1 to 9223372036854775810 {
// expected-error@-1 {{integer constant out of range for attribute}}
}
return
@ -551,7 +551,7 @@ func @large_bound() {
// -----
func @max_in_upper_bound(%N : index) {
for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}}
affine.for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}}
}
return
}
@ -559,7 +559,7 @@ func @max_in_upper_bound(%N : index) {
// -----
func @step_typo() {
for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}}
affine.for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}}
}
return
}
@ -567,7 +567,7 @@ func @step_typo() {
// -----
func @invalid_bound_map(%N : i32) {
for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}}
affine.for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}}
}
return
}
@ -580,7 +580,7 @@ func @invalid_bound_map(%N : i32) {
#set0 = (i)[N] : (i >= 0, N - i >= 0)
func @invalid_if_operands1(%N : index) {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
affine.if #set0(%i) {
// expected-error@-1 {{symbol operand count and integer set symbol count must match}}
@ -588,7 +588,7 @@ func @invalid_if_operands1(%N : index) {
#set0 = (i)[N] : (i >= 0, N - i >= 0)
func @invalid_if_operands2(%N : index) {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
affine.if #set0()[%N] {
// expected-error@-1 {{dim operand count and integer set dim count must match}}
@ -596,7 +596,7 @@ func @invalid_if_operands2(%N : index) {
#set0 = (i)[N] : (i >= 0, N - i >= 0)
func @invalid_if_operands3(%N : index) {
for %i = 1 to 10 {
affine.for %i = 1 to 10 {
affine.if #set0(%i)[%i] {
// expected-error@-1 {{operand cannot be used as a symbol}}
}
@ -751,11 +751,11 @@ func @f(f32) {
// -----
func @f(%m : memref<?x?xf32>) {
for %i0 = 0 to 42 {
affine.for %i0 = 0 to 42 {
// expected-error@+1 {{operand #2 does not dominate this use}}
%x = load %m[%i0, %i1] : memref<?x?xf32>
}
for %i1 = 0 to 42 {
affine.for %i1 = 0 to 42 {
}
return
}
@ -805,7 +805,7 @@ func @type_alias_unknown(!unknown_alias) -> () { // expected-error {{undefined t
// Check ill-formed opaque tensor.
func @complex_loops() {
for %i1 = 1 to 100 {
affine.for %i1 = 1 to 100 {
// expected-error @+1 {{expected '"' in string literal}}
"opaqueIntTensor"(){bar: opaque<"", tensor<2x1x4xi32>, "0x686]>} : () -> ()
@ -839,7 +839,7 @@ func @invalid_affine_structure() {
func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
// expected-error @+1 {{lower loop bound affine map with multiple results requires 'max' prefix}}
for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 {
affine.for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 {
}
return
}
@ -848,7 +848,7 @@ func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
func @missing_for_min(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
// expected-error @+1 {{upper loop bound affine map with multiple results requires 'min' prefix}}
for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] {
affine.for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] {
}
return
}

View File

@ -13,7 +13,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
%2 = constant 4 : index loc(callsite("foo" at "mysource.cc":10:8))
// CHECK: } loc(fused["foo", "mysource.cc":10:8])
for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
} loc(fused["foo", "mysource.cc":10:8])
// CHECK: } loc(fused<"myPass">["foo", "foo2"])

View File

@ -208,8 +208,8 @@ func @identity_functor(%a : () -> ()) -> (() -> ()) {
func @func_ops_in_loop() {
// CHECK: %0 = "foo"() : () -> i64
%a = "foo"() : ()->i64
// CHECK: for %i0 = 1 to 10 {
for %i = 1 to 10 {
// CHECK: affine.for %i0 = 1 to 10 {
affine.for %i = 1 to 10 {
// CHECK: %1 = "doo"() : () -> f32
%b = "doo"() : ()->f32
// CHECK: "bar"(%0, %1) : (i64, f32) -> ()
@ -224,10 +224,10 @@ func @func_ops_in_loop() {
// CHECK-LABEL: func @loops() {
func @loops() {
// CHECK: for %i0 = 1 to 100 step 2 {
for %i = 1 to 100 step 2 {
// CHECK: for %i1 = 1 to 200 {
for %j = 1 to 200 {
// CHECK: affine.for %i0 = 1 to 100 step 2 {
affine.for %i = 1 to 100 step 2 {
// CHECK: affine.for %i1 = 1 to 200 {
affine.for %j = 1 to 200 {
} // CHECK: }
} // CHECK: }
return // CHECK: return
@ -235,14 +235,14 @@ func @loops() {
// CHECK-LABEL: func @complex_loops() {
func @complex_loops() {
for %i1 = 1 to 100 { // CHECK: for %i0 = 1 to 100 {
for %j1 = 1 to 100 { // CHECK: for %i1 = 1 to 100 {
affine.for %i1 = 1 to 100 { // CHECK: affine.for %i0 = 1 to 100 {
affine.for %j1 = 1 to 100 { // CHECK: affine.for %i1 = 1 to 100 {
// CHECK: "foo"(%i0, %i1) : (index, index) -> ()
"foo"(%i1, %j1) : (index,index) -> ()
} // CHECK: }
"boo"() : () -> () // CHECK: "boo"() : () -> ()
for %j2 = 1 to 10 { // CHECK: for %i2 = 1 to 10 {
for %k2 = 1 to 10 { // CHECK: for %i3 = 1 to 10 {
affine.for %j2 = 1 to 10 { // CHECK: affine.for %i2 = 1 to 10 {
affine.for %k2 = 1 to 10 { // CHECK: affine.for %i3 = 1 to 10 {
"goo"() : () -> () // CHECK: "goo"() : () -> ()
} // CHECK: }
} // CHECK: }
@ -253,8 +253,8 @@ func @complex_loops() {
// CHECK: func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
%c = constant 0 : i32 // CHECK: %c0_i32 = constant 0 : i32
for %i0 = 1 to %arg0 { // CHECK: for %i0 = 1 to %arg0 {
for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: for %i1 = #map{{[0-9]+}}(%i0) to %arg0 {
affine.for %i0 = 1 to %arg0 { // CHECK: affine.for %i0 = 1 to %arg0 {
affine.for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to %arg0 {
store %c, %arg1[%i0, %i1] : memref<?x?xi32> // CHECK: store %c0_i32, %arg1[%i0, %i1]
} // CHECK: }
} // CHECK: }
@ -263,8 +263,8 @@ func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
// CHECK: func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
// CHECK: for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] {
for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] {
// CHECK: affine.for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] {
affine.for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] {
// CHECK: "foo"(%arg2, %i0) : (memref<100xf32>, index) -> ()
"foo"(%arg2, %i0) : (memref<100xf32>, index) -> ()
} // CHECK: }
@ -275,24 +275,24 @@ func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
func @loop_bounds(%N : index) {
// CHECK: %0 = "foo"(%arg0) : (index) -> index
%s = "foo"(%N) : (index) -> index
// CHECK: for %i0 = %0 to %arg0
for %i = %s to %N {
// CHECK: for %i1 = #map{{[0-9]+}}(%i0) to 0
for %j = (d0)[]->(d0)(%i)[] to 0 step 1 {
// CHECK: affine.for %i0 = %0 to %arg0
affine.for %i = %s to %N {
// CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to 0
affine.for %j = (d0)[]->(d0)(%i)[] to 0 step 1 {
// CHECK: %1 = affine.apply #map{{.*}}(%i0, %i1)[%0]
%w1 = affine.apply(d0, d1)[s0] -> (d0+d1) (%i, %j) [%s]
// CHECK: %2 = affine.apply #map{{.*}}(%i0, %i1)[%0]
%w2 = affine.apply(d0, d1)[s0] -> (s0+1) (%i, %j) [%s]
// CHECK: for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] {
for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] {
// CHECK: affine.for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] {
affine.for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] {
// CHECK: "foo"(%i0, %i1, %i2) : (index, index, index) -> ()
"foo"(%i, %j, %k) : (index, index, index)->()
// CHECK: %c30 = constant 30 : index
%c = constant 30 : index
// CHECK: %3 = affine.apply #map{{.*}}(%arg0, %c30)
%u = affine.apply (d0, d1)->(d0+d1) (%N, %c)
// CHECK: for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] {
for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] {
// CHECK: affine.for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] {
affine.for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] {
// CHECK: "bar"(%i3) : (index) -> ()
"bar"(%l) : (index) -> ()
} // CHECK: }
@ -305,7 +305,7 @@ func @loop_bounds(%N : index) {
// CHECK-LABEL: func @ifinst(%arg0: index) {
func @ifinst(%N: index) {
%c = constant 200 : index // CHECK %c200 = constant 200
for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 {
affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 {
affine.if #set0(%i)[%N, %c] { // CHECK affine.if #set0(%i0)[%arg0, %c200] {
%x = constant 1 : i32
// CHECK: %c1_i32 = constant 1 : i32
@ -328,7 +328,7 @@ func @ifinst(%N: index) {
// CHECK-LABEL: func @simple_ifinst(%arg0: index) {
func @simple_ifinst(%N: index) {
%c = constant 200 : index // CHECK %c200 = constant 200
for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 {
affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 {
affine.if #set0(%i)[%N, %c] { // CHECK affine.if #set0(%i0)[%arg0, %c200] {
%x = constant 1 : i32
// CHECK: %c1_i32 = constant 1 : i32
@ -549,18 +549,18 @@ func @funcattrwithblock() -> ()
#map_non_simple2 = ()[s0, s1] -> (s0 + s1)
#map_non_simple3 = ()[s0] -> (s0 + 3)
func @funcsimplemap(%arg0: index, %arg1: index) -> () {
for %i0 = 0 to #map_simple0()[] {
// CHECK: for %i0 = 0 to 10 {
for %i1 = 0 to #map_simple1()[%arg1] {
// CHECK: for %i1 = 0 to %arg1 {
for %i2 = 0 to #map_non_simple0(%i0)[] {
// CHECK: for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) {
for %i3 = 0 to #map_non_simple1(%i0)[%arg1] {
// CHECK: for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] {
for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] {
// CHECK: for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] {
for %i5 = 0 to #map_non_simple3()[%arg0] {
// CHECK: for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] {
affine.for %i0 = 0 to #map_simple0()[] {
// CHECK: affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to #map_simple1()[%arg1] {
// CHECK: affine.for %i1 = 0 to %arg1 {
affine.for %i2 = 0 to #map_non_simple0(%i0)[] {
// CHECK: affine.for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) {
affine.for %i3 = 0 to #map_non_simple1(%i0)[%arg1] {
// CHECK: affine.for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] {
affine.for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] {
// CHECK: affine.for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] {
affine.for %i5 = 0 to #map_non_simple3()[%arg0] {
// CHECK: affine.for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] {
%c42_i32 = constant 42 : i32
}
}
@ -745,9 +745,9 @@ func @sparsevectorattr() -> () {
// CHECK-LABEL: func @loops_with_blockids() {
func @loops_with_blockids() {
^block0:
for %i = 1 to 100 step 2 {
affine.for %i = 1 to 100 step 2 {
^block1:
for %j = 1 to 200 {
affine.for %j = 1 to 200 {
^block2:
}
}

View File

@ -18,7 +18,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
%3 = constant 4 : index loc(callsite("foo" at callsite("mysource1.cc":10:8 at callsite("mysource2.cc":13:8 at "mysource3.cc":100:10))))
// CHECK: } ["foo", mysource.cc:10:8]
for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
} loc(fused["foo", "mysource.cc":10:8])
// CHECK: } <"myPass">["foo", "foo2"]

View File

@ -6,8 +6,8 @@
// CHECK-LABEL: func @materialize_read_1d() {
func @materialize_read_1d() {
%A = alloc () : memref<7x42xf32>
for %i0 = 0 to 7 step 4 {
for %i1 = 0 to 42 step 4 {
affine.for %i0 = 0 to 7 step 4 {
affine.for %i1 = 0 to 42 step 4 {
%f1 = vector_transfer_read %A, %i0, %i1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32>
%ip1 = affine.apply (d0) -> (d0 + 1) (%i1)
%f2 = vector_transfer_read %A, %i0, %ip1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32>
@ -29,11 +29,11 @@ func @materialize_read_1d() {
// CHECK-LABEL: func @materialize_read_1d_partially_specialized
func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) {
%A = alloc (%dyn1, %dyn2, %dyn4) : memref<7x?x?x42x?xf32>
for %i0 = 0 to 7 {
for %i1 = 0 to %dyn1 {
for %i2 = 0 to %dyn2 {
for %i3 = 0 to 42 step 2 {
for %i4 = 0 to %dyn4 {
affine.for %i0 = 0 to 7 {
affine.for %i1 = 0 to %dyn1 {
affine.for %i2 = 0 to %dyn2 {
affine.for %i3 = 0 to 42 step 2 {
affine.for %i4 = 0 to %dyn4 {
%f1 = vector_transfer_read %A, %i0, %i1, %i2, %i3, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32>
%i3p1 = affine.apply (d0) -> (d0 + 1) (%i3)
%f2 = vector_transfer_read %A, %i0, %i1, %i2, %i3p1, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32>
@ -54,19 +54,19 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d
// CHECK-LABEL: func @materialize_read(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
// CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
// CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 {
// CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 {
// CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 {
// CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 {
// CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 {
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 {
// CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 {
// CHECK: %[[D0:.*]] = dim %0, 0 : memref<?x?x?x?xf32>
// CHECK-NEXT: %[[D1:.*]] = dim %0, 1 : memref<?x?x?x?xf32>
// CHECK-NEXT: %[[D2:.*]] = dim %0, 2 : memref<?x?x?x?xf32>
// CHECK-NEXT: %[[D3:.*]] = dim %0, 3 : memref<?x?x?x?xf32>
// CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
// CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast %[[ALLOC]] : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
// CHECK-NEXT: for %[[I4:.*]] = 0 to 3 {
// CHECK-NEXT: for %[[I5:.*]] = 0 to 4 {
// CHECK-NEXT: for %[[I6:.*]] = 0 to 5 {
// CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 {
// CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 {
// CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 {
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
// CHECK-NEXT: %[[C1:.*]] = constant 1 : index
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
@ -117,10 +117,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
// Check that I3 + I6 (of size 5) read from last index load(..., L3) and write into first index store(I6, ...)
// Other dimensions are just accessed with I1, I2 resp.
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
for %i0 = 0 to %M step 3 {
for %i1 = 0 to %N {
for %i2 = 0 to %O {
for %i3 = 0 to %P step 5 {
affine.for %i0 = 0 to %M step 3 {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %O {
affine.for %i3 = 0 to %P step 5 {
%f = vector_transfer_read %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, 0, d0)} : (memref<?x?x?x?xf32, 0>, index, index, index, index) -> vector<5x4x3xf32>
}
}
@ -133,10 +133,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
// CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
// CHECK-NEXT: %cst = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32>
// CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 {
// CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 step 4 {
// CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 {
// CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 {
// CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 step 4 {
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 {
// CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 {
// CHECK: %[[D0:.*]] = dim %0, 0 : memref<?x?x?x?xf32>
// CHECK-NEXT: %[[D1:.*]] = dim %0, 1 : memref<?x?x?x?xf32>
// CHECK-NEXT: %[[D2:.*]] = dim %0, 2 : memref<?x?x?x?xf32>
@ -144,9 +144,9 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
// CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
// CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast {{.*}} : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
// CHECK: store %cst, {{.*}} : memref<1xvector<5x4x3xf32>>
// CHECK-NEXT: for %[[I4:.*]] = 0 to 3 {
// CHECK-NEXT: for %[[I5:.*]] = 0 to 4 {
// CHECK-NEXT: for %[[I6:.*]] = 0 to 5 {
// CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 {
// CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 {
// CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 {
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
// CHECK-NEXT: %[[C1:.*]] = constant 1 : index
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
@ -201,10 +201,10 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
// Other dimension is just accessed with I2.
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
%f1 = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32>
for %i0 = 0 to %M step 3 {
for %i1 = 0 to %N step 4 {
for %i2 = 0 to %O {
for %i3 = 0 to %P step 5 {
affine.for %i0 = 0 to %M step 3 {
affine.for %i1 = 0 to %N step 4 {
affine.for %i2 = 0 to %O {
affine.for %i3 = 0 to %P step 5 {
vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : vector<5x4x3xf32>, memref<?x?x?x?xf32, 0>, index, index, index, index
}
}

View File

@ -10,10 +10,10 @@
func @materialize(%M : index, %N : index, %O : index, %P : index) {
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
%f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
// CHECK: for %i0 = 0 to %arg0 step 4 {
// CHECK-NEXT: for %i1 = 0 to %arg1 step 4 {
// CHECK-NEXT: for %i2 = 0 to %arg2 {
// CHECK-NEXT: for %i3 = 0 to %arg3 step 4 {
// CHECK: affine.for %i0 = 0 to %arg0 step 4 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 4 {
// CHECK-NEXT: affine.for %i2 = 0 to %arg2 {
// CHECK-NEXT: affine.for %i3 = 0 to %arg3 step 4 {
// CHECK-NEXT: %[[a:[0-9]+]] = {{.*}}[[ID1]](%i0)
// CHECK-NEXT: %[[b:[0-9]+]] = {{.*}}[[ID1]](%i1)
// CHECK-NEXT: %[[c:[0-9]+]] = {{.*}}[[ID1]](%i2)
@ -25,10 +25,10 @@ func @materialize(%M : index, %N : index, %O : index, %P : index) {
// CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b2]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
// CHECK: %[[b3:[0-9]+]] = {{.*}}[[D0P3]](%i1)
// CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b3]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
for %i0 = 0 to %M step 4 {
for %i1 = 0 to %N step 4 {
for %i2 = 0 to %O {
for %i3 = 0 to %P step 4 {
affine.for %i0 = 0 to %M step 4 {
affine.for %i1 = 0 to %N step 4 {
affine.for %i2 = 0 to %O {
affine.for %i3 = 0 to %P step 4 {
"vector_transfer_write"(%f1, %A, %i0, %i1, %i2, %i3) {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : (vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>, index, index, index, index) -> ()
}
}

View File

@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
// 4x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 {
// CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
// CHECK: affine.for %i0 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 {
// CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
@ -34,15 +34,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}}
// CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
//
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
}
}
// 4x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 {
// CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
// CHECK: affine.for %i2 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 {
// CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
// CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
@ -60,15 +60,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}}
// CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
//
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// non-scoped %f2
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
}
}
// 4x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 {
// CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
// CHECK: affine.for %i4 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
@ -110,8 +110,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: vector_transfer_write
//
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
%b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
%s5 = addf %a5, %b5 : f32

View File

@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
// (3x2)x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i1 = 0 to %arg1 step 16 {
// CHECK: affine.for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 16 {
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
@ -41,26 +41,26 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL50:%.*]] = affine.apply [[D0P2]](%i0)
// CHECK-NEXT: [[VAL51:%.*]] = affine.apply [[D0P8]](%i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL50]], [[VAL51]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
}
}
// (3x2)x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i3 = 0 to %arg1 step 16 {
// CHECK: affine.for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 16 {
// .....
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// non-scoped %f2
// CHECK does (3x4)x unrolling.
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
}
}
// (3x2)x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i5 = 0 to %arg1 step 16 {
// CHECK: affine.for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 16 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
@ -122,8 +122,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: vector_transfer_write
//
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
%b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
%s5 = addf %a5, %b5 : f32

View File

@ -13,8 +13,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
// 2x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
// CHECK: affine.for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i0)
@ -24,15 +24,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32>
//
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
}
}
// 2x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
// CHECK: affine.for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i2)
@ -42,15 +42,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i3)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32>
//
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// non-scoped %f2
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
}
}
// 2x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
// CHECK: affine.for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
@ -72,8 +72,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: vector_transfer_write
//
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
%b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
%s5 = addf %a5, %b5 : f32

View File

@ -9,19 +9,19 @@
// CHECK-LABEL: func @simple()
func @simple() {
for %i0 = 0 to 7 {
affine.for %i0 = 0 to 7 {
%0 = affine.apply (d0) -> (d0) (%i0)
%1 = affine.apply (d0) -> (d0) (%0)
%2 = affine.apply (d0, d1) -> (d0 + d1) (%0, %0)
%3 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0)
}
// CHECK-NEXT: for %i0 = 0 to 7
// CHECK-NEXT: affine.for %i0 = 0 to 7
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i0)
// CHECK-NEXT: {{.*}} affine.apply #[[D0TIMES2]](%i0)
// CHECK-NEXT: {{.*}} affine.apply #[[ZERO]]()
for %i1 = 0 to 7 {
for %i2 = 0 to 42 {
affine.for %i1 = 0 to 7 {
affine.for %i2 = 0 to 42 {
%20 = affine.apply (d0, d1) -> (d1) (%i1, %i2)
%21 = affine.apply (d0, d1) -> (d0) (%i1, %i2)
%22 = affine.apply (d0, d1) -> (d0 + d1) (%20, %21)
@ -29,15 +29,15 @@ func @simple() {
%24 = affine.apply (d0, d1) -> (-d0 + d1) (%20, %21)
}
}
// CHECK: for %i1 = 0 to 7
// CHECK-NEXT: for %i2 = 0 to 42
// CHECK: affine.for %i1 = 0 to 7
// CHECK-NEXT: affine.for %i2 = 0 to 42
// CHECK-NEXT: {{.*}} affine.apply #[[D0PLUSD1]](%i1, %i2)
// CHECK-NEXT: {{.*}} affine.apply #[[MINSD0PLUSD1]](%i1, %i2)
// CHECK-NEXT: {{.*}} affine.apply #[[D0MINUSD1]](%i1, %i2)
for %i3 = 0 to 16 {
for %i4 = 0 to 47 step 2 {
for %i5 = 0 to 78 step 16 {
affine.for %i3 = 0 to 16 {
affine.for %i4 = 0 to 47 step 2 {
affine.for %i5 = 0 to 78 step 16 {
%50 = affine.apply (d0) -> (d0) (%i3)
%51 = affine.apply (d0) -> (d0) (%i4)
%52 = affine.apply (d0) -> (d0) (%i5)
@ -47,9 +47,9 @@ func @simple() {
}
}
}
// CHECK: for %i3 = 0 to 16
// CHECK-NEXT: for %i4 = 0 to 47 step 2
// CHECK-NEXT: for %i5 = 0 to 78 step 16
// CHECK: affine.for %i3 = 0 to 16
// CHECK-NEXT: affine.for %i4 = 0 to 47 step 2
// CHECK-NEXT: affine.for %i5 = 0 to 78 step 16
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i3)
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i4)
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i5)

View File

@ -23,17 +23,17 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
//
// CHECK: for {{.*}} step 128
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
for %i0 = 0 to %M { // vectorized due to scalar -> vector
affine.for %i0 = 0 to %M { // vectorized due to scalar -> vector
%a0 = load %A[%cst0, %cst0] : memref<?x?xf32>
}
//
// CHECK:for {{.*}} [[ARG_M]] {
for %i1 = 0 to %M { // not vectorized
affine.for %i1 = 0 to %M { // not vectorized
%a1 = load %A[%i1, %i1] : memref<?x?xf32>
}
//
// CHECK: for %i{{[0-9]*}} = 0 to [[ARG_M]] {
for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
// CHECK: affine.for %i{{[0-9]*}} = 0 to [[ARG_M]] {
affine.for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
%r2 = affine.apply (d0) -> (d0) (%i2)
%a2 = load %A[%r2#0, %cst0] : memref<?x?xf32>
}
@ -41,7 +41,7 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK:for [[IV3:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
// CHECK-NEXT: [[APP3:%[a-zA-Z0-9]+]] = affine.apply {{.*}}[[IV3]]
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[APP3]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
for %i3 = 0 to %M { // vectorized
affine.for %i3 = 0 to %M { // vectorized
%r3 = affine.apply (d0) -> (d0) (%i3)
%a3 = load %A[%cst0, %r3#0] : memref<?x?xf32>
}
@ -51,8 +51,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK-NEXT: [[APP50:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]])
// CHECK-NEXT: [[APP51:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]])
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP50]], [[APP51]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
for %i4 = 0 to %M { // vectorized
for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1
affine.for %i4 = 0 to %M { // vectorized
affine.for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1
%r50 = affine.apply (d0, d1) -> (d1) (%i4, %i5)
%r51 = affine.apply (d0, d1) -> (d0) (%i4, %i5)
%a5 = load %A[%r50, %r51] : memref<?x?xf32>
@ -61,8 +61,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
//
// CHECK: for [[IV6:%[i0-9]*]] = 0 to [[ARG_M]] {
// CHECK-NEXT: for [[IV7:%[i0-9]*]] = 0 to [[ARG_N]] {
for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
for %i7 = 0 to %N { // not vectorized, can never vectorize
affine.for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
affine.for %i7 = 0 to %N { // not vectorized, can never vectorize
%r70 = affine.apply (d0, d1) -> (d1 + d0) (%i6, %i7)
%r71 = affine.apply (d0, d1) -> (d0) (%i6, %i7)
%a7 = load %A[%r70, %r71] : memref<?x?xf32>
@ -74,8 +74,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK-NEXT: [[APP9_0:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]])
// CHECK-NEXT: [[APP9_1:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]])
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP9_0]], [[APP9_1]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
for %i8 = 0 to %M { // vectorized
for %i9 = 0 to %N {
affine.for %i8 = 0 to %M { // vectorized
affine.for %i9 = 0 to %N {
%r90 = affine.apply (d0, d1) -> (d1) (%i8, %i9)
%r91 = affine.apply (d0, d1) -> (d0 + d1) (%i8, %i9)
%a9 = load %A[%r90, %r91] : memref<?x?xf32>
@ -84,8 +84,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
//
// CHECK: for [[IV10:%[i0-9]*]] = 0 to %{{[0-9]*}} {
// CHECK: for [[IV11:%[i0-9]*]] = 0 to %{{[0-9]*}} {
for %i10 = 0 to %M { // not vectorized, need per load transposes
for %i11 = 0 to %N { // not vectorized, need per load transposes
affine.for %i10 = 0 to %M { // not vectorized, need per load transposes
affine.for %i11 = 0 to %N { // not vectorized, need per load transposes
%r11_0 = affine.apply (d0, d1) -> (d0) (%i10, %i11)
%r11_1 = affine.apply (d0, d1) -> (d1) (%i10, %i11)
%a11 = load %A[%r11_0, %r11_1] : memref<?x?xf32>
@ -98,9 +98,9 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK: for [[IV12:%[i0-9]*]] = 0 to %{{[0-9]*}} {
// CHECK: for [[IV13:%[i0-9]*]] = 0 to %{{[0-9]*}} {
// CHECK: for [[IV14:%[i0-9]+]] = 0 to [[ARG_P]] step 128
for %i12 = 0 to %M { // not vectorized, can never vectorize
for %i13 = 0 to %N { // not vectorized, can never vectorize
for %i14 = 0 to %P { // vectorized
affine.for %i12 = 0 to %M { // not vectorized, can never vectorize
affine.for %i13 = 0 to %N { // not vectorized, can never vectorize
affine.for %i14 = 0 to %P { // vectorized
%r14_0 = affine.apply (d0, d1, d2) -> (d1) (%i12, %i13, %i14)
%r14_1 = affine.apply (d0, d1, d2) -> (d0 + d1) (%i12, %i13, %i14)
%r14_2 = affine.apply (d0, d1, d2) -> (d0 + d2) (%i12, %i13, %i14)
@ -109,24 +109,24 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
}
}
//
// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
for %i15 = 0 to %M { // not vectorized due to condition below
// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
affine.for %i15 = 0 to %M { // not vectorized due to condition below
affine.if #set0(%i15) {
%a15 = load %A[%cst0, %cst0] : memref<?x?xf32>
}
}
//
// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load
// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
affine.for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load
%a16 = alloc(%M) : memref<?xvector<2xf32>>
%l16 = load %a16[%i16] : memref<?xvector<2xf32>>
}
//
// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
// CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
// CHECK: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : {{.*}} -> vector<128xf32>
for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17
for %i18 = 0 to %M { // vectorized due to scalar -> vector
affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17
affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector
%a18 = load %A[%cst0, %cst0] : memref<?x?xf32>
}
}
@ -139,24 +139,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%C = alloc (%M, %N) : memref<?x?xf32, 0>
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// CHECK: [[C1:%.*]] = constant splat<vector<128xf32>, 1.000000e+00> : vector<128xf32>
// CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref<?x?xf32>, index, index
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
}
}
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// CHECK: [[C3:%.*]] = constant splat<vector<128xf32>, 2.000000e+00> : vector<128xf32>
// CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref<?x?xf32>, index, index
// non-scoped %f2
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
}
}
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
// CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
// CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
// CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<128xf32>
@ -188,10 +188,10 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-LABEL: @vec_rejected
func @vec_rejected(%A : memref<?x?xf32>, %C : memref<?x?xf32>) {
%N = dim %A, 0 : memref<?x?xf32>
for %i = 0 to %N {
affine.for %i = 0 to %N {
// CHECK-NOT: vector
%a = load %A[%i, %i] : memref<?x?xf32> // not vectorized
for %j = 0 to %N {
affine.for %j = 0 to %N {
%b = load %A[%i, %j] : memref<?x?xf32> // may be vectorized
// CHECK-NOT: vector
%c = addf %a, %b : f32 // not vectorized because %a wasn't

View File

@ -11,13 +11,13 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %1 step 32
// CHECK: for {{.*}} = 0 to %2 step 256
// Example:
// for %i0 = 0 to %0 {
// for %i1 = 0 to %1 step 32 {
// for %i2 = 0 to %2 step 256 {
// affine.for %i0 = 0 to %0 {
// affine.for %i1 = 0 to %1 step 32 {
// affine.for %i2 = 0 to %2 step 256 {
// %3 = "vector_transfer_read"(%arg0, %i0, %i1, %i2) : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}
@ -27,9 +27,9 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=1 --test-fastest-varying=0 no
// vectorization happens because of loop nesting order .
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
}
}
@ -43,24 +43,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%C = alloc (%M, %N) : memref<?x?xf32, 0>
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// CHECK: [[C1:%.*]] = constant splat<vector<32x256xf32>, 1.000000e+00> : vector<32x256xf32>
// CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref<?x?xf32>, index, index
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
}
}
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// CHECK: [[C3:%.*]] = constant splat<vector<32x256xf32>, 2.000000e+00> : vector<32x256xf32>
// CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref<?x?xf32>, index, index
// non-scoped %f2
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
}
}
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
// CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref<?x?xf32>, index, index) -> vector<32x256xf32>
// CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref<?x?xf32>, index, index) -> vector<32x256xf32>
// CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32>

View File

@ -7,17 +7,17 @@ func @vec3d(%A : memref<?x?x?xf32>) {
%0 = dim %A, 0 : memref<?x?x?xf32>
%1 = dim %A, 1 : memref<?x?x?xf32>
%2 = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 {
// CHECK: for %i1 = 0 to %0 {
// CHECK: for %i2 = 0 to %0 step 32 {
// CHECK: for %i3 = 0 to %1 step 64 {
// CHECK: for %i4 = 0 to %2 step 256 {
// CHECK: affine.for %i0 = 0 to %0 {
// CHECK: affine.for %i1 = 0 to %0 {
// CHECK: affine.for %i2 = 0 to %0 step 32 {
// CHECK: affine.for %i3 = 0 to %1 step 64 {
// CHECK: affine.for %i4 = 0 to %2 step 256 {
// CHECK: %3 = vector_transfer_read %arg0, %i2, %i3, %i4 {permutation_map: #[[map_proj_d0d1d2_d0d1d2]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x64x256xf32>
for %t0 = 0 to %0 {
for %t1 = 0 to %0 {
for %i0 = 0 to %0 {
for %i1 = 0 to %1 {
for %i2 = 0 to %2 {
affine.for %t0 = 0 to %0 {
affine.for %t1 = 0 to %0 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 {
affine.for %i2 = 0 to %2 {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}

View File

@ -7,13 +7,13 @@ func @vec2d(%A : memref<?x?x?xf32>) {
%M = dim %A, 0 : memref<?x?x?xf32>
%N = dim %A, 1 : memref<?x?x?xf32>
%P = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 step 32
// CHECK: for %i1 = 0 to %1 {
// CHECK: for %i2 = 0 to %2 step 256
// CHECK: affine.for %i0 = 0 to %0 step 32
// CHECK: affine.for %i1 = 0 to %1 {
// CHECK: affine.for %i2 = 0 to %2 step 256
// CHECK: {{.*}} = vector_transfer_read %arg0, %i0, %i1, %i2 {permutation_map: #[[map_proj_d0d1d2_d0d2]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}
@ -23,9 +23,9 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=2 --test-fastest-varying=0 no
// vectorization happens because of loop nesting order
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
}
}

View File

@ -12,20 +12,20 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=0 --test-fastest-varying=2 no
// vectorization happens because of loop nesting order.
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}
}
// CHECK: for %i3 = 0 to %0 step 32
// CHECK: for %i4 = 0 to %1 step 256
// CHECK: for %i5 = 0 to %2 {
// CHECK: affine.for %i3 = 0 to %0 step 32
// CHECK: affine.for %i4 = 0 to %1 step 256
// CHECK: affine.for %i5 = 0 to %2 {
// CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
}
}
@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
%0 = dim %A, 0 : memref<?x?x?xf32>
%1 = dim %A, 1 : memref<?x?x?xf32>
%2 = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 step 32 {
// CHECK: for %i1 = 0 to %1 {
// CHECK: for %i2 = 0 to %2 step 256 {
// CHECK: affine.for %i0 = 0 to %0 step 32 {
// CHECK: affine.for %i1 = 0 to %1 {
// CHECK: affine.for %i2 = 0 to %2 step 256 {
// CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i3 = 0 to %1 step 256 {
// CHECK: for %i4 = 0 to %2 {
// CHECK: affine.for %i3 = 0 to %1 step 256 {
// CHECK: affine.for %i4 = 0 to %2 {
// CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i5 = 0 to %2 {
// CHECK: affine.for %i5 = 0 to %2 {
// CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %0 {
for %i1 = 0 to %1 {
for %i2 = 0 to %2 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 {
affine.for %i2 = 0 to %2 {
%a2 = load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
}
}
for %i3 = 0 to %1 {
for %i4 = 0 to %2 {
affine.for %i3 = 0 to %1 {
affine.for %i4 = 0 to %2 {
%a4 = load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
}
for %i5 = 0 to %2 {
affine.for %i5 = 0 to %2 {
%a5 = load %A[%i3, %i5, %i0] : memref<?x?x?xf32>
}
}

View File

@ -12,20 +12,20 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=0 --test-fastest-varying=1 no
// vectorization happens because of loop nesting order.
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}
}
// CHECK: for %i3 = 0 to %0 step 32
// CHECK: for %i4 = 0 to %1 {
// CHECK: for %i5 = 0 to %2 step 256
// CHECK: affine.for %i3 = 0 to %0 step 32
// CHECK: affine.for %i4 = 0 to %1 {
// CHECK: affine.for %i5 = 0 to %2 step 256
// CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
}
}
@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
%0 = dim %A, 0 : memref<?x?x?xf32>
%1 = dim %A, 1 : memref<?x?x?xf32>
%2 = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 step 32 {
// CHECK: for %i1 = 0 to %1 step 256 {
// CHECK: for %i2 = 0 to %2 {
// CHECK: affine.for %i0 = 0 to %0 step 32 {
// CHECK: affine.for %i1 = 0 to %1 step 256 {
// CHECK: affine.for %i2 = 0 to %2 {
// CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i3 = 0 to %1 {
// CHECK: for %i4 = 0 to %2 step 256 {
// CHECK: affine.for %i3 = 0 to %1 {
// CHECK: affine.for %i4 = 0 to %2 step 256 {
// CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i5 = 0 to %2 step 256 {
// CHECK: affine.for %i5 = 0 to %2 step 256 {
// CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %0 {
for %i1 = 0 to %1 {
for %i2 = 0 to %2 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 {
affine.for %i2 = 0 to %2 {
%a2 = load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
}
}
for %i3 = 0 to %1 {
for %i4 = 0 to %2 {
affine.for %i3 = 0 to %1 {
affine.for %i4 = 0 to %2 {
%a4 = load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
}
for %i5 = 0 to %2 {
affine.for %i5 = 0 to %2 {
%a5 = load %A[%i3, %i5, %i0] : memref<?x?x?xf32>
}
}

View File

@ -205,10 +205,10 @@ func @dyn_shape_fold(%L : index, %M : index) -> (memref<? x ? x i32>, memref<? x
// CHECK-NEXT: %2 = alloc() : memref<512x1024xi32>
%c = alloc(%K, %N) : memref<? x ? x i32>
// CHECK: for %i0 =
for %i = 0 to %L {
// CHECK-NEXT: for %i1 =
for %j = 0 to 10 {
// CHECK: affine.for %i0 =
affine.for %i = 0 to %L {
// CHECK-NEXT: affine.for %i1 =
affine.for %j = 0 to 10 {
// CHECK-NEXT: %4 = load %0[%i0, %i1] : memref<?x1024xf32>
// CHECK-NEXT: store %4, %1[%c0, %c0, %i0, %i1, %c0] : memref<4x1024x8x512x?xf32>
%v = load %a[%i, %j] : memref<?x?xf32>
@ -234,8 +234,8 @@ func @merge_constants() -> (index, index) {
// CHECK-LABEL: func @hoist_constant
func @hoist_constant(%arg0: memref<8xi32>) {
// CHECK-NEXT: %c42_i32 = constant 42 : i32
// CHECK-NEXT: for %i0 = 0 to 8 {
for %i0 = 0 to 8 {
// CHECK-NEXT: affine.for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
// CHECK-NEXT: store %c42_i32, %arg0[%i0]
%c42_i32 = constant 42 : i32
store %c42_i32, %arg0[%i0] : memref<8xi32>

View File

@ -2,8 +2,8 @@
// CHECK-LABEL: @test(%arg0: memref<f32>) {
func @test(%p : memref<f32>) {
for %i0 = 0 to 128 {
for %i1 = 0 to 8 { // CHECK: for %i1 = 0 to 8 {
affine.for %i0 = 0 to 128 {
affine.for %i1 = 0 to 8 { // CHECK: affine.for %i1 = 0 to 8 {
%0 = constant 4.5 : f32
%1 = constant 1.5 : f32

View File

@ -113,8 +113,8 @@ func @down_propagate_for() {
// CHECK: %c1_i32 = constant 1 : i32
%0 = constant 1 : i32
// CHECK-NEXT: for %i0 = 0 to 4 {
for %i = 0 to 4 {
// CHECK-NEXT: affine.for %i0 = 0 to 4 {
affine.for %i = 0 to 4 {
// CHECK-NEXT: "foo"(%c1_i32, %c1_i32) : (i32, i32) -> ()
%1 = constant 1 : i32
"foo"(%0, %1) : (i32, i32) -> ()
@ -145,8 +145,8 @@ func @down_propagate() -> i32 {
/// Check that operation definitions are NOT propagated up the dominance tree.
// CHECK-LABEL: @up_propagate_for
func @up_propagate_for() -> i32 {
// CHECK: for %i0 = 0 to 4 {
for %i = 0 to 4 {
// CHECK: affine.for %i0 = 0 to 4 {
affine.for %i = 0 to 4 {
// CHECK-NEXT: %c1_i32 = constant 1 : i32
// CHECK-NEXT: "foo"(%c1_i32) : (i32) -> ()
%0 = constant 1 : i32

View File

@ -42,7 +42,7 @@ func @loop_nest_1d() {
// Second DMA transfer.
// CHECK: dma_start %1[%c256], %5[%c0], %c256_0, %6[%c0] : memref<512xf32>, memref<256xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %6[%c0], %c256_0 : memref<1xi32>
// CHECK: for %i0 = 0 to 256 {
// CHECK: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: %7 = load %3[%i0] : memref<256xf32, 2>
// CHECK: %8 = affine.apply [[MAP_PLUS_256]](%i0)
// CHECK: %9 = affine.apply [[MAP_MINUS_256]](%8)
@ -55,7 +55,7 @@ func @loop_nest_1d() {
// CHECK-NEXT: dealloc %4 : memref<1xi32>
// CHECK-NEXT: dealloc %3 : memref<256xf32, 2>
// CHECK-NEXT: return
for %i = 0 to 256 {
affine.for %i = 0 to 256 {
load %A[%i] : memref<256 x f32>
%idx = affine.apply (d0) -> (d0 + 256)(%i)
load %B[%idx] : memref<512 x f32>
@ -82,20 +82,20 @@ func @loop_nest_1d() {
// INCOMING DMA for C.
// CHECK-DAG: dma_start %arg2[%c0, %c0], [[BUFC]][%c0, %c0], %c16384_0, [[TAGC]][%c0] : memref<512x32xf32>, memref<512x32xf32, 2>, memref<1xi32>
// CHECK-DAG: dma_wait [[TAGC]][%c0], %c16384_0 : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 32 {
// CHECK-NEXT: for %i1 = 0 to 32 {
// CHECK-NEXT: for %i2 = 0 to 32 {
// CHECK-NEXT: for %i3 = 0 to 16 {
// CHECK-NEXT: affine.for %i0 = 0 to 32 {
// CHECK-NEXT: affine.for %i1 = 0 to 32 {
// CHECK-NEXT: affine.for %i2 = 0 to 32 {
// CHECK-NEXT: affine.for %i3 = 0 to 16 {
// CHECK-NEXT: %7 = affine.apply #map{{[0-9]+}}(%i1, %i3)
// CHECK-NEXT: %8 = load [[BUFB]][%7, %i0] : memref<512x32xf32, 2>
// CHECK-NEXT: "foo"(%8) : (f32) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: for %i4 = 0 to 16 {
// CHECK-NEXT: affine.for %i4 = 0 to 16 {
// CHECK-NEXT: %9 = affine.apply #map{{[0-9]+}}(%i2, %i4)
// CHECK-NEXT: %10 = load [[BUFA]][%9, %i1] : memref<512x32xf32, 2>
// CHECK-NEXT: "bar"(%10) : (f32) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: for %i5 = 0 to 16 {
// CHECK-NEXT: affine.for %i5 = 0 to 16 {
// CHECK-NEXT: %11 = "abc_compute"() : () -> f32
// CHECK-NEXT: %12 = affine.apply #map{{[0-9]+}}(%i2, %i5)
// CHECK-NEXT: %13 = load [[BUFC]][%12, %i0] : memref<512x32xf32, 2>
@ -123,20 +123,20 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
// DMAs will be performed at this level (jT is the first loop without a stride).
// A and B are read, while C is both read and written. A total of three new buffers
// are allocated and existing load's/store's are replaced by accesses to those buffers.
for %jT = 0 to 32 {
for %kT = 0 to 32 {
for %iT = 0 to 32 {
for %kk = 0 to 16 { // k intratile
affine.for %jT = 0 to 32 {
affine.for %kT = 0 to 32 {
affine.for %iT = 0 to 32 {
affine.for %kk = 0 to 16 { // k intratile
%k = affine.apply (d0, d1) -> (16*d0 + d1) (%kT, %kk)
%v0 = load %B[%k, %jT] : memref<512 x 32 x f32>
"foo"(%v0) : (f32) -> ()
}
for %ii = 0 to 16 { // i intratile.
affine.for %ii = 0 to 16 { // i intratile.
%i = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii)
%v1 = load %A[%i, %kT] : memref<512 x 32 x f32>
"bar"(%v1) : (f32) -> ()
}
for %ii_ = 0 to 16 { // i intratile.
affine.for %ii_ = 0 to 16 { // i intratile.
%v2 = "abc_compute"() : () -> f32
%i_ = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii_)
%v3 = load %C[%i_, %jT] : memref<512 x 32 x f32>
@ -155,13 +155,13 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
//
// CHECK-LABEL: func @loop_nest_modulo() {
// CHECK: %0 = alloc() : memref<256x8xf32>
// CHECK-NEXT: for %i0 = 0 to 32 step 4 {
// CHECK-NEXT: affine.for %i0 = 0 to 32 step 4 {
// CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0)
// CHECK-NEXT: %2 = alloc() : memref<1x2xf32, 2>
// CHECK-NEXT: %3 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %0[%1, %c0], %2[%c0, %c0], %c2, %3[%c0] : memref<256x8xf32>, memref<1x2xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %3[%c0], %c2 : memref<1xi32>
// CHECK-NEXT: for %i1 = 0 to 8 {
// CHECK-NEXT: affine.for %i1 = 0 to 8 {
// ...
// ...
// CHECK: }
@ -171,9 +171,9 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
// CHECK-NEXT: return
func @loop_nest_modulo() {
%A = alloc() : memref<256 x 8 x f32>
for %i = 0 to 32 step 4 {
affine.for %i = 0 to 32 step 4 {
// DMAs will be performed at this level (%j is the first unit stride loop)
for %j = 0 to 8 {
affine.for %j = 0 to 8 {
%idx = affine.apply (d0) -> (d0 mod 2) (%j)
// A buffer of size 32 x 2 will be allocated (original buffer was 256 x 8).
%v = load %A[%i, %idx] : memref<256 x 8 x f32>
@ -187,17 +187,17 @@ func @loop_nest_modulo() {
// CHECK-LABEL: func @loop_nest_tiled() -> memref<256x1024xf32> {
func @loop_nest_tiled() -> memref<256x1024xf32> {
%0 = alloc() : memref<256x1024xf32>
for %i0 = 0 to 256 step 32 {
for %i1 = 0 to 1024 step 32 {
affine.for %i0 = 0 to 256 step 32 {
affine.for %i1 = 0 to 1024 step 32 {
// CHECK: %3 = alloc() : memref<32x32xf32, 2>
// CHECK-NEXT: %4 = alloc() : memref<1xi32>
// Strided DMA here: 32 x 32 tile in a 256 x 1024 memref.
// CHECK-NEXT: dma_start %0[%1, %2], %3[%c0, %c0], %c1024, %4[%c0], %c1024_0, %c32 : memref<256x1024xf32>, memref<32x32xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait
// CHECK-NEXT: for %i2 = #map
// CHECK-NEXT: for %i3 = #map
for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
// CHECK-NEXT: affine.for %i2 = #map
// CHECK-NEXT: affine.for %i3 = #map
affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
// CHECK-NEXT: %5 = affine.apply [[MAP_INDEX_DIFF_EVEN]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %6 = affine.apply [[MAP_INDEX_DIFF_ODD]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %7 = load %3[%5, %6] : memref<32x32xf32, 2>
@ -218,8 +218,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) {
// No strided DMA needed here.
// CHECK: dma_start %arg0[%c1, %c0], %0[%c0, %c0], %c100, %1[%c0] : memref<100x100xf32>, memref<1x100xf32, 2>,
// CHECK-NEXT: dma_wait %1[%c0], %c100 : memref<1xi32>
for %i = 0 to 100 {
for %j = 0 to ()[s0] -> (s0) ()[%N] {
affine.for %i = 0 to 100 {
affine.for %j = 0 to ()[s0] -> (s0) ()[%N] {
// CHECK: %2 = affine.apply [[MAP_D0_MINUS_ONE]](%c1_0, %i1)
// CHECK: %3 = affine.apply [[MAP_D1]](%c1_0, %i1)
// CHECK-NEXT: %4 = load %0[%2, %3] : memref<1x100xf32, 2>
@ -232,8 +232,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) {
// CHECK-LABEL: func @dma_with_symbolic_accesses
func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) {
%N = constant 9 : index
for %i = 0 to 100 {
for %j = 0 to 100 {
affine.for %i = 0 to 100 {
affine.for %j = 0 to 100 {
%idy = affine.apply (d0, d1) [s0, s1] -> (d1 + s0 + s1)(%i, %j)[%M, %N]
load %A[%i, %idy] : memref<100 x 100 x f32>
}
@ -243,8 +243,8 @@ func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) {
// CHECK-NEXT: %2 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %arg0[%c0, %0], %1[%c0, %c0], %c10000, %2[%c0]
// CHECK-NEXT: dma_wait %2[%c0], %c10000
// CHECK-NEXT: for %i0 = 0 to 100 {
// CHECK-NEXT: for %i1 = 0 to 100 {
// CHECK-NEXT: affine.for %i0 = 0 to 100 {
// CHECK-NEXT: affine.for %i1 = 0 to 100 {
// CHECK-NEXT: %3 = affine.apply [[MAP_SYM_SHIFT]](%i0, %i1)[%arg1, %c9]
// CHECK-NEXT: %4 = affine.apply [[MAP_3D_D1]](%arg1, %i0, %3)
// CHECK-NEXT: %5 = affine.apply [[MAP_SUB_OFFSET]](%arg1, %i0, %3)
@ -263,8 +263,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in
// CHECK-NEXT: %1 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %arg0[%c0, %c0], %0[%c0, %c0], %c10000, %1[%c0] : memref<100x100xf32>, memref<100x100xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %1[%c0], %c10000 : memref<1xi32>
for %i = 0 to 100 {
for %j = %M to %N {
affine.for %i = 0 to 100 {
affine.for %j = %M to %N {
%idy = affine.apply (d1) [s0] -> (d1 + s0)(%j)[%K]
load %A[%i, %idy] : memref<100 x 100 x f32>
}
@ -278,8 +278,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in
func @dma_unknown_size(%arg0: memref<?x?xf32>) {
%M = dim %arg0, 0 : memref<? x ? x f32>
%N = dim %arg0, 0 : memref<? x ? x f32>
for %i = 0 to %M {
for %j = 0 to %N {
affine.for %i = 0 to %M {
affine.for %j = 0 to %N {
// If this loop nest isn't tiled, the access requires a non-constant DMA
// size -- not yet implemented.
// CHECK: %2 = load %arg0[%i0, %i1] : memref<?x?xf32>
@ -294,9 +294,9 @@ func @dma_unknown_size(%arg0: memref<?x?xf32>) {
// CHECK-LABEL: func @dma_memref_3d
func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) {
for %i = 0 to 1024 {
for %j = 0 to 1024 {
for %k = 0 to 1024 {
affine.for %i = 0 to 1024 {
affine.for %j = 0 to 1024 {
affine.for %k = 0 to 1024 {
%idx = affine.apply (d0) -> (d0 mod 128)(%i)
%idy = affine.apply (d0) -> (d0 mod 128)(%j)
%idz = affine.apply (d0) -> (d0 mod 128)(%k)
@ -330,8 +330,8 @@ func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) {
// CHECK-LABEL: func @multi_load_store_union() {
func @multi_load_store_union() {
%A = alloc() : memref<512 x 512 x f32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx = affine.apply (d0) -> (d0 + 64)(%i)
%idy = affine.apply (d0) -> (d0 + 128)(%j)
%ishift = affine.apply (d0) -> (d0 + 2)(%i)
@ -355,8 +355,8 @@ func @multi_load_store_union() {
// CHECK-NEXT: dma_start %0[%c2_1, %c2_2], %1[%c0, %c0], %c170372_3, %2[%c0], %c512_4, %c446_5 : memref<512x512xf32>, memref<382x446xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %2[%c0], %c170372_3 : memref<1xi32>
// CHECK-NEXT: %3 = alloc() : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 256 {
// CHECK-NEXT: for %i1 = 0 to 256 {
// CHECK-NEXT: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: affine.for %i1 = 0 to 256 {
// CHECK-NEXT: %4 = affine.apply [[MAP_PLUS_64]](%i0)
// CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_128]](%i1)
// CHECK-NEXT: %6 = affine.apply [[MAP_PLUS_2]](%i0)
@ -395,7 +395,7 @@ func @dma_loop_straightline_interspersed() {
%c255 = constant 255 : index
%A = alloc() : memref<256 x f32>
%v = load %A[%c0] : memref<256 x f32>
for %i = 1 to 255 {
affine.for %i = 1 to 255 {
load %A[%i] : memref<256 x f32>
}
%l = load %A[%c255] : memref<256 x f32>
@ -416,7 +416,7 @@ func @dma_loop_straightline_interspersed() {
// CHECK-NEXT: %5 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %0[%c1_0], %4[%c0], %c254, %5[%c0] : memref<256xf32>, memref<254xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %5[%c0], %c254 : memref<1xi32>
// CHECK-NEXT: for %i0 = 1 to 255 {
// CHECK-NEXT: affine.for %i0 = 1 to 255 {
// CHECK-NEXT: %6 = affine.apply [[MAP_MINUS_ONE]](%i0)
// CHECK-NEXT: %7 = load %4[%6] : memref<254xf32, 2>
// CHECK-NEXT: }
@ -442,10 +442,10 @@ func @dma_loop_straightline_interspersed() {
func @dma_mixed_loop_blocks() {
%c0 = constant 0 : index
%A = alloc() : memref<256 x 256 x vector<8 x f32>>
for %i = 0 to 256 {
affine.for %i = 0 to 256 {
%v = load %A[%c0, %c0] : memref<256 x 256 x vector<8 x f32>>
"foo"(%v) : (vector<8 x f32>) -> ()
for %j = 0 to 256 {
affine.for %j = 0 to 256 {
%w = load %A[%i, %j] : memref<256 x 256 x vector<8 x f32>>
"bar"(%w) : (vector<8 x f32>) -> ()
}
@ -457,17 +457,17 @@ func @dma_mixed_loop_blocks() {
// CHECK-DAG: [[TAG:%[0-9]+]] = alloc() : memref<1xi32>
// CHECK: dma_start [[MEM]][%c0, %c0], [[BUF]][%c0, %c0], %c65536, [[TAG]][%c0] : memref<256x256xvector<8xf32>>, memref<256x256xvector<8xf32>, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait [[TAG]][%c0], %c65536 : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 256 {
// CHECK-NEXT: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: %3 = load [[BUF]][%c0_0, %c0_0] : memref<256x256xvector<8xf32>, 2>
// CHECK: for %i1 = 0 to 256 {
// CHECK: affine.for %i1 = 0 to 256 {
// CHECK-NEXT: %4 = load [[BUF]][%i0, %i1] : memref<256x256xvector<8xf32>, 2>
// -----
// CHECK-LABEL: func @relative_loop_bounds
func @relative_loop_bounds(%arg0: memref<1027xf32>) {
for %i0 = 0 to 1024 {
for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) {
affine.for %i0 = 0 to 1024 {
affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) {
%0 = constant 0.0 : f32
store %0, %arg0[%i2] : memref<1027xf32>
}
@ -476,8 +476,8 @@ func @relative_loop_bounds(%arg0: memref<1027xf32>) {
}
// CHECK: [[BUF:%[0-9]+]] = alloc() : memref<1027xf32, 2>
// CHECK-NEXT: [[MEM:%[0-9]+]] = alloc() : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 1024 {
// CHECK-NEXT: for %i1 = {{#map[0-9]+}}(%i0) to {{#map[0-9]+}}(%i0) {
// CHECK-NEXT: affine.for %i0 = 0 to 1024 {
// CHECK-NEXT: affine.for %i1 = {{#map[0-9]+}}(%i0) to {{#map[0-9]+}}(%i0) {
// CHECK-NEXT: %cst = constant 0.000000e+00 : f32
// CHECK-NEXT: store %cst, [[BUF]][%i1] : memref<1027xf32, 2>
// CHECK-NEXT: }
@ -487,7 +487,7 @@ func @relative_loop_bounds(%arg0: memref<1027xf32>) {
// ----
// This should create a buffer of size 2 for %arg2.
// This should create a buffer of size 2 affine.for %arg2.
#map_lb = (d0) -> (d0)
#map_ub = (d0) -> (d0 + 3)
@ -498,9 +498,9 @@ func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>,
%0 = alloc() : memref<64x1xf32>
%1 = alloc() : memref<144x4xf32>
%2 = constant 0.0 : f32
for %i8 = 0 to 9 step 3 {
for %i9 = #map_lb(%i8) to #map_ub(%i8) {
for %i17 = 0 to 64 {
affine.for %i8 = 0 to 9 step 3 {
affine.for %i9 = #map_lb(%i8) to #map_ub(%i8) {
affine.for %i17 = 0 to 64 {
%23 = affine.apply #map_acc(%i9)
%25 = load %arg2[%23] : memref<2xf32>
%26 = affine.apply #map_lb(%i17)
@ -511,11 +511,11 @@ func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>,
}
return %arg1, %arg2 : memref<144x9xf32>, memref<2xf32>
}
// CHECK: for %i0 = 0 to 9 step 3 {
// CHECK: affine.for %i0 = 0 to 9 step 3 {
// CHECK: [[BUF:%[0-9]+]] = alloc() : memref<2xf32, 2>
// CHECK: dma_start %arg2[%4], [[BUF]]
// CHECK: dma_wait %6[%c0], %c2_0 : memref<1xi32>
// CHECK: for %i1 =
// CHECK: affine.for %i1 =
// -----
@ -524,17 +524,17 @@ func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>,
// FAST-MEM-16KB-LABEL: func @load_store_same_memref
func @load_store_same_memref(%arg0: memref<256x1024xf32>) {
// FAST-MEM-16KB: for %i0 = 0 to 256 step 4
for %i0 = 0 to 256 step 4 {
// FAST-MEM-16KB: affine.for %i0 = 0 to 256 step 4
affine.for %i0 = 0 to 256 step 4 {
// FAST-MEM-16KB: [[BUF:%[0-9]+]] = alloc() : memref<4x1024xf32, 2>
// FAST-MEM-16KB: dma_start %arg0
// FAST-MEM-16KB-NEXT: dma_wait
// FAST-MEM-16KB: for %i1
for %i1 = 0 to 1024 step 4 {
// FAST-MEM-16KB: for %i2
for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) {
// FAST-MEM-16KB: for %i3
for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 4)(%i1) {
// FAST-MEM-16KB: affine.for %i1
affine.for %i1 = 0 to 1024 step 4 {
// FAST-MEM-16KB: affine.for %i2
affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) {
// FAST-MEM-16KB: affine.for %i3
affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 4)(%i1) {
%3 = load %arg0[%i2, %i3] : memref<256x1024xf32>
%4 = mulf %3, %3 : f32
store %4, %arg0[%i2, %i3] : memref<256x1024xf32>
@ -560,12 +560,12 @@ func @load_store_same_memref(%arg0: memref<256x1024xf32>) {
#map1 = (d0) -> (d0 + 4)
// FAST-MEM-16KB-LABEL: func @simple_matmul
func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> {
for %i = 0 to 8 step 4 {
for %j = 0 to 8 step 4 {
for %k = 0 to 8 step 4 {
for %ii = #map0(%i) to #map1(%i) {
for %jj = #map0(%j) to #map1(%j) {
for %kk = #map0(%k) to #map1(%k) {
affine.for %i = 0 to 8 step 4 {
affine.for %j = 0 to 8 step 4 {
affine.for %k = 0 to 8 step 4 {
affine.for %ii = #map0(%i) to #map1(%i) {
affine.for %jj = #map0(%j) to #map1(%j) {
affine.for %kk = #map0(%k) to #map1(%k) {
%5 = load %arg0[%ii, %kk] : memref<8x8xvector<64xf32>>
%6 = load %arg1[%kk, %jj] : memref<8x8xvector<64xf32>>
%7 = load %arg2[%ii, %jj] : memref<8x8xvector<64xf32>>
@ -580,18 +580,18 @@ func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<
}
return %arg2 : memref<8x8xvector<64xf32>>
}
// FAST-MEM-16KB: for %i0 = 0 to 8 step 4 {
// FAST-MEM-16KB: for %i1 = 0 to 8 step 4 {
// FAST-MEM-16KB: affine.for %i0 = 0 to 8 step 4 {
// FAST-MEM-16KB: affine.for %i1 = 0 to 8 step 4 {
// FAST-MEM-16KB: dma_start %arg2
// FAST-MEM-16KB: dma_wait
// FAST-MEM-16KB: for %i2 = 0 to 8 step 4 {
// FAST-MEM-16KB: affine.for %i2 = 0 to 8 step 4 {
// FAST-MEM-16KB: dma_start %arg0
// FAST-MEM-16KB: dma_wait
// FAST-MEM-16KB: dma_start %arg1
// FAST-MEM-16KB: dma_wait
// FAST-MEM-16KB: for %i3 = #map{{[0-9]+}}(%i0) to #map{{[0-9]+}}(%i0) {
// FAST-MEM-16KB-NEXT: for %i4 = #map{{[0-9]+}}(%i1) to #map{{[0-9]+}}(%i1) {
// FAST-MEM-16KB-NEXT: for %i5 = #map{{[0-9]+}}(%i2) to #map{{[0-9]+}}(%i2) {
// FAST-MEM-16KB: affine.for %i3 = #map{{[0-9]+}}(%i0) to #map{{[0-9]+}}(%i0) {
// FAST-MEM-16KB-NEXT: affine.for %i4 = #map{{[0-9]+}}(%i1) to #map{{[0-9]+}}(%i1) {
// FAST-MEM-16KB-NEXT: affine.for %i5 = #map{{[0-9]+}}(%i2) to #map{{[0-9]+}}(%i2) {
// FAST-MEM-16KB: }
// FAST-MEM-16KB: }
// FAST-MEM-16KB: }

File diff suppressed because it is too large Load Diff

View File

@ -9,12 +9,12 @@
// CHECK-DAG: [[UB_INTRA_TILE:#map[0-9]+]] = (d0, d1, d2) -> (d2 + 32, s0, 4096 floordiv s1)
// CHECK-LABEL: func @loop_tiling()
// CHECK-NEXT: for %i0 = 0 to 256 step 32 {
// CHECK-NEXT: for %i1 = 0 to 512 step 32 {
// CHECK-NEXT: for %i2 = 0 to 1024 step 32 {
// CHECK-NEXT: for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
// CHECK-NEXT: for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
// CHECK-NEXT: for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
// CHECK-NEXT: affine.for %i0 = 0 to 256 step 32 {
// CHECK-NEXT: affine.for %i1 = 0 to 512 step 32 {
// CHECK-NEXT: affine.for %i2 = 0 to 1024 step 32 {
// CHECK-NEXT: affine.for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
// CHECK-NEXT: affine.for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
// CHECK-NEXT: affine.for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
// CHECK-NEXT: "foo"(%i3, %i4, %i5) : (index, index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
@ -22,32 +22,32 @@
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i6 = 0 to 50 step 32 {
// CHECK-NEXT: for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
// CHECK-NEXT: affine.for %i6 = 0 to 50 step 32 {
// CHECK-NEXT: affine.for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
// CHECK-NEXT: "bar"(%i7, %i7) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i8 = 0 to 21 step 32 {
// CHECK-NEXT: for %i9 = [[IDENTITY]](%i8) to 21 {
// CHECK-NEXT: affine.for %i8 = 0 to 21 step 32 {
// CHECK-NEXT: affine.for %i9 = [[IDENTITY]](%i8) to 21 {
// CHECK-NEXT: "foobar"(%i9) : (index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: return
func @loop_tiling() {
for %i = 0 to 256 {
for %j = 0 to 512 {
for %k = 0 to 1024 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 512 {
affine.for %k = 0 to 1024 {
"foo"(%i, %j, %k) : (index, index, index) -> ()
}
}
}
for %x = 0 to 50 {
affine.for %x = 0 to 50 {
"bar"(%x, %x) : (index, index) -> ()
}
// Intra-tile loop won't need a min expression.
for %y = 0 to 21 {
affine.for %y = 0 to 21 {
"foobar"(%y) : (index) -> ()
}
@ -59,12 +59,12 @@ func @loop_tiling() {
// CHECK-LABEL: func @loop_max_min_bound(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
func @loop_max_min_bound(%A : memref<? x i32>, %L : index, %U : index) {
%M = dim %A, 0 : memref<? x i32>
for %iTT = max #lb()[%L] to min #ub()[%M, %U] {
affine.for %iTT = max #lb()[%L] to min #ub()[%M, %U] {
%out = affine.apply (d0) -> (d0) (%iTT)
}
return
// CHECK: for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
// CHECK-NEXT: for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
// CHECK: affine.for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
// CHECK-NEXT: affine.for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
// CHECK-NEXT: %1 = affine.apply [[IDENTITY]](%i1)
// CHECK-NEXT: }
// CHECK-NEXT: }
@ -78,9 +78,9 @@ func @loop_max_min_bound(%A : memref<? x i32>, %L : index, %U : index) {
// MODEL-LABEL: func @simple_matmul
func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> {
for %i = 0 to 256 {
for %j = 0 to 256 {
for %k = 0 to 250 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
affine.for %k = 0 to 250 {
%l = load %arg0[%i, %k] : memref<8x8xvector<64xf32>>
%r = load %arg1[%k, %j] : memref<8x8xvector<64xf32>>
%o = load %arg2[%i, %j] : memref<8x8xvector<64xf32>>
@ -92,6 +92,6 @@ func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<
}
return %arg2 : memref<8x8xvector<64xf32>>
}
// MODEL: for %i0 = 0 to 256 step 4 {
// MODEL-NEXT: for %i1 = 0 to 256 step 4 {
// MODEL-NEXT: for %i2 = 0 to 250 step 5 {
// MODEL: affine.for %i0 = 0 to 256 step 4 {
// MODEL-NEXT: affine.for %i1 = 0 to 256 step 4 {
// MODEL-NEXT: affine.for %i2 = 0 to 250 step 5 {

View File

@ -24,7 +24,7 @@ func @body(index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @simple_loop() {
for %i = 1 to 42 {
affine.for %i = 1 to 42 {
call @body(%i) : (index) -> ()
}
return
@ -65,9 +65,9 @@ func @post(index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @imperfectly_nested_loops() {
for %i = 0 to 42 {
affine.for %i = 0 to 42 {
call @pre(%i) : (index) -> ()
for %j = 7 to 56 step 2 {
affine.for %j = 7 to 56 step 2 {
call @body2(%i, %j) : (index, index) -> ()
}
call @post(%i) : (index) -> ()
@ -122,13 +122,13 @@ func @body3(index, index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @more_imperfectly_nested_loops() {
for %i = 0 to 42 {
affine.for %i = 0 to 42 {
call @pre(%i) : (index) -> ()
for %j = 7 to 56 step 2 {
affine.for %j = 7 to 56 step 2 {
call @body2(%i, %j) : (index, index) -> ()
}
call @mid(%i) : (index) -> ()
for %k = 18 to 37 step 3 {
affine.for %k = 18 to 37 step 3 {
call @body3(%i, %k) : (index, index) -> ()
}
call @post(%i) : (index) -> ()
@ -161,8 +161,8 @@ func @more_imperfectly_nested_loops() {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @affine_apply_loops_shorthand(%N : index) {
for %i = 0 to %N {
for %j = (d0)[]->(d0)(%i)[] to 42 {
affine.for %i = 0 to %N {
affine.for %j = (d0)[]->(d0)(%i)[] to 42 {
call @body2(%i, %j) : (index, index) -> ()
}
}
@ -360,7 +360,7 @@ func @if_for() {
// CHECK-NEXT: [[outerEndBB]]:
// CHECK-NEXT: br [[outerLoopInit:\^bb[0-9]+]]
affine.if #set1(%i) {
for %j = 0 to 42 {
affine.for %j = 0 to 42 {
affine.if #set2(%j) {
call @body2(%i, %j) : (index, index) -> ()
}
@ -397,9 +397,9 @@ func @if_for() {
// CHECK-NEXT: %c1_9 = constant 1 : index
// CHECK-NEXT: %16 = addi %9, %c1_9 : index
// CHECK-NEXT: br [[outerLoopCond]](%16 : index)
for %k = 0 to 42 {
affine.for %k = 0 to 42 {
affine.if #set2(%k) {
for %l = 0 to 42 {
affine.for %l = 0 to 42 {
call @body3(%k, %l) : (index, index) -> ()
}
}
@ -446,8 +446,8 @@ func @if_for() {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @loop_min_max(%N : index) {
for %i = 0 to 42 {
for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] {
affine.for %i = 0 to 42 {
affine.for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] {
call @body2(%i, %j) : (index, index) -> ()
}
}
@ -486,7 +486,7 @@ func @loop_min_max(%N : index) {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @min_reduction_tree(%v : index) {
for %i = 0 to min #map_7_values(%v)[] {
affine.for %i = 0 to min #map_7_values(%v)[] {
call @body(%i) : (index) -> ()
}
return

View File

@ -11,8 +11,8 @@ func @test() {
%A = alloc() : memref<9 x 9 x i32>
%B = alloc() : memref<111 x i32>
for %i = -1 to 10 {
for %j = -1 to 10 {
affine.for %i = -1 to 10 {
affine.for %j = -1 to 10 {
%idx0 = affine.apply (d0, d1) -> (d0)(%i, %j)
%idx1 = affine.apply (d0, d1) -> (d1)(%i, %j)
// Out of bound access.
@ -27,7 +27,7 @@ func @test() {
}
}
for %k = 0 to 10 {
affine.for %k = 0 to 10 {
// In bound.
%u = load %B[%zero] : memref<111 x i32>
// Out of bounds.
@ -43,8 +43,8 @@ func @test_mod_floordiv_ceildiv() {
%zero = constant 0 : index
%A = alloc() : memref<128 x 64 x 64 x i32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j)
%idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j)
%idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j)
@ -69,8 +69,8 @@ func @test_no_out_of_bounds() {
%C = alloc() : memref<257 x i32>
%B = alloc() : memref<1 x i32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
// All of these accesses are in bound; check that no errors are emitted.
// CHECK: %3 = affine.apply {{#map.*}}(%i0, %i1)
// CHECK-NEXT: %4 = load %0[%3, %c0] : memref<257x256xi32>
@ -93,8 +93,8 @@ func @mod_div() {
%zero = constant 0 : index
%A = alloc() : memref<128 x 64 x 64 x i32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j)
%idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j)
%idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j)
@ -115,8 +115,8 @@ func @mod_div() {
// CHECK-LABEL: func @mod_floordiv_nested() {
func @mod_floordiv_nested() {
%A = alloc() : memref<256 x 256 x i32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1) -> ((d0 mod 1024) floordiv 4)(%i, %j)
%idx1 = affine.apply (d0, d1) -> ((((d1 mod 128) mod 32) ceildiv 4) * 32)(%i, %j)
load %A[%idx0, %idx1] : memref<256 x 256 x i32> // expected-error {{'std.load' op memref out of upper bound access along dimension #2}}
@ -128,7 +128,7 @@ func @mod_floordiv_nested() {
// CHECK-LABEL: func @test_semi_affine_bailout
func @test_semi_affine_bailout(%N : index) {
%B = alloc() : memref<10 x i32>
for %i = 0 to 10 {
affine.for %i = 0 to 10 {
%idx = affine.apply (d0)[s0] -> (d0 * s0)(%i)[%N]
%y = load %B[%idx] : memref<10 x i32>
// expected-error@-1 {{getMemRefRegion: compose affine map failed}}
@ -139,7 +139,7 @@ func @test_semi_affine_bailout(%N : index) {
// CHECK-LABEL: func @multi_mod_floordiv
func @multi_mod_floordiv() {
%A = alloc() : memref<2x2xi32>
for %ii = 0 to 64 {
affine.for %ii = 0 to 64 {
%idx0 = affine.apply (d0) -> ((d0 mod 147456) floordiv 1152) (%ii)
%idx1 = affine.apply (d0) -> (((d0 mod 147456) mod 1152) floordiv 384) (%ii)
%v = load %A[%idx0, %idx1] : memref<2x2xi32>
@ -154,8 +154,8 @@ func @delinearize_mod_floordiv() {
%out = alloc() : memref<64x9xi32>
// Reshape '%in' into '%out'.
for %ii = 0 to 64 {
for %jj = 0 to 9 {
affine.for %ii = 0 to 64 {
affine.for %jj = 0 to 9 {
%a0 = affine.apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj)
%a10 = affine.apply (d0) ->
(d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0)
@ -190,7 +190,7 @@ func @out_of_bounds() {
%in = alloc() : memref<1xi32>
%c9 = constant 9 : i32
for %i0 = 10 to 11 {
affine.for %i0 = 10 to 11 {
%idy = affine.apply (d0) -> (100 * d0 floordiv 1000) (%i0)
store %c9, %in[%idy] : memref<1xi32> // expected-error {{'std.store' op memref out of upper bound access along dimension #1}}
}
@ -210,8 +210,8 @@ func @out_of_bounds() {
func @test_complex_mod_floordiv(%arg0: memref<4x4x16x1xf32>) {
%c0 = constant 0 : index
%0 = alloc() : memref<1x2x3x3x16x1xf32>
for %i0 = 0 to 64 {
for %i1 = 0 to 9 {
affine.for %i0 = 0 to 64 {
affine.for %i1 = 0 to 9 {
%2 = affine.apply #map3(%i0, %i1)
%3 = affine.apply #map4(%i0, %i1)
%4 = affine.apply #map5(%i0, %i1)
@ -231,8 +231,8 @@ func @test_complex_mod_floordiv(%arg0: memref<4x4x16x1xf32>) {
func @test_mod_bound() {
%0 = alloc() : memref<7 x f32>
%1 = alloc() : memref<6 x f32>
for %i0 = 0 to 4096 {
for %i1 = #map0(%i0) to #map1(%i0) {
affine.for %i0 = 0 to 4096 {
affine.for %i1 = #map0(%i0) to #map1(%i0) {
load %0[%i1] : memref<7 x f32>
load %1[%i1] : memref<6 x f32>
// expected-error@-1 {{'std.load' op memref out of upper bound access along dimension #1}}
@ -253,13 +253,13 @@ func @test_floordiv_bound() {
%1 = alloc() : memref<1026 x f32>
%2 = alloc() : memref<4096 x f32>
%N = constant 2048 : index
for %i0 = 0 to 4096 {
for %i1 = #map0(%i0) to #map1(%i0) {
affine.for %i0 = 0 to 4096 {
affine.for %i1 = #map0(%i0) to #map1(%i0) {
load %0[%i1] : memref<1027 x f32>
load %1[%i1] : memref<1026 x f32>
// expected-error@-1 {{'std.load' op memref out of upper bound access along dimension #1}}
}
for %i2 = 0 to #map2(%N) {
affine.for %i2 = 0 to #map2(%N) {
// Within bounds.
%v = load %2[%i2] : memref<4096 x f32>
}
@ -277,9 +277,9 @@ func @test_floordiv_bound() {
// CHECK-LABEL: func @non_composed_bound_operand
func @non_composed_bound_operand(%arg0: memref<1024xf32>) {
for %i0 = 4 to 1028 step 4 {
affine.for %i0 = 4 to 1028 step 4 {
%i1 = affine.apply (d0) -> (d0 - 4) (%i0)
for %i2 = #map_lb(%i1) to #map_ub(%i1) {
affine.for %i2 = #map_lb(%i1) to #map_ub(%i1) {
%0 = load %arg0[%i2] : memref<1024xf32>
}
}

View File

@ -10,14 +10,14 @@
func @simple_store_load() {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
}
return
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: }
// CHECK-NEXT: return
@ -30,7 +30,7 @@ func @multi_store_load() {
%cf8 = constant 8.0 : f32
%cf9 = constant 9.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -45,7 +45,7 @@ func @multi_store_load() {
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %cst_1 = constant 9.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: %1 = mulf %cst_1, %cst_1 : f32
// CHECK-NEXT: }
@ -59,8 +59,8 @@ func @multi_store_load() {
func @store_load_affine_apply() -> memref<10x10xf32> {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10x10xf32>
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%t0 = affine.apply (d0, d1) -> (d1 + 1)(%i0, %i1)
%t1 = affine.apply (d0, d1) -> (d0)(%i0, %i1)
%idx0 = affine.apply (d0, d1) -> (d1) (%t0, %t1)
@ -75,8 +75,8 @@ func @store_load_affine_apply() -> memref<10x10xf32> {
return %m : memref<10x10xf32>
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %0 = alloc() : memref<10x10xf32>
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: for %i1 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i1 = 0 to 10 {
// CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1)
// CHECK-NEXT: %2 = affine.apply [[MAP1]](%i0, %i1)
// CHECK-NEXT: %3 = affine.apply [[MAP2]](%1, %2)
@ -92,17 +92,17 @@ func @store_load_affine_apply() -> memref<10x10xf32> {
func @store_load_nested(%N : index) {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
}
}
return
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: for %i1 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg0 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: }
// CHECK-NEXT: }
@ -117,12 +117,12 @@ func @multi_store_load_nested_no_fwd(%N : index) {
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
store %cf8, %m[%i1] : memref<10xf32>
}
for %i2 = 0 to %N {
affine.for %i2 = 0 to %N {
// CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -138,9 +138,9 @@ func @store_load_store_nested_no_fwd(%N : index) {
%cf7 = constant 7.0 : f32
%cf9 = constant 9.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
// CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -159,16 +159,16 @@ func @multi_store_load_nested_fwd(%N : index) {
%cf9 = constant 9.0 : f32
%cf10 = constant 10.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
store %cf8, %m[%i1] : memref<10xf32>
}
for %i2 = 0 to %N {
affine.for %i2 = 0 to %N {
store %cf9, %m[%i2] : memref<10xf32>
}
store %cf10, %m[%i0] : memref<10xf32>
for %i3 = 0 to %N {
affine.for %i3 = 0 to %N {
// CHECK-NOT: %{{[0-9]+}} = load
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -182,10 +182,10 @@ func @multi_store_load_nested_fwd(%N : index) {
func @store_load_no_fwd() {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to 10 {
for %i2 = 0 to 10 {
affine.for %i1 = 0 to 10 {
affine.for %i2 = 0 to 10 {
// CHECK: load %{{[0-9]+}}
%v0 = load %m[%i2] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -202,9 +202,9 @@ func @store_load_fwd() {
%c0 = constant 0 : index
%m = alloc() : memref<10xf32>
store %cf7, %m[%c0] : memref<10xf32>
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
for %i2 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
affine.for %i2 = 0 to 10 {
// CHECK-NOT: load %{{[0-9]}}+
%v0 = load %m[%c0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -223,9 +223,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 {
%c0 = constant 0 : index
%c1 = constant 1 : index
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
%idx = affine.apply (d0) -> (d0 + 1) (%i0)
@ -236,9 +236,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 {
%v3 = load %m[%c1] : memref<10xf32>
return %v3 : f32
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32>
// CHECK-NEXT: for %i1 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg0 {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: %2 = affine.apply [[MAP4]](%i0)
// CHECK-NEXT: store %cst_0, %0[%2] : memref<10xf32>

View File

@ -13,14 +13,14 @@ func @store_may_execute_before_load() {
// ancestor IfOp of the store, dominates the ancestor ForSmt of the load,
// and thus the store "may" conditionally execute before the load.
affine.if #set0(%c0) {
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
// expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}}
}
}
for %i1 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%v0 = load %m[%i1] : memref<10xf32>
// expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}}
// expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}}
@ -37,13 +37,13 @@ func @dependent_loops() {
%cst = constant 7.000000e+00 : f32
// There is a dependence from 0 to 1 at depth 1 (common surrounding loops 0)
// because the first loop with the store dominates the second loop.
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cst, %0[%i0] : memref<10xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
// expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}}
}
for %i1 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%1 = load %0[%i1] : memref<10xf32>
// expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}}
// expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}}
@ -231,7 +231,7 @@ func @store_range_load_after_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -254,7 +254,7 @@ func @store_load_func_symbol(%arg0: index, %arg1: index) {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to %arg1 {
affine.for %i0 = 0 to %arg1 {
%a0 = affine.apply (d0) -> (d0) (%arg0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = [1, +inf]}}
@ -277,7 +277,7 @@ func @store_range_load_last_in_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0) (%i0)
// For dependence from 0 to 1, we do not have a loop carried dependence
// because only the final write in the loop accesses the same element as the
@ -305,7 +305,7 @@ func @store_range_load_before_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c0 = constant 0 : index
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -328,7 +328,7 @@ func @store_range_load_first_in_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c0 = constant 0 : index
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
// Dependence from 0 to 1 at depth 1 is a range because all loads at
// constant index zero are reads after first store at index zero during
@ -353,7 +353,7 @@ func @store_range_load_first_in_range() {
func @store_plus_3() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0 + 3) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -375,7 +375,7 @@ func @store_plus_3() {
func @load_minus_2() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 2 to 11 {
affine.for %i0 = 2 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -397,8 +397,8 @@ func @load_minus_2() {
func @perfectly_nested_loops_loop_independent() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 11 {
for %i1 = 0 to 11 {
affine.for %i0 = 0 to 11 {
affine.for %i1 = 0 to 11 {
// Dependence from access 0 to 1 is loop independent at depth = 3.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@ -428,8 +428,8 @@ func @perfectly_nested_loops_loop_independent() {
func @perfectly_nested_loops_loop_carried_at_depth1() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 9 {
for %i1 = 0 to 9 {
affine.for %i0 = 0 to 9 {
affine.for %i1 = 0 to 9 {
// Dependence from access 0 to 1 is loop carried at depth 1.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@ -459,8 +459,8 @@ func @perfectly_nested_loops_loop_carried_at_depth1() {
func @perfectly_nested_loops_loop_carried_at_depth2() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
// Dependence from access 0 to 1 is loop carried at depth 2.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@ -491,8 +491,8 @@ func @one_common_loop() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
// There is a loop-independent dependence from access 0 to 1 at depth 2.
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
store %c7, %m[%a00, %a01] : memref<10x10xf32>
@ -502,7 +502,7 @@ func @one_common_loop() {
// expected-note@-4 {{dependence from 0 to 1 at depth 1 = false}}
// expected-note@-5 {{dependence from 0 to 1 at depth 2 = true}}
}
for %i2 = 0 to 9 {
affine.for %i2 = 0 to 9 {
%a10 = affine.apply (d0, d1) -> (d0) (%i0, %i2)
%a11 = affine.apply (d0, d1) -> (d1) (%i0, %i2)
%v0 = load %m[%a10, %a11] : memref<10x10xf32>
@ -525,7 +525,7 @@ func @dependence_cycle() {
// Dependences:
// *) loop-independent dependence from access 1 to 2 at depth 2.
// *) loop-carried dependence from access 3 to 0 at depth 1.
for %i0 = 0 to 9 {
affine.for %i0 = 0 to 9 {
%a0 = affine.apply (d0) -> (d0) (%i0)
%v0 = load %m.a[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -575,8 +575,8 @@ func @dependence_cycle() {
func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to %arg0 {
for %i1 = 0 to %arg1 {
affine.for %i0 = 0 to %arg0 {
affine.for %i1 = 0 to %arg1 {
%a00 = affine.apply (d0, d1) -> (d0 - 1) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1 + 1) (%i0, %i1)
%v0 = load %m[%a00, %a01] : memref<10x10xf32>
@ -605,8 +605,8 @@ func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) {
func @war_raw_waw_deps() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0 + 1) (%i1)
%v0 = load %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -633,7 +633,7 @@ func @war_raw_waw_deps() {
func @mod_deps() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0 mod 2) (%i0)
// Results are conservative here since we currently don't have a way to
// represent strided sets in FlatAffineConstraints.
@ -658,8 +658,8 @@ func @loop_nest_depth() {
%0 = alloc() : memref<100x100xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 128 {
for %i1 = 0 to 8 {
affine.for %i0 = 0 to 128 {
affine.for %i1 = 0 to 8 {
store %c7, %0[%i0, %i1] : memref<100x100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
@ -667,10 +667,10 @@ func @loop_nest_depth() {
// expected-note@-4 {{dependence from 0 to 1 at depth 1 = true}}
}
}
for %i2 = 0 to 8 {
for %i3 = 0 to 8 {
for %i4 = 0 to 8 {
for %i5 = 0 to 16 {
affine.for %i2 = 0 to 8 {
affine.for %i3 = 0 to 8 {
affine.for %i4 = 0 to 8 {
affine.for %i5 = 0 to 16 {
%8 = affine.apply (d0, d1) -> (d0 * 16 + d1)(%i4, %i5)
%9 = load %0[%8, %i3] : memref<100x100xf32>
// expected-note@-1 {{dependence from 1 to 0 at depth 1 = false}}
@ -693,9 +693,9 @@ func @loop_nest_depth() {
func @mod_div_3d() {
%M = alloc() : memref<2x2x2xi32>
%c0 = constant 0 : i32
for %i0 = 0 to 8 {
for %i1 = 0 to 8 {
for %i2 = 0 to 8 {
affine.for %i0 = 0 to 8 {
affine.for %i1 = 0 to 8 {
affine.for %i2 = 0 to 8 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 floordiv 4) (%i0, %i1, %i2)
%idx1 = affine.apply (d0, d1, d2) -> (d1 mod 2) (%i0, %i1, %i2)
%idx2 = affine.apply (d0, d1, d2) -> (d2 floordiv 4) (%i0, %i1, %i2)
@ -719,12 +719,12 @@ func @delinearize_mod_floordiv() {
%in = alloc() : memref<2x2x3x3x16x1xi32>
%out = alloc() : memref<64x9xi32>
for %i0 = 0 to 2 {
for %i1 = 0 to 2 {
for %i2 = 0 to 3 {
for %i3 = 0 to 3 {
for %i4 = 0 to 16 {
for %i5 = 0 to 1 {
affine.for %i0 = 0 to 2 {
affine.for %i1 = 0 to 2 {
affine.for %i2 = 0 to 3 {
affine.for %i3 = 0 to 3 {
affine.for %i4 = 0 to 16 {
affine.for %i5 = 0 to 1 {
store %val, %in[%i0, %i1, %i2, %i3, %i4, %i5] : memref<2x2x3x3x16x1xi32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
@ -742,8 +742,8 @@ func @delinearize_mod_floordiv() {
}
}
for %ii = 0 to 64 {
for %jj = 0 to 9 {
affine.for %ii = 0 to 64 {
affine.for %jj = 0 to 9 {
%a0 = affine.apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj)
%a10 = affine.apply (d0) ->
(d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0)

View File

@ -5,11 +5,11 @@ func @loop_nest_3d_outer_two_parallel(%N : index) {
%0 = alloc() : memref<1024 x 1024 x vector<64xf32>>
%1 = alloc() : memref<1024 x 1024 x vector<64xf32>>
%2 = alloc() : memref<1024 x 1024 x vector<64xf32>>
for %i = 0 to %N {
affine.for %i = 0 to %N {
// expected-note@-1 {{parallel loop}}
for %j = 0 to %N {
affine.for %j = 0 to %N {
// expected-note@-1 {{parallel loop}}
for %k = 0 to %N {
affine.for %k = 0 to %N {
%5 = load %0[%i, %k] : memref<1024x1024xvector<64xf32>>
%6 = load %1[%k, %j] : memref<1024x1024xvector<64xf32>>
%7 = load %2[%i, %j] : memref<1024x1024xvector<64xf32>>

View File

@ -16,13 +16,13 @@ func @loop_nest_dma() {
%zero = constant 0 : index
%num_elts = constant 128 : index
for %i = 0 to 8 {
affine.for %i = 0 to 8 {
dma_start %A[%i], %Ah[%i], %num_elts, %tag[%zero] : memref<256 x f32>, memref<32 x f32, 1>, memref<1 x f32>
dma_wait %tag[%zero], %num_elts : memref<1 x f32>
%v = load %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1>
%r = "compute"(%v) : (f32) -> (f32)
store %r, %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1>
for %j = 0 to 128 {
affine.for %j = 0 to 128 {
"do_more_compute"(%i, %j) : (index, index) -> ()
}
}
@ -34,7 +34,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %3 = affine.apply [[MOD_2]](%c0)
// CHECK-NEXT: %4 = affine.apply [[MOD_2]](%c0)
// CHECK-NEXT: dma_start %0[%c0], %1[%3, %c0], %c128, %2[%4, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
// CHECK-NEXT: for %i0 = 1 to 8 {
// CHECK-NEXT: affine.for %i0 = 1 to 8 {
// CHECK-NEXT: %5 = affine.apply [[MOD_2]](%i0)
// CHECK-NEXT: %6 = affine.apply [[MOD_2]](%i0)
// CHECK-NEXT: dma_start %0[%i0], %1[%5, %i0], %c128, %2[%6, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
@ -45,7 +45,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %10 = load %1[%9, %7] : memref<2x32xf32, 1>
// CHECK-NEXT: %11 = "compute"(%10) : (f32) -> f32
// CHECK-NEXT: store %11, %1[%9, %7] : memref<2x32xf32, 1>
// CHECK-NEXT: for %i1 = 0 to 128 {
// CHECK-NEXT: affine.for %i1 = 0 to 128 {
// CHECK-NEXT: "do_more_compute"(%7, %i1) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
@ -56,7 +56,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %15 = load %1[%14, %12] : memref<2x32xf32, 1>
// CHECK-NEXT: %16 = "compute"(%15) : (f32) -> f32
// CHECK-NEXT: store %16, %1[%14, %12] : memref<2x32xf32, 1>
// CHECK-NEXT: for %i2 = 0 to 128 {
// CHECK-NEXT: affine.for %i2 = 0 to 128 {
// CHECK-NEXT: "do_more_compute"(%12, %i2) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: dealloc %2 : memref<2x1xf32>
@ -70,7 +70,7 @@ func @loop_step(%arg0: memref<512xf32>,
%arg1: memref<512xf32>) {
%c0 = constant 0 : index
%c4 = constant 4 : index
for %i0 = 0 to 512 step 4 {
affine.for %i0 = 0 to 512 step 4 {
%1 = alloc() : memref<4xf32, 1>
%2 = alloc() : memref<1xi32>
dma_start %arg0[%i0], %1[%c0], %c4, %2[%c0]
@ -84,7 +84,7 @@ func @loop_step(%arg0: memref<512xf32>,
// CHECK: %2 = affine.apply [[FLOOR_MOD_2]](%c0)
// CHECK: %3 = affine.apply [[FLOOR_MOD_2]](%c0)
// CHECK-NEXT: dma_start %arg0[%c0], %0[%2, %c0_0], %c4, [[TAG]][%3, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32>
// CHECK-NEXT: for %i0 = 4 to 512 step 4 {
// CHECK-NEXT: affine.for %i0 = 4 to 512 step 4 {
// CHECK-NEXT: %4 = affine.apply [[FLOOR_MOD_2]](%i0)
// CHECK-NEXT: %5 = affine.apply [[FLOOR_MOD_2]](%i0)
// CHECK-NEXT: dma_start %arg0[%i0], %0[%4, %c0_0], %c4, [[TAG]][%5, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32>
@ -117,8 +117,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK-DAG: [[BUF_ARG2:%[0-9]+]] = alloc() : memref<2x64x4xvector<8xf32>, 2>
// CHECK-DAG: [[TAG_ARG2:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg2[
// CHECK: for %i0 = 1 to 8 {
for %i0 = 0 to 8 {
// CHECK: affine.for %i0 = 1 to 8 {
affine.for %i0 = 0 to 8 {
%6 = affine.apply #map2(%i0)
dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32>
dma_wait %5[%c0], %num_elts : memref<2xi32>
@ -132,8 +132,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: [[TAG_ARG1:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK-NEXT for %i1 = 1 to 8 {
for %i1 = 0 to 8 {
// CHECK-NEXT affine.for %i1 = 1 to 8 {
affine.for %i1 = 0 to 8 {
%7 = affine.apply #map1(%i0, %i1)
%8 = affine.apply #map2(%i1)
dma_start %arg0[%7, %c0], %0[%c0, %c0], %num_elts, %3[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32>
@ -145,8 +145,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: dma_start %arg1[
// CHECK: dma_wait [[TAG_ARG0]]
// CHECK: dma_wait [[TAG_ARG1]]
// CHECK-NEXT: for %i2 = 0 to 4 {
for %i2 = 0 to 4 {
// CHECK-NEXT: affine.for %i2 = 0 to 4 {
affine.for %i2 = 0 to 4 {
"foo"() : () -> ()
}
}
@ -166,16 +166,16 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: [[TAG_ARG1_NESTED:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK: for %i4 = 1 to 8 {
// CHECK: affine.for %i4 = 1 to 8 {
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK: dma_wait [[TAG_ARG0_NESTED]]
// CHECK: dma_wait [[TAG_ARG1_NESTED]]
// CHECK: for %i5 = 0 to 4 {
// CHECK: affine.for %i5 = 0 to 4 {
// CHECK: "foo"() : () -> ()
// CHECK: dma_wait [[TAG_ARG0_NESTED]]
// CHECK: dma_wait [[TAG_ARG1_NESTED]]
// CHECK: for %i6 = 0 to 4 {
// CHECK: affine.for %i6 = 0 to 4 {
}
return
// CHECK: }
@ -202,8 +202,8 @@ func @loop_dma_dependent(%arg2: memref<512x32xvector<8xf32>>) {
// The two DMAs below are dependent (incoming and outgoing on the same
// memref) in the same iteration; so no pipelining here.
// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 8 {
for %i0 = 0 to 8 {
// CHECK: affine.for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
%6 = affine.apply #map2(%i0)
dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>>, memref<64x4xvector<8xf32>, 2>, memref<2xi32>
dma_wait %5[%c0], %num_elts : memref<2xi32>
@ -223,8 +223,8 @@ func @escaping_use(%arg0: memref<512 x 32 x f32>) {
%tag = alloc() : memref<1 x i32>
// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 16 {
for %kTT = 0 to 16 {
// CHECK: affine.for %i0 = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<32 x 32 x f32, 2>, memref<1 x i32>
@ -247,14 +247,14 @@ func @live_out_use(%arg0: memref<512 x 32 x f32>) -> f32 {
%tag = alloc() : memref<1 x i32>
// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 16 {
for %kTT = 0 to 16 {
// CHECK: affine.for %i0 = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<32 x 32 x f32, 2>, memref<1 x i32>
dma_wait %tag[%zero], %num_elt : memref<1 x i32>
}
// Use live out of 'for' inst; no DMA pipelining will be done.
// Use live out of 'affine.for' inst; no DMA pipelining will be done.
%v = load %Av[%zero, %zero] : memref<32 x 32 x f32, 2>
return %v : f32
// CHECK: %{{[0-9]+}} = load %{{[0-9]+}}[%c0, %c0] : memref<32x32xf32, 2>
@ -278,14 +278,14 @@ func @dynamic_shape_dma_buffer(%arg0: memref<512 x 32 x f32>) {
// CHECK: %5 = affine.apply [[MOD_2]](%c0)
// CHECK: %6 = affine.apply [[MOD_2]](%c0)
// CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%5, %c0_0, %c0_0], %c512, %4[%6, %c0_0]
for %kTT = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<? x ? x f32, 2>, memref<1 x i32>
dma_wait %tag[%zero], %num_elt : memref<1 x i32>
}
return
// CHECK-NEXT: for %i0 = 1 to 16 {
// CHECK-NEXT: affine.for %i0 = 1 to 16 {
// CHECK: %7 = affine.apply [[MOD_2]](%i0)
// CHECK: %8 = affine.apply [[MOD_2]](%i0)
// CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%7, %c0_0, %c0_0], %c512, %4[%8, %c0_0]

View File

@ -73,8 +73,8 @@
// CHECK-LABEL: func @test_gaussian_elimination_empty_set0() {
func @test_gaussian_elimination_empty_set0() {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: [[SET_EMPTY_2D]](%i0, %i1)
affine.if (d0, d1) : (2 == 0)(%i0, %i1) {
}
@ -85,8 +85,8 @@ func @test_gaussian_elimination_empty_set0() {
// CHECK-LABEL: func @test_gaussian_elimination_empty_set1() {
func @test_gaussian_elimination_empty_set1() {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: [[SET_EMPTY_2D]](%i0, %i1)
affine.if (d0, d1) : (1 >= 0, -1 >= 0) (%i0, %i1) {
}
@ -97,8 +97,8 @@ func @test_gaussian_elimination_empty_set1() {
// CHECK-LABEL: func @test_gaussian_elimination_non_empty_set2() {
func @test_gaussian_elimination_non_empty_set2() {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set1(%i0, %i1)
affine.if #set2(%i0, %i1) {
}
@ -111,8 +111,8 @@ func @test_gaussian_elimination_non_empty_set2() {
func @test_gaussian_elimination_empty_set3() {
%c7 = constant 7 : index
%c11 = constant 11 : index
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set2(%i0, %i1)[%c7, %c11]
affine.if #set3(%i0, %i1)[%c7, %c11] {
}
@ -125,8 +125,8 @@ func @test_gaussian_elimination_empty_set3() {
func @test_gaussian_elimination_non_empty_set4() {
%c7 = constant 7 : index
%c11 = constant 11 : index
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set3(%i0, %i1)[%c7, %c11]
affine.if #set4(%i0, %i1)[%c7, %c11] {
}
@ -139,8 +139,8 @@ func @test_gaussian_elimination_non_empty_set4() {
func @test_gaussian_elimination_empty_set5() {
%c7 = constant 7 : index
%c11 = constant 11 : index
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set2(%i0, %i1)[%c7, %c11]
affine.if #set5(%i0, %i1)[%c7, %c11] {
}
@ -151,8 +151,8 @@ func @test_gaussian_elimination_empty_set5() {
// CHECK-LABEL: func @test_fuzz_explosion
func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
affine.if #set_fuzz_virus(%i0, %i1, %arg0, %arg1, %arg2, %arg3) {
}
}
@ -163,8 +163,8 @@ func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : i
// CHECK-LABEL: func @test_empty_set(%arg0: index) {
func @test_empty_set(%N : index) {
for %i = 0 to 10 {
for %j = 0 to 10 {
affine.for %i = 0 to 10 {
affine.for %j = 0 to 10 {
// CHECK: affine.if [[SET_EMPTY_2D]](%i0, %i1)
affine.if (d0, d1) : (d0 - d1 >= 0, d1 - d0 - 1 >= 0)(%i, %j) {
"foo"() : () -> ()
@ -198,8 +198,8 @@ func @test_empty_set(%N : index) {
}
}
// The tests below test GCDTightenInequalities().
for %k = 0 to 10 {
for %l = 0 to 10 {
affine.for %k = 0 to 10 {
affine.for %l = 0 to 10 {
// Empty because no multiple of 8 lies between 4 and 7.
// CHECK: affine.if [[SET_EMPTY_1D]](%i2)
affine.if (d0) : (8*d0 - 4 >= 0, -8*d0 + 7 >= 0)(%k) {
@ -226,7 +226,7 @@ func @test_empty_set(%N : index) {
}
}
for %m = 0 to 10 {
affine.for %m = 0 to 10 {
// CHECK: affine.if [[SET_EMPTY_1D]](%i{{[0-9]+}})
affine.if (d0) : (d0 mod 2 - 3 == 0) (%m) {
"foo"() : () -> ()

View File

@ -10,7 +10,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
%1 = "foo"() : () -> i32 loc("foo")
// CHECK: } loc(unknown)
for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
} loc(fused["foo", "mysource.cc":10:8])
// CHECK: } loc(unknown)

View File

@ -8,13 +8,13 @@
// CHECK-LABEL: func @unroll_jam_imperfect_nest() {
func @unroll_jam_imperfect_nest() {
// CHECK: %c100 = constant 100 : index
// CHECK-NEXT: for %i0 = 0 to 100 step 2 {
for %i = 0 to 101 {
// CHECK-NEXT: affine.for %i0 = 0 to 100 step 2 {
affine.for %i = 0 to 101 {
// CHECK: %0 = "addi32"(%i0, %i0) : (index, index) -> i32
// CHECK-NEXT: %1 = affine.apply [[MAP_PLUS_1]](%i0)
// CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
%x = "addi32"(%i, %i) : (index, index) -> i32
for %j = 0 to 17 {
affine.for %j = 0 to 17 {
// CHECK: %3 = "addi32"(%i0, %i0) : (index, index) -> i32
// CHECK-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
// CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_1]](%i0)
@ -30,7 +30,7 @@ func @unroll_jam_imperfect_nest() {
} // CHECK }
// cleanup loop (single iteration)
// CHECK: %11 = "addi32"(%c100, %c100) : (index, index) -> i32
// CHECK-NEXT: for %i2 = 0 to 17 {
// CHECK-NEXT: affine.for %i2 = 0 to 17 {
// CHECK-NEXT: %12 = "addi32"(%c100, %c100) : (index, index) -> i32
// CHECK-NEXT: %13 = "addi32"(%12, %12) : (i32, i32) -> i32
// CHECK-NEXT: }
@ -40,20 +40,20 @@ func @unroll_jam_imperfect_nest() {
// CHECK-LABEL: func @loop_nest_unknown_count_1(%arg0: index) {
func @loop_nest_unknown_count_1(%N : index) {
// CHECK-NEXT: for %i0 = 1 to [[MAP_DIV_OFFSET]]()[%arg0] step 2 {
// CHECK-NEXT: for %i1 = 1 to 100 {
// CHECK-NEXT: affine.for %i0 = 1 to [[MAP_DIV_OFFSET]]()[%arg0] step 2 {
// CHECK-NEXT: affine.for %i1 = 1 to 100 {
// CHECK-NEXT: %0 = "foo"() : () -> i32
// CHECK-NEXT: %1 = "foo"() : () -> i32
// CHECK-NEXT: }
// CHECK-NEXT: }
// A cleanup loop should be generated here.
// CHECK-NEXT: for %i2 = [[MAP_DIV_OFFSET]]()[%arg0] to %arg0 {
// CHECK-NEXT: for %i3 = 1 to 100 {
// CHECK-NEXT: affine.for %i2 = [[MAP_DIV_OFFSET]]()[%arg0] to %arg0 {
// CHECK-NEXT: affine.for %i3 = 1 to 100 {
// CHECK-NEXT: %2 = "foo"() : () -> i32
// CHECK_NEXT: }
// CHECK_NEXT: }
for %i = 1 to %N {
for %j = 1 to 100 {
affine.for %i = 1 to %N {
affine.for %j = 1 to 100 {
%x = "foo"() : () -> i32
}
}
@ -62,8 +62,8 @@ func @loop_nest_unknown_count_1(%N : index) {
// CHECK-LABEL: func @loop_nest_unknown_count_2(%arg0: index) {
func @loop_nest_unknown_count_2(%arg : index) {
// CHECK-NEXT: for %i0 = %arg0 to [[M1]]()[%arg0] step 2 {
// CHECK-NEXT: for %i1 = 1 to 100 {
// CHECK-NEXT: affine.for %i0 = %arg0 to [[M1]]()[%arg0] step 2 {
// CHECK-NEXT: affine.for %i1 = 1 to 100 {
// CHECK-NEXT: %0 = "foo"(%i0) : (index) -> i32
// CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0)
// CHECK-NEXT: %2 = "foo"(%1) : (index) -> i32
@ -71,11 +71,11 @@ func @loop_nest_unknown_count_2(%arg : index) {
// CHECK-NEXT: }
// The cleanup loop is a single iteration one and is promoted.
// CHECK-NEXT: %3 = affine.apply [[M1]]()[%arg0]
// CHECK-NEXT: for %i2 = 1 to 100 {
// CHECK-NEXT: affine.for %i2 = 1 to 100 {
// CHECK-NEXT: %4 = "foo"(%3) : (index) -> i32
// CHECK_NEXT: }
for %i = %arg to ()[s0] -> (s0+9) ()[%arg] {
for %j = 1 to 100 {
affine.for %i = %arg to ()[s0] -> (s0+9) ()[%arg] {
affine.for %j = 1 to 100 {
%x = "foo"(%i) : (index) -> i32
}
}
@ -84,22 +84,22 @@ func @loop_nest_unknown_count_2(%arg : index) {
// CHECK-LABEL: func @loop_nest_symbolic_and_min_upper_bound
func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
for %i = 0 to min ()[s0, s1] -> (s0, s1, 1024)()[%M, %N] {
for %j = 0 to %K {
affine.for %i = 0 to min ()[s0, s1] -> (s0, s1, 1024)()[%M, %N] {
affine.for %j = 0 to %K {
"foo"(%i, %j) : (index, index) -> ()
}
}
return
}
// CHECK-NEXT: for %i0 = 0 to min [[MAP_MULTI_RES]]()[%arg0, %arg1] step 2 {
// CHECK-NEXT: for %i1 = 0 to %arg2 {
// CHECK-NEXT: affine.for %i0 = 0 to min [[MAP_MULTI_RES]]()[%arg0, %arg1] step 2 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg2 {
// CHECK-NEXT: "foo"(%i0, %i1) : (index, index) -> ()
// CHECK-NEXT: %0 = affine.apply #map2(%i0)
// CHECK-NEXT: "foo"(%0, %i1) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i2 = max [[MAP_MULTI_RES]]()[%arg0, %arg1] to min #map9()[%arg0, %arg1] {
// CHECK-NEXT: for %i3 = 0 to %arg2 {
// CHECK-NEXT: affine.for %i2 = max [[MAP_MULTI_RES]]()[%arg0, %arg1] to min #map9()[%arg0, %arg1] {
// CHECK-NEXT: affine.for %i3 = 0 to %arg2 {
// CHECK-NEXT: "foo"(%i2, %i3) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }

View File

@ -25,13 +25,13 @@
// UNROLL-FULL-LABEL: func @loop_nest_simplest() {
func @loop_nest_simplest() {
// UNROLL-FULL: for %i0 = 0 to 100 step 2 {
for %i = 0 to 100 step 2 {
// UNROLL-FULL: affine.for %i0 = 0 to 100 step 2 {
affine.for %i = 0 to 100 step 2 {
// UNROLL-FULL: %c1_i32 = constant 1 : i32
// UNROLL-FULL-NEXT: %c1_i32_0 = constant 1 : i32
// UNROLL-FULL-NEXT: %c1_i32_1 = constant 1 : i32
// UNROLL-FULL-NEXT: %c1_i32_2 = constant 1 : i32
for %j = 0 to 4 {
affine.for %j = 0 to 4 {
%x = constant 1 : i32
}
} // UNROLL-FULL: }
@ -41,8 +41,8 @@ func @loop_nest_simplest() {
// UNROLL-FULL-LABEL: func @loop_nest_simple_iv_use() {
func @loop_nest_simple_iv_use() {
// UNROLL-FULL: %c0 = constant 0 : index
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 step 2 {
for %i = 0 to 100 step 2 {
// UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 step 2 {
affine.for %i = 0 to 100 step 2 {
// UNROLL-FULL: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
// UNROLL-FULL: %1 = affine.apply [[MAP0]](%c0)
// UNROLL-FULL-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
@ -50,7 +50,7 @@ func @loop_nest_simple_iv_use() {
// UNROLL-FULL-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> i32
// UNROLL-FULL: %5 = affine.apply [[MAP2]](%c0)
// UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
for %j = 0 to 4 {
affine.for %j = 0 to 4 {
%x = "addi32"(%j, %j) : (index, index) -> i32
}
} // UNROLL-FULL: }
@ -61,8 +61,8 @@ func @loop_nest_simple_iv_use() {
// UNROLL-FULL-LABEL: func @loop_nest_body_def_use() {
func @loop_nest_body_def_use() {
// UNROLL-FULL: %c0 = constant 0 : index
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 step 2 {
for %i = 0 to 100 step 2 {
// UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 step 2 {
affine.for %i = 0 to 100 step 2 {
// UNROLL-FULL: %c0_0 = constant 0 : index
%c0 = constant 0 : index
// UNROLL-FULL: %0 = affine.apply [[MAP0]](%c0)
@ -76,7 +76,7 @@ func @loop_nest_body_def_use() {
// UNROLL-FULL-NEXT: %8 = affine.apply [[MAP2]](%c0)
// UNROLL-FULL-NEXT: %9 = affine.apply [[MAP0]](%8)
// UNROLL-FULL-NEXT: %10 = "addi32"(%9, %c0_0) : (index, index) -> index
for %j = 0 to 4 {
affine.for %j = 0 to 4 {
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
(index) -> (index)
%y = "addi32"(%x, %c0) : (index, index) -> index
@ -89,14 +89,14 @@ func @loop_nest_body_def_use() {
func @loop_nest_strided() {
// UNROLL-FULL: %c2 = constant 2 : index
// UNROLL-FULL-NEXT: %c2_0 = constant 2 : index
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 {
for %i = 0 to 100 {
// UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 {
affine.for %i = 0 to 100 {
// UNROLL-FULL: %0 = affine.apply [[MAP0]](%c2_0)
// UNROLL-FULL-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
// UNROLL-FULL-NEXT: %2 = affine.apply [[MAP1]](%c2_0)
// UNROLL-FULL-NEXT: %3 = affine.apply [[MAP0]](%2)
// UNROLL-FULL-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> index
for %j = 2 to 6 step 2 {
affine.for %j = 2 to 6 step 2 {
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
(index) -> (index)
%y = "addi32"(%x, %x) : (index, index) -> index
@ -109,7 +109,7 @@ func @loop_nest_strided() {
// UNROLL-FULL-NEXT: %10 = affine.apply [[MAP3]](%c2)
// UNROLL-FULL-NEXT: %11 = affine.apply [[MAP0]](%10)
// UNROLL-FULL-NEXT: %12 = "addi32"(%11, %11) : (index, index) -> index
for %k = 2 to 7 step 2 {
affine.for %k = 2 to 7 step 2 {
%z = "affine.apply" (%k) { map: (d0) -> (d0 + 1) } :
(index) -> (index)
%w = "addi32"(%z, %z) : (index, index) -> index
@ -121,8 +121,8 @@ func @loop_nest_strided() {
// UNROLL-FULL-LABEL: func @loop_nest_multiple_results() {
func @loop_nest_multiple_results() {
// UNROLL-FULL: %c0 = constant 0 : index
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 {
for %i = 0 to 100 {
// UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 {
affine.for %i = 0 to 100 {
// UNROLL-FULL: %0 = affine.apply [[MAP4]](%i0, %c0)
// UNROLL-FULL-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
// UNROLL-FULL-NEXT: %2 = affine.apply #map{{.*}}(%i0, %c0)
@ -132,7 +132,7 @@ func @loop_nest_multiple_results() {
// UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
// UNROLL-FULL-NEXT: %7 = affine.apply #map{{.*}}(%i0, %4)
// UNROLL-FULL-NEXT: %8 = "fma"(%7, %5, %5) : (index, index, index) -> (index, index)
for %j = 0 to 2 step 1 {
affine.for %j = 0 to 2 step 1 {
%x = affine.apply (d0, d1) -> (d0 + 1) (%i, %j)
%y = "addi32"(%x, %x) : (index, index) -> index
%z = affine.apply (d0, d1) -> (d0 + 3) (%i, %j)
@ -149,8 +149,8 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
// UNROLL-FULL: %c0 = constant 0 : index
// UNROLL-FULL-NEXT: %c128 = constant 128 : index
%c128 = constant 128 : index
// UNROLL-FULL: for %i0 = 0 to 100 {
for %i = 0 to 100 {
// UNROLL-FULL: affine.for %i0 = 0 to 100 {
affine.for %i = 0 to 100 {
// UNROLL-FULL: %0 = "vld"(%i0) : (index) -> i32
%ld = "vld"(%i) : (index) -> i32
// UNROLL-FULL: %1 = affine.apply [[MAP0]](%c0)
@ -168,7 +168,7 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
// UNROLL-FULL-NEXT: %13 = affine.apply [[MAP0]](%12)
// UNROLL-FULL-NEXT: %14 = "vmulf"(%12, %13) : (index, index) -> index
// UNROLL-FULL-NEXT: %15 = "vaddf"(%14, %14) : (index, index) -> index
for %j = 0 to 4 {
affine.for %j = 0 to 4 {
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
(index) -> (index)
%y = "vmulf"(%j, %x) : (index, index) -> index
@ -197,7 +197,7 @@ func @loop_nest_seq_multiple() {
// UNROLL-FULL-NEXT: %5 = affine.apply [[MAP2]](%c0_0)
// UNROLL-FULL-NEXT: %6 = affine.apply [[MAP0]](%5)
// UNROLL-FULL-NEXT: "mul"(%6, %6) : (index, index) -> ()
for %j = 0 to 4 {
affine.for %j = 0 to 4 {
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
(index) -> (index)
"mul"(%x, %x) : (index, index) -> ()
@ -205,8 +205,8 @@ func @loop_nest_seq_multiple() {
// UNROLL-FULL: %c99 = constant 99 : index
%k = constant 99 : index
// UNROLL-FULL: for %i0 = 0 to 100 step 2 {
for %m = 0 to 100 step 2 {
// UNROLL-FULL: affine.for %i0 = 0 to 100 step 2 {
affine.for %m = 0 to 100 step 2 {
// UNROLL-FULL: %7 = affine.apply [[MAP0]](%c0)
// UNROLL-FULL-NEXT: %8 = affine.apply [[MAP6]](%c0)[%c99]
// UNROLL-FULL-NEXT: %9 = affine.apply [[MAP0]](%c0)
@ -218,7 +218,7 @@ func @loop_nest_seq_multiple() {
// UNROLL-FULL-NEXT: %15 = affine.apply [[MAP2]](%c0)
// UNROLL-FULL-NEXT: %16 = affine.apply [[MAP0]](%15)
// UNROLL-FULL-NEXT: %17 = affine.apply [[MAP6]](%15)[%c99]
for %n = 0 to 4 {
affine.for %n = 0 to 4 {
%y = "affine.apply" (%n) { map: (d0) -> (d0 + 1) } :
(index) -> (index)
%z = "affine.apply" (%n, %k) { map: (d0) [s0] -> (d0 + s0 + 1) } :
@ -233,7 +233,7 @@ func @loop_nest_unroll_full() {
// UNROLL-FULL-NEXT: %0 = "foo"() : () -> i32
// UNROLL-FULL-NEXT: %1 = "bar"() : () -> i32
// UNROLL-FULL-NEXT: return
for %i = 0 to 1 {
affine.for %i = 0 to 1 {
%x = "foo"() : () -> i32
%y = "bar"() : () -> i32
}
@ -242,16 +242,16 @@ func @loop_nest_unroll_full() {
// SHORT-LABEL: func @loop_nest_outer_unroll() {
func @loop_nest_outer_unroll() {
// SHORT: for %i0 = 0 to 4 {
// SHORT: affine.for %i0 = 0 to 4 {
// SHORT-NEXT: %0 = affine.apply [[MAP0]](%i0)
// SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
// SHORT-NEXT: }
// SHORT-NEXT: for %i1 = 0 to 4 {
// SHORT-NEXT: affine.for %i1 = 0 to 4 {
// SHORT-NEXT: %2 = affine.apply [[MAP0]](%i1)
// SHORT-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> index
// SHORT-NEXT: }
for %i = 0 to 2 {
for %j = 0 to 4 {
affine.for %i = 0 to 2 {
affine.for %j = 0 to 4 {
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
(index) -> (index)
%y = "addi32"(%x, %x) : (index, index) -> index
@ -275,33 +275,33 @@ func @loop_nest_seq_long() -> i32 {
%zero_idx = constant 0 : index
// CHECK: for %i0 = 0 to 512
for %n0 = 0 to 512 {
// CHECK: for %i1 = 0 to 8
for %n1 = 0 to 8 {
// CHECK: affine.for %i0 = 0 to 512
affine.for %n0 = 0 to 512 {
// CHECK: affine.for %i1 = 0 to 8
affine.for %n1 = 0 to 8 {
store %one, %A[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
store %two, %B[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
store %zero, %C[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
}
}
for %x = 0 to 2 {
for %y = 0 to 2 {
// CHECK: for %i2
for %i2 = 0 to 8 {
// CHECK-NOT: for %i3
affine.for %x = 0 to 2 {
affine.for %y = 0 to 2 {
// CHECK: affine.for %i2
affine.for %i2 = 0 to 8 {
// CHECK-NOT: affine.for %i3
// CHECK: %{{[0-9]+}} = affine.apply
%b2 = "affine.apply" (%y, %i2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
%z = load %B[%x, %b2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
"op1"(%z) : (i32) -> ()
}
for %j1 = 0 to 8 {
for %j2 = 0 to 8 {
affine.for %j1 = 0 to 8 {
affine.for %j2 = 0 to 8 {
%a2 = "affine.apply" (%y, %j2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
%v203 = load %A[%j1, %a2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
"op2"(%v203) : (i32) -> ()
}
for %k2 = 0 to 8 {
affine.for %k2 = 0 to 8 {
%s0 = "op3"() : () -> i32
%c2 = "affine.apply" (%x, %k2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
%s1 = load %C[%j1, %c2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
@ -318,8 +318,8 @@ func @loop_nest_seq_long() -> i32 {
// UNROLL-BY-4-LABEL: func @unroll_unit_stride_no_cleanup() {
func @unroll_unit_stride_no_cleanup() {
// UNROLL-BY-4: for %i0 = 0 to 100 {
for %i = 0 to 100 {
// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
affine.for %i = 0 to 100 {
// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 {
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
@ -333,13 +333,13 @@ func @unroll_unit_stride_no_cleanup() {
// UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
// UNROLL-BY-4-NEXT: }
for %j = 0 to 8 {
affine.for %j = 0 to 8 {
%x = "addi32"(%j, %j) : (index, index) -> i32
%y = "addi32"(%x, %x) : (i32, i32) -> i32
}
// empty loop
// UNROLL-BY-4: for %i2 = 0 to 8 {
for %k = 0 to 8 {
// UNROLL-BY-4: affine.for %i2 = 0 to 8 {
affine.for %k = 0 to 8 {
}
}
return
@ -347,8 +347,8 @@ func @unroll_unit_stride_no_cleanup() {
// UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
func @unroll_unit_stride_cleanup() {
// UNROLL-BY-4: for %i0 = 0 to 100 {
for %i = 0 to 100 {
// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
affine.for %i = 0 to 100 {
// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 {
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
@ -366,7 +366,7 @@ func @unroll_unit_stride_cleanup() {
// UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
// UNROLL-BY-4-NEXT: }
for %j = 0 to 10 {
affine.for %j = 0 to 10 {
%x = "addi32"(%j, %j) : (index, index) -> i32
%y = "addi32"(%x, %x) : (i32, i32) -> i32
}
@ -376,8 +376,8 @@ func @unroll_unit_stride_cleanup() {
// UNROLL-BY-4-LABEL: func @unroll_non_unit_stride_cleanup() {
func @unroll_non_unit_stride_cleanup() {
// UNROLL-BY-4: for %i0 = 0 to 100 {
for %i = 0 to 100 {
// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
affine.for %i = 0 to 100 {
// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 2 to 42 step 20 {
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
@ -395,7 +395,7 @@ func @unroll_non_unit_stride_cleanup() {
// UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
// UNROLL-BY-4-NEXT: }
for %j = 2 to 48 step 5 {
affine.for %j = 2 to 48 step 5 {
%x = "addi32"(%j, %j) : (index, index) -> i32
%y = "addi32"(%x, %x) : (i32, i32) -> i32
}
@ -408,8 +408,8 @@ func @unroll_non_unit_stride_cleanup() {
func @loop_nest_single_iteration_after_unroll(%N: index) {
// UNROLL-BY-4: %c0 = constant 0 : index
// UNROLL-BY-4: %c4 = constant 4 : index
// UNROLL-BY-4: for %i0 = 0 to %arg0 {
for %i = 0 to %N {
// UNROLL-BY-4: affine.for %i0 = 0 to %arg0 {
affine.for %i = 0 to %N {
// UNROLL-BY-4: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %1 = affine.apply [[MAP0]](%c0)
// UNROLL-BY-4-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
@ -419,7 +419,7 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
// UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %7 = "addi32"(%c4, %c4) : (index, index) -> i32
// UNROLL-BY-4-NOT: for
for %j = 0 to 5 {
affine.for %j = 0 to 5 {
%x = "addi32"(%j, %j) : (index, index) -> i32
} // UNROLL-BY-4-NOT: }
} // UNROLL-BY-4: }
@ -431,8 +431,8 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
// No cleanup will be generated here.
// UNROLL-BY-4-LABEL: func @loop_nest_operand1() {
func @loop_nest_operand1() {
// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
// UNROLL-BY-4-NEXT: for %i1 = 0 to #map{{[0-9]+}}(%i0) step 4
// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
// UNROLL-BY-4-NEXT: affine.for %i1 = 0 to #map{{[0-9]+}}(%i0) step 4
// UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
@ -440,8 +440,8 @@ func @loop_nest_operand1() {
// UNROLL-BY-4-NEXT: }
// UNROLL-BY-4-NEXT: }
// UNROLL-BY-4-NEXT: return
for %i = 0 to 100 step 2 {
for %j = 0 to (d0) -> (d0 - d0 mod 4) (%i) {
affine.for %i = 0 to 100 step 2 {
affine.for %j = 0 to (d0) -> (d0 - d0 mod 4) (%i) {
%x = "foo"() : () -> i32
}
}
@ -451,8 +451,8 @@ func @loop_nest_operand1() {
// No cleanup will be generated here.
// UNROLL-BY-4-LABEL: func @loop_nest_operand2() {
func @loop_nest_operand2() {
// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
// UNROLL-BY-4-NEXT: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
// UNROLL-BY-4-NEXT: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
// UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
@ -460,8 +460,8 @@ func @loop_nest_operand2() {
// UNROLL-BY-4-NEXT: }
// UNROLL-BY-4-NEXT: }
// UNROLL-BY-4-NEXT: return
for %i = 0 to 100 step 2 {
for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) {
affine.for %i = 0 to 100 step 2 {
affine.for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) {
%x = "foo"() : () -> i32
}
}
@ -472,16 +472,16 @@ func @loop_nest_operand2() {
// factor. The cleanup loop happens to be a single iteration one and is promoted.
// UNROLL-BY-4-LABEL: func @loop_nest_operand3() {
func @loop_nest_operand3() {
// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
for %i = 0 to 100 step 2 {
// UNROLL-BY-4: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
affine.for %i = 0 to 100 step 2 {
// UNROLL-BY-4: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
// UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: }
// UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) {
affine.for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) {
%x = "foo"() : () -> i32
}
} // UNROLL-BY-4: }
@ -490,19 +490,19 @@ func @loop_nest_operand3() {
// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_bound(%arg0: index) {
func @loop_nest_symbolic_bound(%N : index) {
// UNROLL-BY-4: for %i0 = 0 to 100 {
for %i = 0 to 100 {
// UNROLL-BY-4: for %i1 = 0 to #map{{[0-9]+}}()[%arg0] step 4 {
// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
affine.for %i = 0 to 100 {
// UNROLL-BY-4: affine.for %i1 = 0 to #map{{[0-9]+}}()[%arg0] step 4 {
// UNROLL-BY-4: %0 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: }
// A cleanup loop will be be generated here.
// UNROLL-BY-4-NEXT: for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
// UNROLL-BY-4-NEXT: affine.for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
// UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
// UNROLL-BY-4_NEXT: }
for %j = 0 to %N {
affine.for %j = 0 to %N {
%x = "foo"() : () -> i32
}
}
@ -511,18 +511,18 @@ func @loop_nest_symbolic_bound(%N : index) {
// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_and_min_upper_bound
func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
for %i = %M to min ()[s0, s1] -> (s0, s1, 1024)()[%N, %K] {
affine.for %i = %M to min ()[s0, s1] -> (s0, s1, 1024)()[%N, %K] {
"foo"() : () -> ()
}
return
}
// CHECK-NEXT: for %i0 = %arg0 to min [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] step 4 {
// CHECK-NEXT: affine.for %i0 = %arg0 to min [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] step 4 {
// CHECK-NEXT: "foo"() : () -> ()
// CHECK-NEXT: "foo"() : () -> ()
// CHECK-NEXT: "foo"() : () -> ()
// CHECK-NEXT: "foo"() : () -> ()
// CHECK-NEXT: }
// CHECK-NEXT: for %i1 = max [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] to min #map28()[%arg1, %arg2] {
// CHECK-NEXT: affine.for %i1 = max [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] to min #map28()[%arg1, %arg2] {
// CHECK-NEXT: "foo"() : () -> ()
// CHECK-NEXT: }
// CHECK-NEXT: return
@ -533,22 +533,22 @@ func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index)
func @loop_nest_non_trivial_multiple_unroll_factor(%M : index, %N : index) {
%T = affine.apply (d0) -> (4*d0 + 1)(%M)
%K = affine.apply (d0) -> (d0 - 1) (%T)
for %i = 0 to min (d0, d1) -> (4 * d0, d1, 1024)(%N, %K) {
affine.for %i = 0 to min (d0, d1) -> (4 * d0, d1, 1024)(%N, %K) {
"foo"() : () -> ()
}
return
}
// UNROLL-BY-4: for %i0 = 0 to min
// UNROLL-BY-4: affine.for %i0 = 0 to min
// UNROLL-BY-4-NOT: for
// UNROLL-BY-4: return
// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_unroll_factor_2
func @loop_nest_non_trivial_multiple_unroll_factor_2(%M : index, %N : index) {
%K = affine.apply (d0) -> (4*d0) (%M)
for %i = 0 to min ()[s0, s1] -> (4 * s0, s1, 1024)()[%N, %K] {
affine.for %i = 0 to min ()[s0, s1] -> (4 * s0, s1, 1024)()[%N, %K] {
"foo"() : () -> ()
}
// UNROLL-BY-4: for %i0 = 0 to min
// UNROLL-BY-4: affine.for %i0 = 0 to min
// UNROLL-BY-4-NEXT: "foo"
// UNROLL-BY-4-NEXT: "foo"
// UNROLL-BY-4-NEXT: "foo"
@ -560,7 +560,7 @@ func @loop_nest_non_trivial_multiple_unroll_factor_2(%M : index, %N : index) {
// UNROLL-BY-1-LABEL: func @unroll_by_one_should_promote_single_iteration_loop()
func @unroll_by_one_should_promote_single_iteration_loop() {
for %i = 0 to 1 {
affine.for %i = 0 to 1 {
%x = "foo"(%i) : (index) -> i32
}
return