NFC: Rename the 'for' operation in the AffineOps dialect to 'affine.for' and set the namespace of the AffineOps dialect to 'affine'.
PiperOrigin-RevId: 240165792
This commit is contained in:
parent 8f5fa56623
commit 832567b379
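For orientation before the hunks: the rename is mechanical. In the custom assembly form the loop keyword gains the dialect prefix, and in the generic form the quoted op name is qualified by the new 'affine' namespace. A minimal before/after sketch in MLIR, distilled from the hunks below (loop bodies elided):

```mlir
// Custom form: before and after the rename.
for %i = 1 to 10 {
}
affine.for %i = 1 to 10 {
}

// Generic form: the op name now carries the 'affine' dialect namespace.
"for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)} : () -> ()
"affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)} : () -> ()
```

Everything else in the patch follows from these two spellings: the dialect's name prefix, the printer string, docs, comments, and test expectations.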
@@ -56,11 +56,11 @@ class EdscTest(unittest.TestCase):
 code = str(fun)
 # TODO(zinenko,ntv): use FileCheck for these tests
 self.assertIn(
-' "for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)} : () -> () {\n',
+' "affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)} : () -> () {\n',
 code)
 self.assertIn(" ^bb1(%i0: index):", code)
 self.assertIn(
-' "for"(%c42, %2) {lower_bound: (d0) -> (d0), step: 2 : index, upper_bound: (d0) -> (d0)} : (index, index) -> () {\n',
+' "affine.for"(%c42, %2) {lower_bound: (d0) -> (d0), step: 2 : index, upper_bound: (d0) -> (d0)} : (index, index) -> () {\n',
 code)
 self.assertIn(" ^bb2(%i1: index):", code)
 self.assertIn(
@@ -76,19 +76,19 @@ class EdscTest(unittest.TestCase):

 code = str(fun)
 self.assertIn(
-' "for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (5)} : () -> () {\n',
+' "affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (5)} : () -> () {\n',
 code)
 self.assertIn(" ^bb1(%i0: index):", code)
 self.assertIn(
-' "for"() {lower_bound: () -> (1), step: 3 : index, upper_bound: () -> (15)} : () -> () {\n',
+' "affine.for"() {lower_bound: () -> (1), step: 3 : index, upper_bound: () -> (15)} : () -> () {\n',
 code)
 self.assertIn(" ^bb2(%i1: index):", code)
 self.assertIn(
-' "for"() {lower_bound: () -> (2), step: 5 : index, upper_bound: () -> (25)} : () -> () {\n',
+' "affine.for"() {lower_bound: () -> (2), step: 5 : index, upper_bound: () -> (25)} : () -> () {\n',
 code)
 self.assertIn(" ^bb3(%i2: index):", code)
 self.assertIn(
-' "for"() {lower_bound: () -> (3), step: 7 : index, upper_bound: () -> (35)} : () -> () {\n',
+' "affine.for"() {lower_bound: () -> (3), step: 7 : index, upper_bound: () -> (35)} : () -> () {\n',
 code)
 self.assertIn(" ^bb4(%i3: index):", code)
 self.assertIn(
@@ -342,10 +342,10 @@ class EdscTest(unittest.TestCase):

 code = str(fun)
 self.assertIn(
-'"for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (10)}',
+'"affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (10)}',
 code)
 self.assertIn(
-'"for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)}',
+'"affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)}',
 code)
 self.assertIn("%0 = load %arg0[%i0, %i1] : memref<10x42xf32>", code)
 self.assertIn("%1 = addf %0, %cst : f32", code)
@@ -367,7 +367,7 @@ class EdscTest(unittest.TestCase):

 code = str(fun)
 self.assertIn(
-'"for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (32)} : () -> ()',
+'"affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (32)} : () -> ()',
 code)
 self.assertIn("%0 = load %arg0[%i0, %i2] : memref<32x32xf32>", code)
 self.assertIn("%1 = load %arg1[%i2, %i1] : memref<32x32xf32>", code)

@@ -15,7 +15,7 @@ loops and if instructions), the result of a
 [`affine.apply` operation](#'affine.apply'-operation) that recursively takes as
 arguments any symbolic identifiers. Dimensions may be bound not only to anything
 that a symbol is bound to, but also to induction variables of enclosing
-[`for` operations](#'for'-operation), and the result of an
+[`affine.for` operations](#'affine.for'-operation), and the result of an
 [`affine.apply` operation](#'affine.apply'-operation) (which recursively may use
 other dimensions and symbols).
@@ -47,12 +47,12 @@ Example:
 %2 = affine.apply (i)[s0] -> (i+s0) (%42)[%n]
 ```

-#### 'for' operation {#'for'-operation}
+#### 'affine.for' operation {#'affine.for'-operation}

 Syntax:

 ``` {.ebnf}
-operation ::= `for` ssa-id `=` lower-bound `to` upper-bound
+operation ::= `affine.for` ssa-id `=` lower-bound `to` upper-bound
 (`step` integer-literal)? `{` inst* `}`

 lower-bound ::= `max`? affine-map dim-and-symbol-use-list | shorthand-bound
@@ -60,17 +60,17 @@ upper-bound ::= `min`? affine-map dim-and-symbol-use-list | shorthand-bound
 shorthand-bound ::= ssa-id | `-`? integer-literal
 ```

-The `for` operation represents an affine loop nest, defining an SSA value for
-its induction variable. This SSA value always has type
+The `affine.for` operation represents an affine loop nest, defining an SSA value
+for its induction variable. This SSA value always has type
 [`index`](LangRef.md#index-type), which is the size of the machine word.

-The `for` operation executes its body a number of times iterating from a lower
-bound to an upper bound by a stride. The stride, represented by `step`, is a
-positive constant integer which defaults to "1" if not present. The lower and
+The `affine.for` operation executes its body a number of times iterating from a
+lower bound to an upper bound by a stride. The stride, represented by `step`, is
+a positive constant integer which defaults to "1" if not present. The lower and
 upper bounds specify a half-open range: the range includes the lower bound but
 does not include the upper bound.

-The lower and upper bounds of a `for` operation are represented as an
+The lower and upper bounds of a `affine.for` operation are represented as an
 application of an affine mapping to a list of SSA values passed to the map. The
 [same restrictions](#restrictions-on-dimensions-and-symbols) hold for these SSA
 values as for all bindings of SSA values to dimensions and symbols.
@@ -94,8 +94,8 @@ Example showing reverse iteration of the inner loop:

 func @simple_example(%A: memref<?x?xf32>, %B: memref<?x?xf32>) {
 %N = dim %A, 0 : memref<?x?xf32>
-for %i = 0 to %N step 1 {
-for %j = 0 to %N { // implicitly steps by 1
+affine.for %i = 0 to %N step 1 {
+affine.for %j = 0 to %N { // implicitly steps by 1
 %0 = affine.apply #map57(%j)[%N]
 %tmp = call @F1(%A, %i, %0) : (memref<?x?xf32>, index, index)->(f32)
 call @F2(%tmp, %B, %i, %0) : (f32, memref<?x?xf32>, index, index)->()
@@ -130,8 +130,8 @@ Example:
 #set = (d0, d1)[s0]: (d0 - 10 >= 0, s0 - d0 - 9 >= 0,
 d1 - 10 >= 0, s0 - d1 - 9 >= 0)
 func @reduced_domain_example(%A, %X, %N) : (memref<10xi32>, i32, i32) {
-for %i = 0 to %N {
-for %j = 0 to %N {
+affine.for %i = 0 to %N {
+affine.for %j = 0 to %N {
 %0 = affine.apply #map42(%j)
 %tmp = call @S1(%X, %i, %0)
 affine.if #set(%i, %j)[%N] {

@@ -23,8 +23,8 @@ Examples:
 // pad with %f0 to handle the boundary case:
 %f0 = constant 0.0f : f32
 for %i0 = 0 to %0 {
-for %i1 = 0 to %1 step 256 {
-for %i2 = 0 to %2 step 32 {
+affine.for %i1 = 0 to %1 step 256 {
+affine.for %i2 = 0 to %2 step 32 {
 %v = vector_transfer_read %A, %i0, %i1, %i2, %f0
 {permutation_map: (d0, d1, d2) -> (d2, d1)} :
 (memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32>
@@ -34,7 +34,7 @@ for %i0 = 0 to %0 {
 // vector<128xf32>. The underlying implementation will require a 1-D vector
 // broadcast:
 for %i0 = 0 to %0 {
-for %i1 = 0 to %1 {
+affine.for %i1 = 0 to %1 {
 %3 = vector_transfer_read %A, %i0, %i1
 {permutation_map: (d0, d1) -> (0)} :
 (memref<?x?xf32>, index, index) -> vector<128xf32>
@@ -81,8 +81,8 @@ A notional lowering of vector_transfer_read could generate code resembling:
 %tmp = alloc() : vector<3x4x5xf32>
 %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
 for %i = 0 to 3 {
-for %j = 0 to 4 {
-for %k = 0 to 5 {
+affine.for %j = 0 to 4 {
+affine.for %k = 0 to 5 {
 %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32>
 store %tmp[%i, %j, %k] : vector<3x4x5xf32>
 }}}
@@ -102,7 +102,7 @@ lowered code would resemble:
 %tmp = alloc() : vector<3x4x5xf32>
 %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
 for %i = 0 to 3 {
-for %k = 0 to 5 {
+affine.for %k = 0 to 5 {
 %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32>
 store %tmp[%i, 0, %k] : vector<3x4x5xf32>
 }}
@@ -130,9 +130,9 @@ Examples:
 ```mlir {.mlir}
 // write vector<16x32x64xf32> into the slice `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`:
 for %i0 = 0 to %0 {
-for %i1 = 0 to %1 step 32 {
-for %i2 = 0 to %2 step 64 {
-for %i3 = 0 to %3 step 16 {
+affine.for %i1 = 0 to %1 step 32 {
+affine.for %i2 = 0 to %2 step 64 {
+affine.for %i3 = 0 to %3 step 16 {
 %val = `ssa-value` : vector<16x32x64xf32>
 vector_transfer_write %val, %A, %i0, %i1, %i2, %i3
 {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} :

@@ -40,10 +40,10 @@ which means that values are defined before use and have scope defined by their
 dominance relations. Operations may produce zero or more results, and each is a
 distinct SSA value with its own type defined by the [type system](#type-system).

-MLIR incorporates polyhedral compiler concepts, including `for` and `affine.if`
-operations defined by the [affine dialect](Dialects/Affine.md), which model
-affine loops and affine conditionals. It also includes affine maps integrated
-into the type system - they are key to the representation of data and
+MLIR incorporates polyhedral compiler concepts, including `affine.for` and
+`affine.if` operations defined by the [affine dialect](Dialects/Affine.md),
+which model affine loops and affine conditionals. It also includes affine maps
+integrated into the type system - they are key to the representation of data and
 [MemRefs](#memref-type), which are the representation for tensors in addressable
 memory. MLIR also supports a first-class Tensor type allowing it to concisely
 represent operations on N-dimensional arrays.
@@ -99,10 +99,10 @@ func @multiply(%A: memref<100x?xf32>, %B: memref<?x50xf32>)
 %C = alloc() : memref<100x50xf32>

 // Multiplication loop nest.
-for %i = 0 to 100 {
-for %j = 0 to 50 {
+affine.for %i = 0 to 100 {
+affine.for %j = 0 to 50 {
 store 0 to %C[%i, %j] : memref<100x50xf32>
-for %k = 0 to %n {
+affine.for %k = 0 to %n {
 %a_v = load %A[%i, %k] : memref<100x?xf32>
 %b_v = load %B[%k, %j] : memref<?x50xf32>
 %prod = mulf %a_v, %b_v : f32
@@ -1697,8 +1697,8 @@ The arity of indices is the rank of the memref (i.e., if the memref loaded from
 is of rank 3, then 3 indices are required for the load following the memref
 identifier).

-In an `affine.if` or `for` body, the indices of a load are restricted to SSA
-values bound to surrounding loop induction variables,
+In an `affine.if` or `affine.for` body, the indices of a load are restricted to
+SSA values bound to surrounding loop induction variables,
 [symbols](#dimensions-and-symbols), results of a
 [`constant` operation](#'constant'-operation), or the result of an
 `affine.apply` operation that can in turn take as arguments all of the
@@ -1719,10 +1719,10 @@ Example:

 **Context:** The `load` and `store` instructions are specifically crafted to
 fully resolve a reference to an element of a memref, and (in affine `affine.if`
-and `for` instructions) the compiler can follow use-def chains (e.g. through
-[`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) operations) to
-precisely analyze references at compile-time using polyhedral techniques. This
-is possible because of the
+and `affine.for` instructions) the compiler can follow use-def chains (e.g.
+through [`affine.apply`](Dialects/Affine.md#'affine.apply'-operation)
+operations) to precisely analyze references at compile-time using polyhedral
+techniques. This is possible because of the
 [restrictions on dimensions and symbols](Dialects/Affine.md#restrictions-on-dimensions-and-symbols)
 in these contexts.

@@ -1755,10 +1755,11 @@ store %100, %A[%1, 1023] : memref<4x?xf32, #layout, hbm>

 **Context:** The `load` and `store` instructions are specifically crafted to
 fully resolve a reference to an element of a memref, and (in polyhedral
-`affine.if` and `for` instructions) the compiler can follow use-def chains (e.g.
-through [`affine.apply`](Dialects/Affine.md#'affine.apply'-operation)
-operations) to precisely analyze references at compile-time using polyhedral
-techniques. This is possible because of the
+`affine.if` and `affine.for` instructions) the compiler can follow use-def
+chains (e.g. through
+[`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) operations) to
+precisely analyze references at compile-time using polyhedral techniques. This
+is possible because of the
 [restrictions on dimensions and symbols](Dialect/Affine.md#restrictions-on-dimensions-and-symbols)
 in these contexts.

@@ -39,9 +39,9 @@ These restrictions may be lifted in the future.

 ### Output IR

-Functions with `for` and `affine.if` instructions eliminated. These functions
-may contain operations from the Standard dialect in addition to those already
-present before the pass.
+Functions with `affine.for` and `affine.if` instructions eliminated. These
+functions may contain operations from the Standard dialect in addition to those
+already present before the pass.

 ### Invariants

@@ -95,10 +95,10 @@ Input
 ```mlir
 func @loop_nest_tiled() -> memref<256x1024xf32> {
 %0 = alloc() : memref<256x1024xf32>
-for %i0 = 0 to 256 step 32 {
-for %i1 = 0 to 1024 step 32 {
-for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
-for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
+affine.for %i0 = 0 to 256 step 32 {
+affine.for %i1 = 0 to 1024 step 32 {
+affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
+affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
 %1 = load %0[%i2, %i3] : memref<256x1024xf32>
 }
 }
@@ -119,16 +119,16 @@ func @loop_nest_tiled() -> memref<256x1024xf32> {
 %c32 = constant 32 : index
 %c0 = constant 0 : index
 %0 = alloc() : memref<256x1024xf32>
-for %i0 = 0 to 256 step 32 {
-for %i1 = 0 to 1024 step 32 {
+affine.for %i0 = 0 to 256 step 32 {
+affine.for %i1 = 0 to 1024 step 32 {
 %1 = affine.apply #map1(%i0)
 %2 = affine.apply #map1(%i1)
 %3 = alloc() : memref<32x32xf32, 1>
 %4 = alloc() : memref<1xi32>
 dma_start %0[%1, %2], %3[%c0, %c0], %c1024, %4[%c0], %c1024, %c32 : memref<256x1024xf32>, memref<32x32xf32, 1>, memref<1xi32>
 dma_wait %4[%c0], %c1024 : memref<1xi32>
-for %i2 = #map1(%i0) to #map2(%i0) {
-for %i3 = #map1(%i1) to #map2(%i1) {
+affine.for %i2 = #map1(%i0) to #map2(%i0) {
+affine.for %i3 = #map1(%i1) to #map2(%i1) {
 %5 = affine.apply #map3(%i0, %i2)
 %6 = affine.apply #map3(%i1, %i3)
 %7 = load %3[%5, %6] : memref<32x32xf32, 1>
@@ -194,8 +194,8 @@ Input
 func @store_load_affine_apply() -> memref<10x10xf32> {
 %cf7 = constant 7.0 : f32
 %m = alloc() : memref<10x10xf32>
-for %i0 = 0 to 10 {
-for %i1 = 0 to 10 {
+affine.for %i0 = 0 to 10 {
+affine.for %i1 = 0 to 10 {
 %t0 = affine.apply (d0, d1) -> (d1 + 1)(%i0, %i1)
 %t1 = affine.apply (d0, d1) -> (d0)(%i0, %i1)
 %idx0 = affine.apply (d0, d1) -> (d1) (%t0, %t1)
@@ -217,8 +217,8 @@ Output
 func @store_load_affine_apply() -> memref<10x10xf32> {
 %cst = constant 7.000000e+00 : f32
 %0 = alloc() : memref<10x10xf32>
-for %i0 = 0 to 10 {
-for %i1 = 0 to 10 {
+affine.for %i0 = 0 to 10 {
+affine.for %i1 = 0 to 10 {
 %3 = affine.apply #map1(%1, %2)
 %4 = affine.apply #map2(%1, %2)
 store %cst, %0[%3, %4] : memref<10x10xf32>
@@ -258,7 +258,7 @@ Input
 %2 = alloc() : memref<1xf32>
 %c0 = constant 0 : index
 %c128 = constant 128 : index
-for %i0 = 0 to 8 {
+affine.for %i0 = 0 to 8 {
 dma_start %0[%i0], %1[%i0], %c128, %2[%c0] : memref<256xf32>, memref<32xf32, 1>, memref<1xf32>
 dma_wait %2[%c0], %c128 : memref<1xf32>
 %3 = load %1[%i0] : memref<32xf32, 1>
@@ -282,7 +282,7 @@ Output
 %1 = alloc() : memref<2x32xf32, 1>
 %2 = alloc() : memref<2x1xf32>
 dma_start %0[%c0], %1[%c0, %c0], %c128, %2[%c0, %c0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
-for %i0 = 1 to 8 {
+affine.for %i0 = 1 to 8 {
 %3 = affine.apply #map2(%i0)
 %4 = affine.apply #map2(%i0)
 dma_start %0[%i0], %1[%3, %i0], %c128, %2[%4, %c0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>

@@ -150,8 +150,8 @@ func bar(%A : memref<8x?xf32, #lmap>) {
 // dynamically using dim instruction.
 %N = dim %A, 1 : memref<8x?xf32, #lmap>

-for %i = 0 to 8 {
-for %j = 0 to %N {
+affine.for %i = 0 to 8 {
+affine.for %j = 0 to %N {
 // A[i,j] += 1
 %s1 = load %A [%i, %j] : memref<8x?xf32, #lmap>
 %s2 = add %s1, 1
@@ -548,7 +548,7 @@ nested in an outer function that using affine loops.
 func @search(memref<?x?xi32 %A, <?xi32> %S, i32 %key) {
 %ni = dim %A, 0 : memref<?x?xi32>
 // This loop can be parallelized
-for %i = 0 to %ni {
+affine.for %i = 0 to %ni {
 call @search_body (%A, %S, %i) : (memref<?x?xi32>, memref<?xi32>, i32)
 }
 return
@@ -582,9 +582,9 @@ func @search_body(%A: memref<?x?xi32>, %S: memref<?xi32>, %key: i32) {

 As per the [MLIR spec](LangRef.md), the restrictions on dimensions and symbol
 identifiers to be used with the affine.apply instruction only apply to accesses
-inside `for` and `affine.if` instructions. However, an analysis of accesses
-inside the called function (`@search_body`) is necessary to determine if the
-`%i` loop could be parallelized: such function access analysis is calling
+inside `affine.for` and `affine.if` instructions. However, an analysis of
+accesses inside the called function (`@search_body`) is necessary to determine
+if the `%i` loop could be parallelized: such function access analysis is calling
 context sensitive.

 ### Non-affine loop bounds {#non-affine-loop-bounds}
@@ -604,8 +604,8 @@ for (i=0; i <N; i++)

 ```mlir {.mlir}
 func @outer_nest(%n) : (i32) {
-for %i = 0 to %n {
-for %j = 0 to %n {
+affine.for %i = 0 to %n {
+affine.for %j = 0 to %n {
 call @inner_nest(%i, %j, %n)
 }
 }
@@ -620,8 +620,8 @@ func @inner_nest(%i: i32, %j: i32, %n: i32) {
 }

 func @inner_nest2(%m, %n) -> i32 {
-for %k = 0 to %m {
-for %l = 0 to %n {
+affine.for %k = 0 to %m {
+affine.for %l = 0 to %n {
 ...
 }
 }
@@ -663,13 +663,13 @@ in a dilated convolution.
 func @conv2d(memref<16x1024x1024x3xf32, #lm0, vmem> %input,
 memref<5x5x3x32xf32, #lm0, vmem> %kernel,
 memref<16x512x512x32xf32, #lm0, vmem> %output) {
-for %b = 0 to %batch {
-for %oh = 0 to %output_height {
-for %ow = 0 to %output_width {
-for %of = 0 to %output_feature {
-for %kh = 0 to %kernel_height {
-for %kw = 0 to %kernel_width {
-for %if = 0 to %input_feature {
+affine.for %b = 0 to %batch {
+affine.for %oh = 0 to %output_height {
+affine.for %ow = 0 to %output_width {
+affine.for %of = 0 to %output_feature {
+affine.for %kh = 0 to %kernel_height {
+affine.for %kw = 0 to %kernel_width {
+affine.for %if = 0 to %input_feature {
 // Calculate input indices.
 %1_0 = affine.apply #map1_0 (%0#1, %0#2, %0#4, %0#5)
 [%h_stride, %w_stride, %h_kernel_dilation, %w_kernel_dilation,
@@ -913,10 +913,10 @@ func @dma_hbm_to_vmem(memref<1024 x f32, #layout_map0, hbm> %a,
 representation. 2(b) requires no change, but impacts how cost models look at
 index and layout maps.

-### `affine.if` and `for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"}
+### `affine.if` and `affine.for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"}

 We considered providing a representation for SSA values that are live out of
-`if/else` conditional bodies and loop carried in `for` loops. We
+`if/else` conditional bodies and loop carried in `affine.for` loops. We
 ultimately abandoned this approach due to its complexity. In the current design
 of MLIR, scalar variables cannot escape for loops or if instructions. In
 situations, where escaping is necessary, we use zero-dimensional tensors and
@@ -948,7 +948,7 @@ Example:
 // Return sum of elements in 1-dimensional mref A
 func int32 @sum(%A : memref<?xi32>, %N : i32) -> (i32) {
 %init = 0
-%result = for %i = 0 to N with %tmp(%init) {
+%result = affine.for %i = 0 to N with %tmp(%init) {
 %value = load %A[%i]
 %sum = %value + %tmp
 yield %sum
@@ -978,7 +978,7 @@ Example:
 // Compute sum of half of the array
 func int32 @sum_half(%A, %N) {
 %s0 = 0
-%s1 = for %i = 1 ... N step 1 with %s2 (%s0) {
+%s1 = affine.for %i = 1 ... N step 1 with %s2 (%s0) {
 %s3 = if (%i >= %N / 2) {
 %v0 = load %A[%i]
 %s4 = %s2 + %v0

@@ -184,8 +184,8 @@ Our simple example above would be represented as:

 ```mlir
 mlfunc @simple_example(... %N) {
-for %i = 0 ... %N step 1 {
-for %j = 0 ... %N step 1 {
+affine.for %i = 0 ... %N step 1 {
+affine.for %j = 0 ... %N step 1 {
 // identity noop in this case, but can exist in general.
 %0,%1 = affine.apply #57(%i, %j)

@@ -203,8 +203,8 @@ The example with the reduced domain would be represented with an if instruction:

 ```mlir
 mlfunc @reduced_domain_example(... %N) {
-for %i = 0 ... %N step 1 {
-for %j = 0 ... %N step 1 {
+affine.for %i = 0 ... %N step 1 {
+affine.for %j = 0 ... %N step 1 {
 // identity noop in this case, but can exist in general.
 %0,%1 = affinecall #57(%i, %j)

@@ -233,8 +233,8 @@ that transformations call into):

 ```mlir
 mlfunc @skewed_domain_example(... %N) {
-for %t1 = 0 ... 2*N-2 step 1 {
-for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 {
+affine.for %t1 = 0 ... 2*N-2 step 1 {
+affine.for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 {
 (%i, %j) = (%t1-%t2, %t2)
 ...
 }
@@ -373,7 +373,7 @@ mlfunc's (if we support them) will also have to have domains.
 ### Lack of redundancy in IR

 The traditional form has multiple encodings for the same sorts of behavior: you
-end up having bits on `for` loops to specify whether codegen should use
+end up having bits on `affine.for` loops to specify whether codegen should use
 "atomic/separate" policies, unroll loops, etc. Instructions can be split or can
 generate multiple copies of their instruction because of overlapping domains,
 etc.

@@ -88,15 +88,15 @@ public:
 MLIRContext *context);
 };

-/// The "for" instruction represents an affine loop nest, defining an SSA value
-/// for its induction variable. The induction variable is represented as a
+/// The "affine.for" instruction represents an affine loop nest, defining an SSA
+/// value for its induction variable. The induction variable is represented as a
 /// BlockArgument to the entry block of the body. The body and induction
-/// variable can be created automatically for new "for" ops with 'createBody'.
-/// This SSA value always has type index, which is the size of the machine word.
-/// The stride, represented by step, is a positive constant integer which
-/// defaults to "1" if not present. The lower and upper bounds specify a
-/// half-open range: the range includes the lower bound but does not include the
-/// upper bound.
+/// variable can be created automatically for new "affine.for" ops with
+/// 'createBody'. This SSA value always has type index, which is the size of the
+/// machine word. The stride, represented by step, is a positive constant
+/// integer which defaults to "1" if not present. The lower and upper bounds
+/// specify a half-open range: the range includes the lower bound but does not
+/// include the upper bound.
 ///
 /// The lower and upper bounds of a for operation are represented as an
 /// application of an affine mapping to a list of SSA values passed to the map.
@@ -108,7 +108,7 @@ public:
 ///
 /// Example:
 ///
-/// for %i = 1 to 10 {
+/// affine.for %i = 1 to 10 {
 /// ...
 /// }
 ///
@@ -131,7 +131,7 @@ public:
 static void getCanonicalizationPatterns(OwningRewritePatternList &results,
 MLIRContext *context);

-static StringRef getOperationName() { return "for"; }
+static StringRef getOperationName() { return "affine.for"; }
 static StringRef getStepAttrName() { return "step"; }
 static StringRef getLowerBoundAttrName() { return "lower_bound"; }
 static StringRef getUpperBoundAttrName() { return "upper_bound"; }
@@ -268,10 +268,10 @@ public:
 operand_range getOperands() { return {operand_begin(), operand_end()}; }

 private:
-// 'for' instruction that contains this bound.
+// 'affine.for' instruction that contains this bound.
 AffineForOp inst;
 // Start and end positions of this affine bound operands in the list of
-// the containing 'for' instruction operands.
+// the containing 'affine.for' instruction operands.
 unsigned opStart, opEnd;
 // Affine map for this bound.
 AffineMap map;

@@ -376,14 +376,15 @@ public:

 AffineExpr toAffineExpr(unsigned idx, MLIRContext *context);

-/// Adds constraints (lower and upper bounds) for the specified 'for'
+/// Adds constraints (lower and upper bounds) for the specified 'affine.for'
 /// instruction's Value using IR information stored in its bound maps. The
 /// right identifier is first looked up using forOp's Value. Asserts if the
-/// Value corresponding to the 'for' instruction isn't found in the constraint
-/// system. Returns failure for the yet unimplemented/unsupported cases. Any
-/// new identifiers that are found in the bound operands of the 'for'
-/// instruction are added as trailing identifiers (either dimensional or
-/// symbolic depending on whether the operand is a valid ML Function symbol).
+/// Value corresponding to the 'affine.for' instruction isn't found in the
+/// constraint system. Returns failure for the yet unimplemented/unsupported
+/// cases. Any new identifiers that are found in the bound operands of the
+/// 'affine.for' instruction are added as trailing identifiers (either
+/// dimensional or symbolic depending on whether the operand is a valid ML
+/// Function symbol).
 // TODO(bondhugula): add support for non-unit strides.
 LogicalResult addAffineForOpDomain(AffineForOp forOp);

@@ -45,7 +45,7 @@ class Instruction;
 class Value;

 /// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
-/// the outermost 'for' instruction to the innermost one.
+/// the outermost 'affine.for' instruction to the innermost one.
 // TODO(bondhugula): handle 'affine.if' inst's.
 void getLoopIVs(Instruction &inst, SmallVectorImpl<AffineForOp> *loops);

@@ -113,8 +113,8 @@ AffineForOp insertBackwardComputationSlice(Instruction *srcOpInst,
 /// surrounding such op's.
 // For example, the memref region for a load operation at loop depth = 1:
 //
-// for %i = 0 to 32 {
-// for %ii = %i to (d0) -> (d0 + 8) (%i) {
+// affine.for %i = 0 to 32 {
+// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
 // load %A[%ii]
 // }
 // }
@@ -146,8 +146,8 @@ struct MemRefRegion {
 /// For example, the memref region for this operation at loopDepth = 1 will
 /// be:
 ///
-/// for %i = 0 to 32 {
-/// for %ii = %i to (d0) -> (d0 + 8) (%i) {
+/// affine.for %i = 0 to 32 {
+/// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
 /// load %A[%ii]
 /// }
 /// }

@@ -76,9 +76,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 /// The following MLIR snippet:
 ///
 /// ```mlir
-/// for %i3 = 0 to %0 {
-/// for %i4 = 0 to %1 {
-/// for %i5 = 0 to %2 {
+/// affine.for %i3 = 0 to %0 {
+/// affine.for %i4 = 0 to %1 {
+/// affine.for %i5 = 0 to %2 {
 /// %a5 = load %arg0[%i4, %i5, %i3] : memref<?x?x?xf32>
 /// }}}
 /// ```
@@ -86,9 +86,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 /// may vectorize with {permutation_map: (d0, d1, d2) -> (d2, d1)} into:
 ///
 /// ```mlir
-/// for %i3 = 0 to %0 step 32 {
-/// for %i4 = 0 to %1 {
-/// for %i5 = 0 to %2 step 256 {
+/// affine.for %i3 = 0 to %0 step 32 {
+/// affine.for %i4 = 0 to %1 {
+/// affine.for %i5 = 0 to %2 step 256 {
 /// %4 = vector_transfer_read %arg0, %i4, %i5, %i3
 /// {permutation_map: (d0, d1, d2) -> (d2, d1)} :
 /// (memref<?x?x?xf32>, index, index) -> vector<32x256xf32>
@@ -103,7 +103,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 ///
 /// ```mlir
 /// %cst0 = constant 0 : index
-/// for %i0 = 0 to %0 {
+/// affine.for %i0 = 0 to %0 {
 /// %a0 = load %arg0[%cst0, %cst0] : memref<?x?xf32>
 /// }
 /// ```
@@ -111,7 +111,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 /// may vectorize with {permutation_map: (d0) -> (0)} into:
 ///
 /// ```mlir
-/// for %i0 = 0 to %0 step 128 {
+/// affine.for %i0 = 0 to %0 step 128 {
 /// %3 = vector_transfer_read %arg0, %c0_0, %c0_0
 /// {permutation_map: (d0, d1) -> (0)} :
 /// (memref<?x?xf32>, index, index) -> vector<128xf32>

@@ -341,7 +341,8 @@ protected:
 /// (e.g. vectorValue = load(vectorView, zero)).
 ///
 /// Only ExprKind::StmtBlockLikeExpr have `enclosedStmts`, these comprise:
-/// 1. `For`-loops for which the `lhs` binds to the induction variable, `rhs`
+/// 1. `affine.for`-loops for which the `lhs` binds to the induction variable,
+/// `rhs`
 /// binds to an Expr of kind `ExprKind::For` with lower-bound, upper-bound and
 /// step respectively.
 // TODO(zinenko): this StmtBlockLikeExpr should be retired in favor of Expr
@@ -647,7 +648,7 @@ Stmt For(llvm::ArrayRef<Expr> indices, llvm::ArrayRef<Expr> lbs,
 llvm::ArrayRef<Expr> ubs, llvm::ArrayRef<Expr> steps,
 llvm::ArrayRef<Stmt> enclosedStmts);

-/// Define a 'for' loop from with multi-valued bounds.
+/// Define a 'affine.for' loop from with multi-valued bounds.
 ///
 /// for max(lbs...) to min(ubs...) {}
 ///

@@ -73,9 +73,10 @@ void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
 SmallVectorImpl<Value *> *operands,
 FuncBuilder *builder);

-/// Skew the instructions in the body of a 'for' instruction with the specified
-/// instruction-wise shifts. The shifts are with respect to the original
-/// execution order, and are multiplied by the loop 'step' before being applied.
+/// Skew the instructions in the body of a 'affine.for' instruction with the
+/// specified instruction-wise shifts. The shifts are with respect to the
+/// original execution order, and are multiplied by the loop 'step' before being
+/// applied.
 LLVM_NODISCARD
 LogicalResult instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
 bool unrollPrologueEpilogue = false);

@@ -95,14 +95,14 @@ Instruction *createComposedAffineApplyOp(FuncBuilder *builder, Location loc,
 ///
 /// Before
 ///
-/// for %i = 0 to #map(%N)
+/// affine.for %i = 0 to #map(%N)
 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 /// send %A[%idx], ...
 /// %v = "compute"(%idx, ...)
 ///
 /// After
 ///
-/// for %i = 0 to #map(%N)
+/// affine.for %i = 0 to #map(%N)
 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 /// send %A[%idx], ...
 /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)

@@ -36,7 +36,7 @@ using llvm::dbgs;
 //===----------------------------------------------------------------------===//

 AffineOpsDialect::AffineOpsDialect(MLIRContext *context)
-: Dialect(/*namePrefix=*/"", context) {
+: Dialect(/*namePrefix=*/"affine", context) {
 addOperations<AffineApplyOp, AffineForOp, AffineIfOp>();
 }

@@ -69,7 +69,7 @@ bool mlir::isValidDim(Value *value) {
 return isTopLevelSymbol(dimOp->getOperand());
 return false;
 }
-// This value is a block argument (which also includes 'for' loop IVs).
+// This value is a block argument (which also includes 'affine.for' loop IVs).
 return true;
 }

@@ -969,7 +969,7 @@ static void printBound(AffineBound bound, const char *prefix, OpAsmPrinter *p) {
 }

 void AffineForOp::print(OpAsmPrinter *p) {
-*p << "for ";
+*p << "affine.for ";
 p->printOperand(getBody()->getArgument(0));
 *p << " = ";
 printBound(getLowerBound(), "max", p);

@@ -708,8 +708,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
 // For example, given the following MLIR code with with "source" and
 // "destination" accesses to the same memref labled, and symbols %M, %N, %K:
 //
-// for %i0 = 0 to 100 {
-// for %i1 = 0 to 50 {
+// affine.for %i0 = 0 to 100 {
+// affine.for %i1 = 0 to 50 {
 // %a0 = affine.apply
 // (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
 // // Source memref access.
@@ -717,8 +717,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
 // }
 // }
 //
-// for %i2 = 0 to 100 {
-// for %i3 = 0 to 50 {
+// affine.for %i2 = 0 to 100 {
+// affine.for %i3 = 0 to 50 {
 // %a1 = affine.apply
 // (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
 // // Destination memref access.

@@ -15,7 +15,7 @@
 // limitations under the License.
 // =============================================================================
 //
-// This file implements a pass to detect parallel affine 'for' ops.
+// This file implements a pass to detect parallel affine 'affine.for' ops.
 //
 //===----------------------------------------------------------------------===//

@@ -40,7 +40,8 @@ FunctionPassBase *mlir::createParallelismDetectionTestPass() {
 return new TestParallelismDetection();
 }

-// Walks the function and emits a note for all 'for' ops detected as parallel.
+// Walks the function and emits a note for all 'affine.for' ops detected as
+// parallel.
 void TestParallelismDetection::runOnFunction() {
 Function *f = getFunction();
 FuncBuilder b(f);

@@ -38,11 +38,11 @@ using namespace mlir;
 using llvm::SmallDenseMap;

 /// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
-/// the outermost 'for' instruction to the innermost one.
+/// the outermost 'affine.for' instruction to the innermost one.
 void mlir::getLoopIVs(Instruction &inst, SmallVectorImpl<AffineForOp> *loops) {
 auto *currInst = inst.getParentInst();
 AffineForOp currAffineForOp;
-// Traverse up the hierarchy collecing all 'for' instruction while
+// Traverse up the hierarchy collecing all 'affine.for' instruction while
 // skipping over 'affine.if' instructions.
 while (currInst && ((currAffineForOp = currInst->dyn_cast<AffineForOp>()) ||
 currInst->isa<AffineIfOp>())) {
@@ -162,8 +162,8 @@ LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
 // For example, the memref region for this load operation at loopDepth = 1 will
 // be as below:
 //
-// for %i = 0 to 32 {
-// for %ii = %i to (d0) -> (d0 + 8) (%i) {
+// affine.for %i = 0 to 32 {
+// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
 // load %A[%ii]
 // }
 // }
@@ -683,7 +683,7 @@ static Optional<int64_t> getMemoryFootprintBytes(Block &block,
 int memorySpace) {
 SmallDenseMap<Value *, std::unique_ptr<MemRefRegion>, 4> regions;

-// Walk this 'for' instruction to gather all memory regions.
+// Walk this 'affine.for' instruction to gather all memory regions.
 bool error = false;
 block.walk(start, end, [&](Instruction *opInst) {
 if (!opInst->isa<LoadOp>() && !opInst->isa<StoreOp>()) {

@@ -146,7 +146,8 @@ Value *mlir::edsc::MLIREmitter::emitExpr(Expr e) {
 if (auto expr = e.dyn_cast<StmtBlockLikeExpr>()) {
 if (expr.getKind() == ExprKind::For) {
 auto exprGroups = expr.getAllArgumentGroups();
-assert(exprGroups.size() == 3 && "expected 3 expr groups in `for`");
+assert(exprGroups.size() == 3 &&
+"expected 3 expr groups in `affine.for`");
 assert(!exprGroups[0].empty() && "expected at least one lower bound");
 assert(!exprGroups[1].empty() && "expected at least one upper bound");
 assert(exprGroups[2].size() == 1 &&

@@ -526,8 +526,8 @@ Stmt mlir::edsc::For(llvm::ArrayRef<Expr> idxs, llvm::ArrayRef<MaxExpr> lbs,
 Stmt mlir::edsc::MaxMinFor(const Bindable &idx, ArrayRef<Expr> lbs,
 ArrayRef<Expr> ubs, Expr step,
 ArrayRef<Stmt> enclosedStmts) {
-assert(!lbs.empty() && "'for' loop must have lower bounds");
-assert(!ubs.empty() && "'for' loop must have upper bounds");
+assert(!lbs.empty() && "'affine.for' loop must have lower bounds");
+assert(!ubs.empty() && "'affine.for' loop must have upper bounds");

 // Use a null expression as a sentinel between lower and upper bound
 // expressions in the list of children.
@@ -964,7 +964,7 @@ void mlir::edsc::Expr::print(raw_ostream &os) const {
 } else if (auto stmtLikeExpr = this->dyn_cast<StmtBlockLikeExpr>()) {
 switch (stmtLikeExpr.getKind()) {
 // We only print the lb, ub and step here, which are the StmtBlockLike
-// part of the `for` StmtBlockLikeExpr.
+// part of the `affine.for` StmtBlockLikeExpr.
 case ExprKind::For: {
 auto exprGroups = stmtLikeExpr.getAllArgumentGroups();
 assert(exprGroups.size() == 3 &&

@@ -343,7 +343,7 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
 auto fastMemRefType = top.getMemRefType(
 fastBufferShape, memRefType.getElementType(), {}, fastMemorySpace);

-// Create the fast memory space buffer just before the 'for'
+// Create the fast memory space buffer just before the 'affine.for'
 // instruction.
 fastMemRef = prologue.create<AllocOp>(loc, fastMemRefType)->getResult();
 // Record it.
@@ -472,7 +472,7 @@ bool DmaGeneration::runOnBlock(Block *block) {
 // approach is conservative in some cases at the moment, we do a check later
 // and report an error with location info.
 // TODO(bondhugula): An 'affine.if' instruction is being treated similar to an
-// operation instruction. 'affine.if''s could have 'for's in them;
+// operation instruction. 'affine.if''s could have 'affine.for's in them;
 // treat them separately.

 // Get to the first load, store, or for op.
@@ -494,7 +494,7 @@ bool DmaGeneration::runOnBlock(Block *block) {
 fastMemCapacityBytes);
 };

-// If the memory footprint of the 'for' loop is higher than fast
+// If the memory footprint of the 'affine.for' loop is higher than fast
 // memory capacity (when provided), we recurse to DMA at an inner level
 // until we find a depth at which footprint fits in fast mem capacity. If
 // the footprint can't be calculated, we assume for now it fits. Recurse
@@ -507,7 +507,7 @@ bool DmaGeneration::runOnBlock(Block *block) {
 runOnBlock(/*begin=*/curBegin, /*end=*/it);
 // Recurse onto the body of this loop.
 runOnBlock(forOp->getBody());
-// The next region starts right after the 'for' instruction.
+// The next region starts right after the 'affine.for' instruction.
 curBegin = std::next(it);
 } else {
 // We have enough capacity, i.e., DMAs will be computed for the portion
@@ -698,7 +698,8 @@ uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) {
 [&](const SmallMapVector<Value *, std::unique_ptr<MemRefRegion>, 4>
 &regions) {
 for (const auto &regionEntry : regions) {
-// For each region, hoist DMA transfer past all invariant 'for's.
+// For each region, hoist DMA transfer past all invariant
+// 'affine.for's.
 Block::iterator dmaPlacementReadStart, dmaPlacementWriteStart;
 Block *dmaPlacementBlock;
 findHighestBlockForPlacement(

@@ -641,7 +641,8 @@ bool MemRefDependenceGraph::init(Function *f) {
 // all loads and store accesses it contains.
 LoopNestStateCollector collector;
 collector.collect(&inst);
-// Return false if a non 'for' region was found (not currently supported).
+// Return false if a non 'affine.for' region was found (not currently
+// supported).
 if (collector.hasNonForRegion)
 return false;
 Node node(nextNodeId++, &inst);

@@ -258,7 +258,8 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef<AffineForOp> band,
 // Returns all maximal outermost perfect loop nests to tile.
 static void getTileableBands(Function *f,
 std::vector<SmallVector<AffineForOp, 6>> *bands) {
-// Get maximal perfect nest of 'for' insts starting from root (inclusive).
+// Get maximal perfect nest of 'affine.for' insts starting from root
+// (inclusive).
 auto getMaximalPerfectLoopNest = [&](AffineForOp root) {
 SmallVector<AffineForOp, 6> band;
 AffineForOp currInst = root;

@@ -158,8 +158,8 @@ void LoopUnroll::runOnFunction() {
 }
 }

-/// Unrolls a 'for' inst. Returns success if the loop was unrolled, failure
-/// otherwise. The default unroll factor is 4.
+/// Unrolls a 'affine.for' inst. Returns success if the loop was unrolled,
+/// failure otherwise. The default unroll factor is 4.
 LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
 // Use the function callback if one was provided.
 if (getUnrollFactor) {

@@ -96,7 +96,7 @@ void LoopUnrollAndJam::runOnFunction() {
 runOnAffineForOp(forOp);
 }

-/// Unroll and jam a 'for' inst. Default unroll jam factor is
+/// Unroll and jam a 'affine.for' inst. Default unroll jam factor is
 /// kDefaultUnrollJamFactor. Return failure if nothing was done.
 LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) {
 // Unroll and jam by the factor that was passed if any.

@@ -276,7 +276,7 @@ static Value *buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate,
 return value;
 }

-// Convert a "for" loop to a flow of blocks. Return `false` on success.
+// Convert a "affine.for" loop to a flow of blocks. Return `false` on success.
 //
 // Create an SESE region for the loop (including its body) and append it to the
 // end of the current region. The loop region consists of the initialization
@@ -323,8 +323,9 @@ bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) {
 auto loc = forOp->getLoc();
 auto *forInst = forOp->getInstruction();

-// Start by splitting the block containing the 'for' into two parts. The part
-// before will get the init code, the part after will be the end point.
+// Start by splitting the block containing the 'affine.for' into two parts.
+// The part before will get the init code, the part after will be the end
+// point.
 auto *initBlock = forInst->getBlock();
 auto *endBlock = initBlock->splitBlock(forInst);

@@ -56,9 +56,9 @@
 /// // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into
 /// // vector<32x256xf32> and pad with %f0 to handle the boundary case:
 /// %f0 = constant 0.0f : f32
-/// for %i0 = 0 to %0 {
-/// for %i1 = 0 to %1 step 256 {
-/// for %i2 = 0 to %2 step 32 {
+/// affine.for %i0 = 0 to %0 {
+/// affine.for %i1 = 0 to %1 step 256 {
+/// affine.for %i2 = 0 to %2 step 32 {
 /// %v = vector_transfer_read %A, %i0, %i1, %i2, %f0
 /// {permutation_map: (d0, d1, d2) -> (d2, d1)} :
 /// (memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32>
@@ -70,8 +70,8 @@
 /// abstraction):
 ///
 /// ```mlir {.mlir}
-/// for %d2 = 0 to 256 {
-/// for %d1 = 0 to 32 {
+/// affine.for %d2 = 0 to 256 {
+/// affine.for %d1 = 0 to 32 {
 /// %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32
 /// %tmp[%d2, %d1] = %s
 /// }
@@ -100,10 +100,10 @@
 /// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
 /// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
 /// %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> :
-/// vector<4x4x4xf32> for %i0 = 0 to %M step 4 {
-/// for %i1 = 0 to %N step 4 {
-/// for %i2 = 0 to %O {
-/// for %i3 = 0 to %P step 4 {
+/// vector<4x4x4xf32> affine.for %i0 = 0 to %M step 4 {
+/// affine.for %i1 = 0 to %N step 4 {
+/// affine.for %i2 = 0 to %O {
+/// affine.for %i3 = 0 to %P step 4 {
 /// vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3
 /// {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} :
 /// vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>,
@@ -119,10 +119,10 @@
 /// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
 /// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
 /// %f1 = constant splat<vector<4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
-/// for %i0 = 0 to %arg0 step 4 {
-/// for %i1 = 0 to %arg1 step 4 {
-/// for %i2 = 0 to %arg2 {
-/// for %i3 = 0 to %arg3 step 4 {
+/// affine.for %i0 = 0 to %arg0 step 4 {
+/// affine.for %i1 = 0 to %arg1 step 4 {
+/// affine.for %i2 = 0 to %arg2 {
+/// affine.for %i3 = 0 to %arg3 step 4 {
 /// %1 = affine.apply (d0, d1, d2, d3) -> (d0, d1, d2, d3)
 /// (%i0, %i1, %i2, %i3)
 /// vector_transfer_write f1, %0, %1#0, %1#1, %1#2, %1#3
@@ -286,10 +286,10 @@ static Value *substitute(Value *v, VectorType hwVectorType,
 /// super-vectorization has been applied:
 ///
 /// ```mlir
-/// for %i0 = 0 to %M {
-/// for %i1 = 0 to %N step 3 {
-/// for %i2 = 0 to %O {
-/// for %i3 = 0 to %P step 32 {
+/// affine.for %i0 = 0 to %M {
+/// affine.for %i1 = 0 to %N step 3 {
+/// affine.for %i2 = 0 to %O {
+/// affine.for %i3 = 0 to %P step 32 {
 /// %r = vector_transfer_read(%A, map(%i..)#0, map(%i..)#1, map(%i..)#2)
 /// -> vector<3x32xf32>
 /// ...

@@ -19,7 +19,7 @@
 // potentially getting rid of intermediate memref's entirely.
 // TODO(mlir-team): In the future, similar techniques could be used to eliminate
 // dead memref store's and perform more complex forwarding when support for
-// SSA scalars live out of 'for'/'affine.if' statements is available.
+// SSA scalars live out of 'affine.for'/'affine.if' statements is available.
 //===----------------------------------------------------------------------===//

 #include "mlir/Analysis/AffineAnalysis.h"
@@ -55,7 +55,7 @@ namespace {
 //
 // (* A dependence being satisfied at a block: a dependence that is satisfied by
 // virtue of the destination instruction appearing textually / lexically after
-// the source instruction within the body of a 'for' instruction; thus, a
+// the source instruction within the body of a 'affine.for' instruction; thus, a
 // dependence is always either satisfied by a loop or by a block).
 //
 // The above conditions are simple to check, sufficient, and powerful for most
@@ -139,8 +139,8 @@ void MemRefDataFlowOpt::forwardStoreToLoad(LoadOp loadOp) {
 // Check if this store is a candidate for forwarding; we only forward if
 // the dependence from the store is carried by the *body* of innermost
 // common surrounding loop. As an example this filters out cases like:
-// for %i0
-// for %i1
+// affine.for %i0
+// affine.for %i1
 // %idx = affine.apply (d0) -> (d0 + 1) (%i0)
 // store %A[%idx]
 // load %A[%i0]

@@ -66,11 +66,11 @@ static unsigned getTagMemRefPos(Instruction &dmaInst) {
 return 0;
 }

-/// Doubles the buffer of the supplied memref on the specified 'for' instruction
-/// by adding a leading dimension of size two to the memref. Replaces all uses
-/// of the old memref by the new one while indexing the newly added dimension by
-/// the loop IV of the specified 'for' instruction modulo 2. Returns false if
-/// such a replacement cannot be performed.
+/// Doubles the buffer of the supplied memref on the specified 'affine.for'
+/// instruction by adding a leading dimension of size two to the memref.
+/// Replaces all uses of the old memref by the new one while indexing the newly
+/// added dimension by the loop IV of the specified 'affine.for' instruction
+/// modulo 2. Returns false if such a replacement cannot be performed.
 static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
 auto *forBody = forOp->getBody();
 FuncBuilder bInner(forBody, forBody->begin());
@@ -104,7 +104,7 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
 dynamicDimCount++));
 }

-// Create and place the alloc right before the 'for' instruction.
+// Create and place the alloc right before the 'affine.for' instruction.
 Value *newMemRef =
 bOuter.create<AllocOp>(forInst->getLoc(), newMemRefType, allocOperands);

@@ -139,9 +139,9 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
 /// Returns success if the IR is in a valid state.
 void PipelineDataTransfer::runOnFunction() {
 // Do a post order walk so that inner loop DMAs are processed first. This is
-// necessary since 'for' instructions nested within would otherwise become
-// invalid (erased) when the outer loop is pipelined (the pipelined one gets
-// deleted and replaced by a prologue, a new steady-state loop and an
+// necessary since 'affine.for' instructions nested within would otherwise
+// become invalid (erased) when the outer loop is pipelined (the pipelined one
+// gets deleted and replaced by a prologue, a new steady-state loop and an
 // epilogue).
 forOps.clear();
 getFunction()->walkPostOrder<AffineForOp>(

@@ -71,7 +71,7 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
 auto lb = b->create<AffineApplyOp>(forOp->getLoc(), lbMap, lbOperands);

 // For each upper bound expr, get the range.
-// Eg: for %i = lb to min (ub1, ub2),
+// Eg: affine.for %i = lb to min (ub1, ub2),
 // where tripCountExprs yield (tr1, tr2), we create affine.apply's:
 // lb + tr1 - tr1 % ufactor, lb + tr2 - tr2 % ufactor; the results of all
 // these affine.apply's make up the cleanup loop lower bound.
@@ -161,8 +161,8 @@ void mlir::promoteSingleIterationLoops(Function *f) {
 [](AffineForOp forOp) { promoteIfSingleIteration(forOp); });
 }

-/// Generates a 'for' inst with the specified lower and upper bounds while
-/// generating the right IV remappings for the shifted instructions. The
+/// Generates a 'affine.for' inst with the specified lower and upper bounds
+/// while generating the right IV remappings for the shifted instructions. The
 /// instruction blocks that go into the loop are specified in instGroupQueue
 /// starting from the specified offset, and in that order; the first element of
 /// the pair specifies the shift applied to that group of instructions; note
@@ -216,10 +216,10 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
 return loopChunk;
 }

-/// Skew the instructions in the body of a 'for' instruction with the specified
-/// instruction-wise shifts. The shifts are with respect to the original
-/// execution order, and are multiplied by the loop 'step' before being applied.
-/// A shift of zero for each instruction will lead to no change.
+/// Skew the instructions in the body of a 'affine.for' instruction with the
+/// specified instruction-wise shifts. The shifts are with respect to the
+/// original execution order, and are multiplied by the loop 'step' before being
+/// applied. A shift of zero for each instruction will lead to no change.
 // The skewing of instructions with respect to one another can be used for
 // example to allow overlap of asynchronous operations (such as DMA
 // communication) with computation, or just relative shifting of instructions
@@ -267,7 +267,7 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,

 // An array of instruction groups sorted by shift amount; each group has all
 // instructions with the same shift in the order in which they appear in the
-// body of the 'for' inst.
+// body of the 'affine.for' inst.
 std::vector<std::vector<Instruction *>> sortedInstGroups(maxShift + 1);
 unsigned pos = 0;
 for (auto &inst : *forOp->getBody()) {
@@ -499,7 +499,7 @@ void mlir::sinkLoop(AffineForOp forOp, unsigned loopDepth) {
 // bounds, the resulting IR resembles:
 //
 // ```mlir
-// for %i = max (`iv, ...) to min (`iv` + `offset`) {
+// affine.for %i = max (`iv, ...) to min (`iv` + `offset`) {
 // ...
 // }
 // ```

@@ -199,14 +199,14 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef,
 ///
 /// Before
 ///
-/// for %i = 0 to #map(%N)
+/// affine.for %i = 0 to #map(%N)
 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 /// "send"(%idx, %A, ...)
 /// "compute"(%idx)
 ///
 /// After
 ///
-/// for %i = 0 to #map(%N)
+/// affine.for %i = 0 to #map(%N)
 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 /// "send"(%idx, %A, ...)
 /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)

@ -113,7 +113,7 @@ using namespace mlir;
|
||||
///
|
||||
/// At a high level, a vectorized load in a loop will resemble:
|
||||
/// ```mlir
|
||||
/// for %i = ? to ? step ? {
|
||||
/// affine.for %i = ? to ? step ? {
|
||||
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
|
||||
/// vector<128xf32>
|
||||
/// }
|
||||
@ -309,7 +309,7 @@ using namespace mlir;
|
||||
/// ```mlir
|
||||
/// mlfunc @fill(%A : memref<128xf32>) -> () {
|
||||
/// %f1 = constant 1.0 : f32
|
||||
/// for %i0 = 0 to 32 {
|
||||
/// affine.for %i0 = 0 to 32 {
|
||||
/// store %f1, %A[%i0] : memref<128xf32, 0>
|
||||
/// }
|
||||
/// return
|
||||
@ -322,7 +322,7 @@ using namespace mlir;
|
||||
/// is still subject to exploratory tradeoffs. In particular, say we want to
|
||||
/// vectorize by a factor 128, we want to transform the following input:
|
||||
/// ```mlir
|
||||
/// for %i = %M to %N {
|
||||
/// affine.for %i = %M to %N {
|
||||
/// %a = load A[%i] : memref<?xf32>
|
||||
/// }
|
||||
/// ```
|
||||
@ -331,8 +331,8 @@ using namespace mlir;
|
||||
/// memory promotion etc) say after stripmining (and potentially unrolling in
|
||||
/// the case of LLVM's SLP vectorizer):
|
||||
/// ```mlir
|
||||
/// for %i = floor(%M, 128) to ceil(%N, 128) {
|
||||
/// for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) {
|
||||
/// affine.for %i = floor(%M, 128) to ceil(%N, 128) {
|
||||
/// affine.for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) {
|
||||
/// %a = load A[%ii] : memref<?xf32>
|
||||
/// }
|
||||
/// }
|
||||
@ -341,7 +341,7 @@ using namespace mlir;
|
||||
/// Instead, we seek to vectorize early and freeze vector types before
|
||||
/// scheduling, so we want to generate a pattern that resembles:
|
||||
/// ```mlir
|
||||
/// for %i = ? to ? step ? {
|
||||
/// affine.for %i = ? to ? step ? {
|
||||
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
|
||||
/// vector<128xf32>
|
||||
/// }
|
||||
@ -362,7 +362,7 @@ using namespace mlir;
|
||||
/// For the simple strawman example above, vectorizing for a 1-D vector
|
||||
/// abstraction of size 128 returns code similar to:
|
||||
/// ```mlir
|
||||
/// for %i = %M to %N step 128 {
|
||||
/// affine.for %i = %M to %N step 128 {
|
||||
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
|
||||
/// vector<128xf32>
|
||||
/// }
|
||||
@ -391,20 +391,20 @@ using namespace mlir;
|
||||
/// %C = alloc (%M, %N) : memref<?x?xf32, 0>
|
||||
/// %f1 = constant 1.0 : f32
|
||||
/// %f2 = constant 2.0 : f32
|
||||
/// for %i0 = 0 to %M {
|
||||
/// for %i1 = 0 to %N {
|
||||
/// affine.for %i0 = 0 to %M {
|
||||
/// affine.for %i1 = 0 to %N {
|
||||
/// // non-scoped %f1
|
||||
/// store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
|
||||
/// }
|
||||
/// }
|
||||
/// for %i2 = 0 to %M {
|
||||
/// for %i3 = 0 to %N {
|
||||
/// affine.for %i2 = 0 to %M {
|
||||
/// affine.for %i3 = 0 to %N {
|
||||
/// // non-scoped %f2
|
||||
/// store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
|
||||
/// }
|
||||
/// }
|
||||
/// for %i4 = 0 to %M {
|
||||
/// for %i5 = 0 to %N {
|
||||
/// affine.for %i4 = 0 to %M {
|
||||
/// affine.for %i5 = 0 to %N {
|
||||
/// %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
|
||||
/// %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
|
||||
/// %s5 = addf %a5, %b5 : f32
|
||||
@ -438,24 +438,24 @@ using namespace mlir;
|
||||
/// %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
|
||||
/// %cst = constant 1.0 : f32
|
||||
/// %cst_0 = constant 2.0 : f32
|
||||
/// for %i0 = 0 to %arg0 {
|
||||
/// for %i1 = 0 to %arg1 step 256 {
|
||||
/// affine.for %i0 = 0 to %arg0 {
|
||||
/// affine.for %i1 = 0 to %arg1 step 256 {
|
||||
/// %cst_1 = constant splat<vector<256xf32>, 1.0> :
|
||||
/// vector<256xf32>
|
||||
/// "vector_transfer_write"(%cst_1, %0, %i0, %i1) :
|
||||
/// (vector<256xf32>, memref<?x?xf32>, index, index) -> ()
|
||||
/// }
|
||||
/// }
|
||||
/// for %i2 = 0 to %arg0 {
|
||||
/// for %i3 = 0 to %arg1 step 256 {
|
||||
/// affine.for %i2 = 0 to %arg0 {
|
||||
/// affine.for %i3 = 0 to %arg1 step 256 {
|
||||
/// %cst_2 = constant splat<vector<256xf32>, 2.0> :
|
||||
/// vector<256xf32>
|
||||
/// "vector_transfer_write"(%cst_2, %1, %i2, %i3) :
|
||||
/// (vector<256xf32>, memref<?x?xf32>, index, index) -> ()
|
||||
/// }
|
||||
/// }
|
||||
/// for %i4 = 0 to %arg0 {
|
||||
/// for %i5 = 0 to %arg1 step 256 {
|
||||
/// affine.for %i4 = 0 to %arg0 {
|
||||
/// affine.for %i5 = 0 to %arg1 step 256 {
|
||||
/// %3 = "vector_transfer_read"(%0, %i4, %i5) :
|
||||
/// (memref<?x?xf32>, index, index) -> vector<256xf32>
|
||||
/// %4 = "vector_transfer_read"(%1, %i4, %i5) :
|
||||
@ -494,24 +494,24 @@ using namespace mlir;
|
||||
/// %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
|
||||
/// %cst = constant 1.0 : f32
|
||||
/// %cst_0 = constant 2.0 : f32
|
||||
/// for %i0 = 0 to %arg0 step 32 {
|
||||
/// for %i1 = 0 to %arg1 step 256 {
|
||||
/// affine.for %i0 = 0 to %arg0 step 32 {
|
||||
/// affine.for %i1 = 0 to %arg1 step 256 {
|
||||
/// %cst_1 = constant splat<vector<32x256xf32>, 1.0> :
|
||||
/// vector<32x256xf32>
|
||||
/// "vector_transfer_write"(%cst_1, %0, %i0, %i1) :
|
||||
/// (vector<32x256xf32>, memref<?x?xf32>, index, index) -> ()
|
||||
/// }
|
||||
/// }
|
||||
/// for %i2 = 0 to %arg0 step 32 {
|
||||
/// for %i3 = 0 to %arg1 step 256 {
|
||||
/// affine.for %i2 = 0 to %arg0 step 32 {
|
||||
/// affine.for %i3 = 0 to %arg1 step 256 {
|
||||
/// %cst_2 = constant splat<vector<32x256xf32>, 2.0> :
|
||||
/// vector<32x256xf32>
|
||||
/// "vector_transfer_write"(%cst_2, %1, %i2, %i3) :
|
||||
/// (vector<32x256xf32>, memref<?x?xf32>, index, index) -> ()
|
||||
/// }
|
||||
/// }
|
||||
/// for %i4 = 0 to %arg0 step 32 {
|
||||
/// for %i5 = 0 to %arg1 step 256 {
|
||||
/// affine.for %i4 = 0 to %arg0 step 32 {
|
||||
/// affine.for %i5 = 0 to %arg1 step 256 {
|
||||
/// %3 = "vector_transfer_read"(%0, %i4, %i5) :
|
||||
/// (memref<?x?xf32>, index, index) -> vector<32x256xf32>
|
||||
/// %4 = "vector_transfer_read"(%1, %i4, %i5) :
|
||||
|
@ -47,7 +47,7 @@
|
||||
func @compose_affine_maps_1dto2d_no_symbols() {
|
||||
%0 = alloc() : memref<4x4xf32>
|
||||
|
||||
for %i0 = 0 to 15 {
|
||||
affine.for %i0 = 0 to 15 {
|
||||
// Test load[%x, %x]
|
||||
|
||||
%x0 = affine.apply (d0) -> (d0 - 1) (%i0)
|
||||
@ -93,7 +93,7 @@ func @compose_affine_maps_1dto2d_no_symbols() {
|
||||
func @compose_affine_maps_1dto2d_with_symbols() {
|
||||
%0 = alloc() : memref<4x4xf32>
|
||||
|
||||
for %i0 = 0 to 15 {
|
||||
affine.for %i0 = 0 to 15 {
|
||||
// Test load[%x0, %x0] with symbol %c4
|
||||
%c4 = constant 4 : index
|
||||
%x0 = affine.apply (d0)[s0] -> (d0 - s0) (%i0)[%c4]
|
||||
@ -134,13 +134,13 @@ func @compose_affine_maps_2d_tile() {
|
||||
%c4 = constant 4 : index
|
||||
%c8 = constant 8 : index
|
||||
|
||||
for %i0 = 0 to 3 {
|
||||
affine.for %i0 = 0 to 3 {
|
||||
%x0 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i0)[%c4]
|
||||
for %i1 = 0 to 3 {
|
||||
affine.for %i1 = 0 to 3 {
|
||||
%x1 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i1)[%c8]
|
||||
for %i2 = 0 to 3 {
|
||||
affine.for %i2 = 0 to 3 {
|
||||
%x2 = affine.apply (d0)[s0] -> (d0 mod s0) (%i2)[%c4]
|
||||
for %i3 = 0 to 3 {
|
||||
affine.for %i3 = 0 to 3 {
|
||||
%x3 = affine.apply (d0)[s0] -> (d0 mod s0) (%i3)[%c8]
|
||||
|
||||
%x40 = affine.apply (d0, d1, d2, d3)[s0, s1] ->
|
||||
@ -166,9 +166,9 @@ func @compose_affine_maps_dependent_loads() {
|
||||
%0 = alloc() : memref<16x32xf32>
|
||||
%1 = alloc() : memref<16x32xf32>
|
||||
|
||||
for %i0 = 0 to 3 {
|
||||
for %i1 = 0 to 3 {
|
||||
for %i2 = 0 to 3 {
|
||||
affine.for %i0 = 0 to 3 {
|
||||
affine.for %i1 = 0 to 3 {
|
||||
affine.for %i2 = 0 to 3 {
|
||||
%c3 = constant 3 : index
|
||||
%c7 = constant 7 : index
|
||||
|
||||
@ -212,7 +212,7 @@ func @compose_affine_maps_dependent_loads() {
|
||||
func @compose_affine_maps_diamond_dependency() {
|
||||
%0 = alloc() : memref<4x4xf32>
|
||||
|
||||
for %i0 = 0 to 15 {
|
||||
affine.for %i0 = 0 to 15 {
|
||||
%a = affine.apply (d0) -> (d0 - 1) (%i0)
|
||||
%b = affine.apply (d0) -> (d0 + 7) (%a)
|
||||
%c = affine.apply (d0) -> (d0 * 4) (%a)
|
||||
@ -232,8 +232,8 @@ func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) {
|
||||
%c9 = constant 9 : index
|
||||
%1 = alloc() : memref<100x100xf32, 1>
|
||||
%2 = alloc() : memref<1xi32>
|
||||
for %i0 = 0 to 100 {
|
||||
for %i1 = 0 to 100 {
|
||||
affine.for %i0 = 0 to 100 {
|
||||
affine.for %i1 = 0 to 100 {
|
||||
%3 = affine.apply (d0, d1)[s0, s1] -> (d1 + s0 + s1)
|
||||
(%i0, %i1)[%arg1, %c9]
|
||||
%4 = affine.apply (d0, d1, d3) -> (d3 - (d0 + d1))
|
||||
@ -253,7 +253,7 @@ func @trivial_maps() {
|
||||
%0 = alloc() : memref<10xf32>
|
||||
%c0 = constant 0 : index
|
||||
%cst = constant 0.000000e+00 : f32
|
||||
for %i1 = 0 to 10 {
|
||||
affine.for %i1 = 0 to 10 {
|
||||
%1 = affine.apply ()[s0] -> (s0)()[%c0]
|
||||
store %cst, %0[%1] : memref<10xf32>
|
||||
%2 = load %0[%c0] : memref<10xf32>
|
||||
@ -380,7 +380,7 @@ func @mix_dims_and_symbols_g(%M: index, %N: index) -> (index, index, index) {
|
||||
// CHECK-LABEL: func @symbolic_semi_affine(%arg0: index, %arg1: index, %arg2: memref<?xf32>) {
|
||||
func @symbolic_semi_affine(%M: index, %N: index, %A: memref<?xf32>) {
|
||||
%f1 = constant 1.0 : f32
|
||||
for %i0 = 1 to 100 {
|
||||
affine.for %i0 = 1 to 100 {
|
||||
%1 = affine.apply ()[s0] -> (s0 + 1) ()[%M]
|
||||
%2 = affine.apply (d0)[s0] -> (d0 floordiv s0) (%i0)[%1]
|
||||
// CHECK-DAG: {{.*}} = affine.apply [[symbolic_semi_affine]](%i0)[%arg0]
|
||||
@ -404,20 +404,20 @@ func @constant_fold_bounds(%N : index) {
|
||||
%c3 = affine.apply (d0, d1) -> (d0 + d1) (%c1, %c2)
|
||||
%l = "foo"() : () -> index
|
||||
|
||||
// CHECK: for %i0 = 5 to 7 {
|
||||
for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) {
|
||||
// CHECK: affine.for %i0 = 5 to 7 {
|
||||
affine.for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) {
|
||||
"foo"(%i, %c3) : (index, index) -> ()
|
||||
}
|
||||
|
||||
// Bound takes a non-constant argument but can still be folded.
|
||||
// CHECK: for %i1 = 1 to 7 {
|
||||
for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) {
|
||||
// CHECK: affine.for %i1 = 1 to 7 {
|
||||
affine.for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) {
|
||||
"foo"(%j, %c3) : (index, index) -> ()
|
||||
}
|
||||
|
||||
// None of the bounds can be folded.
|
||||
// CHECK: for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] {
|
||||
for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] {
|
||||
// CHECK: affine.for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] {
|
||||
affine.for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] {
|
||||
"foo"(%k, %c3) : (index, index) -> ()
|
||||
}
|
||||
return
|
||||
|
@ -5,7 +5,7 @@
|
||||
#map = (d0)[s0] -> (d0 + s0)
|
||||
|
||||
func @affine_apply_invalid_dim(%arg : index) {
|
||||
for %n0 = 0 to 7 {
|
||||
affine.for %n0 = 0 to 7 {
|
||||
%dim = addi %arg, %arg : index
|
||||
|
||||
// expected-error@+1 {{operand cannot be used as a dimension id}}
|
||||
@ -19,7 +19,7 @@ func @affine_apply_invalid_dim(%arg : index) {
|
||||
#map0 = (d0)[s0] -> (d0 + s0)
|
||||
|
||||
func @affine_apply_invalid_sym() {
|
||||
for %i0 = 0 to 7 {
|
||||
affine.for %i0 = 0 to 7 {
|
||||
// expected-error@+1 {{operand cannot be used as a symbol}}
|
||||
%0 = affine.apply #map0(%i0)[%i0]
|
||||
}
|
||||
@ -31,11 +31,11 @@ func @affine_apply_invalid_sym() {
|
||||
#map = (d0)[s0] -> (d0 + s0)
|
||||
|
||||
func @affine_for_lower_bound_invalid_dim(%arg : index) {
|
||||
for %n0 = 0 to 7 {
|
||||
affine.for %n0 = 0 to 7 {
|
||||
%dim = addi %arg, %arg : index
|
||||
|
||||
// expected-error@+1 {{operand cannot be used as a dimension id}}
|
||||
for %n1 = 0 to #map(%dim)[%arg] {
|
||||
affine.for %n1 = 0 to #map(%dim)[%arg] {
|
||||
}
|
||||
}
|
||||
return
|
||||
@ -46,11 +46,11 @@ func @affine_for_lower_bound_invalid_dim(%arg : index) {
|
||||
#map = (d0)[s0] -> (d0 + s0)
|
||||
|
||||
func @affine_for_upper_bound_invalid_dim(%arg : index) {
|
||||
for %n0 = 0 to 7 {
|
||||
affine.for %n0 = 0 to 7 {
|
||||
%dim = addi %arg, %arg : index
|
||||
|
||||
// expected-error@+1 {{operand cannot be used as a dimension id}}
|
||||
for %n1 = #map(%dim)[%arg] to 7 {
|
||||
affine.for %n1 = #map(%dim)[%arg] to 7 {
|
||||
}
|
||||
}
|
||||
return
|
||||
@ -61,9 +61,9 @@ func @affine_for_upper_bound_invalid_dim(%arg : index) {
|
||||
#map0 = (d0)[s0] -> (d0 + s0)
|
||||
|
||||
func @affine_for_lower_bound_invalid_sym() {
|
||||
for %i0 = 0 to 7 {
|
||||
affine.for %i0 = 0 to 7 {
|
||||
// expected-error@+1 {{operand cannot be used as a symbol}}
|
||||
for %n0 = #map0(%i0)[%i0] to 7 {
|
||||
affine.for %n0 = #map0(%i0)[%i0] to 7 {
|
||||
}
|
||||
}
|
||||
return
|
||||
@ -74,9 +74,9 @@ func @affine_for_lower_bound_invalid_sym() {
|
||||
#map0 = (d0)[s0] -> (d0 + s0)
|
||||
|
||||
func @affine_for_upper_bound_invalid_sym() {
|
||||
for %i0 = 0 to 7 {
|
||||
affine.for %i0 = 0 to 7 {
|
||||
// expected-error@+1 {{operand cannot be used as a symbol}}
|
||||
for %n0 = 0 to #map0(%i0)[%i0] {
|
||||
affine.for %n0 = 0 to #map0(%i0)[%i0] {
|
||||
}
|
||||
}
|
||||
return
|
||||
@ -87,7 +87,7 @@ func @affine_for_upper_bound_invalid_sym() {
|
||||
#set0 = (i)[N] : (i >= 0, N - i >= 0)
|
||||
|
||||
func @affine_if_invalid_dim(%arg : index) {
|
||||
for %n0 = 0 to 7 {
|
||||
affine.for %n0 = 0 to 7 {
|
||||
%dim = addi %arg, %arg : index
|
||||
|
||||
// expected-error@+1 {{operand cannot be used as a dimension id}}
|
||||
@ -101,7 +101,7 @@ func @affine_if_invalid_dim(%arg : index) {
|
||||
#set0 = (i)[N] : (i >= 0, N - i >= 0)
|
||||
|
||||
func @affine_if_invalid_sym() {
|
||||
for %i0 = 0 to 7 {
|
||||
affine.for %i0 = 0 to 7 {
|
||||
// expected-error@+1 {{operand cannot be used as a symbol}}
|
||||
affine.if #set0(%i0)[%i0] {}
|
||||
}
|
||||
@ -113,7 +113,7 @@ func @affine_if_invalid_sym() {
|
||||
#set0 = (i)[N] : (i >= 0, N - i >= 0)
|
||||
|
||||
func @affine_if_invalid_dimop_dim(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
|
||||
for %n0 = 0 to 7 {
|
||||
affine.for %n0 = 0 to 7 {
|
||||
%0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
|
||||
%dim = dim %0, 0 : memref<?x?x?x?xf32>
|
||||
|
||||
|
@ -2,9 +2,9 @@
|
||||
|
||||
// Check that the attributes for the affine operations are round-tripped.
|
||||
func @attributes() {
|
||||
// CHECK: for %i
|
||||
// CHECK: affine.for %i
|
||||
// CHECK-NEXT: } {some_attr: true}
|
||||
for %i = 0 to 10 {
|
||||
affine.for %i = 0 to 10 {
|
||||
} {some_attr: true}
|
||||
|
||||
// CHECK: if
|
||||
|
@ -143,7 +143,7 @@ TEST_FUNC(cond_branch) {
|
||||
f->print(llvm::outs());
|
||||
}
|
||||
|
||||
// Inject a EDSC-constructed `for` loop with bounds coming from function
|
||||
// Inject a EDSC-constructed `affine.for` loop with bounds coming from function
|
||||
// arguments.
|
||||
TEST_FUNC(dynamic_for_func_args) {
|
||||
auto indexType = IndexType::get(&globalContext());
|
||||
@ -164,7 +164,7 @@ TEST_FUNC(dynamic_for_func_args) {
|
||||
|
||||
// clang-format off
|
||||
// CHECK-LABEL: func @dynamic_for_func_args(%arg0: index, %arg1: index) {
|
||||
// CHECK: for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 {
|
||||
// CHECK: affine.for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 {
|
||||
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 * 3)()[%arg0]
|
||||
// CHECK: {{.*}} = affine.apply ()[s0, s1] -> (s1 + s0 * 3)()[%arg0, %arg1]
|
||||
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 + 3)()[%arg0]
|
||||
@ -172,7 +172,7 @@ TEST_FUNC(dynamic_for_func_args) {
|
||||
f->print(llvm::outs());
|
||||
}
|
||||
|
||||
// Inject a EDSC-constructed `for` loop with non-constant bounds that are
|
||||
// Inject a EDSC-constructed `affine.for` loop with non-constant bounds that are
|
||||
// obtained from AffineApplyOp (also constructed using EDSC operator
|
||||
// overloads).
|
||||
TEST_FUNC(dynamic_for) {
|
||||
@ -200,12 +200,12 @@ TEST_FUNC(dynamic_for) {
|
||||
// CHECK-LABEL: func @dynamic_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
|
||||
// CHECK: %0 = affine.apply ()[s0, s1] -> (s0 - s1)()[%arg0, %arg1]
|
||||
// CHECK-NEXT: %1 = affine.apply ()[s0, s1] -> (s0 + s1)()[%arg2, %arg3]
|
||||
// CHECK-NEXT: for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 {
|
||||
// CHECK-NEXT: affine.for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 {
|
||||
// clang-format on
|
||||
f->print(llvm::outs());
|
||||
}
|
||||
|
||||
// Inject a EDSC-constructed empty `for` loop with max/min bounds that
|
||||
// Inject a EDSC-constructed empty `affine.for` loop with max/min bounds that
|
||||
// corresponds to
|
||||
//
|
||||
// for max(%arg0, %arg1) to (%arg2, %arg3) step 1
|
||||
@ -234,7 +234,7 @@ TEST_FUNC(max_min_for) {
|
||||
|
||||
// clang-format off
|
||||
// CHECK-LABEL: func @max_min_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
|
||||
// CHECK: for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) {
|
||||
// CHECK: affine.for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) {
|
||||
// clang-format on
|
||||
f->print(llvm::outs());
|
||||
}
|
||||
@ -334,7 +334,7 @@ TEST_FUNC(assignments_1) {
|
||||
|
||||
// clang-format off
|
||||
// CHECK-LABEL: func @assignments(%arg0: memref<4xf32>, %arg1: memref<4xf32>, %arg2: memref<4xf32>) {
|
||||
// CHECK: for %[[iv:.*]] = 0 to 4 {
|
||||
// CHECK: affine.for %[[iv:.*]] = 0 to 4 {
|
||||
// CHECK: %[[a:.*]] = load %arg0[%[[iv]]] : memref<4xf32>
|
||||
// CHECK: %[[b:.*]] = load %arg1[%[[iv]]] : memref<4xf32>
|
||||
// CHECK: %[[tmp:.*]] = mulf %[[a]], %[[b]] : f32
|
||||
@ -348,7 +348,7 @@ TEST_FUNC(assignments_2) {
|
||||
|
||||
// clang-format off
|
||||
// CHECK-LABEL: func @assignments(%arg0: memref<?xf32>, %arg1: memref<?xf32>, %arg2: memref<?xf32>) {
|
||||
// CHECK: for %[[iv:.*]] = {{.*}} to {{.*}} {
|
||||
// CHECK: affine.for %[[iv:.*]] = {{.*}} to {{.*}} {
|
||||
// CHECK: %[[a:.*]] = load %arg0[%[[iv]]] : memref<?xf32>
|
||||
// CHECK: %[[b:.*]] = load %arg1[%[[iv]]] : memref<?xf32>
|
||||
// CHECK: %[[tmp:.*]] = mulf %[[a]], %[[b]] : f32
|
||||
@ -405,13 +405,13 @@ TEST_FUNC(tile_2d) {
|
||||
// CHECK: %[[M:[0-9]+]] = dim %arg0, 0 : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: %[[N:[0-9]+]] = dim %arg0, 1 : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: %[[P:[0-9]+]] = dim %arg0, 2 : memref<?x?x?xf32>
|
||||
// CHECK: for %i0 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[M]]) step 512 {
|
||||
// CHECK-NEXT: for %i1 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[N]]) step 1024 {
|
||||
// CHECK-NEXT: for %i2 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) {
|
||||
// CHECK-NEXT: for %i3 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] step 16 {
|
||||
// CHECK-NEXT: for %i4 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] step 32 {
|
||||
// CHECK-NEXT: for %i5 = max (d0, d1)[s0] -> (s0, d0, d1)(%i1, %i4)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 1024, d1 + 32)(%i1, %i4)[%[[N]]] {
|
||||
// CHECK-NEXT: for %i6 = max (d0, d1)[s0] -> (s0, d0, d1)(%i0, %i3)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 512, d1 + 16)(%i0, %i3)[%[[M]]] {
|
||||
// CHECK: affine.for %i0 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[M]]) step 512 {
|
||||
// CHECK-NEXT: affine.for %i1 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[N]]) step 1024 {
|
||||
// CHECK-NEXT: affine.for %i2 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) {
|
||||
// CHECK-NEXT: affine.for %i3 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] step 16 {
|
||||
// CHECK-NEXT: affine.for %i4 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] step 32 {
|
||||
// CHECK-NEXT: affine.for %i5 = max (d0, d1)[s0] -> (s0, d0, d1)(%i1, %i4)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 1024, d1 + 32)(%i1, %i4)[%[[N]]] {
|
||||
// CHECK-NEXT: affine.for %i6 = max (d0, d1)[s0] -> (s0, d0, d1)(%i0, %i3)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 512, d1 + 16)(%i0, %i3)[%[[M]]] {
|
||||
// CHECK-NEXT: {{.*}} = load {{.*}}[%i6, %i5, %i2] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: {{.*}} = load {{.*}}[%i6, %i5, %i2] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: {{.*}} = addf {{.*}}, {{.*}} : f32
|
||||
@ -421,9 +421,9 @@ TEST_FUNC(tile_2d) {
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: for %i7 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) {
|
||||
// CHECK-NEXT: for %i8 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] {
|
||||
// CHECK-NEXT: for %i9 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] {
|
||||
// CHECK-NEXT: affine.for %i7 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) {
|
||||
// CHECK-NEXT: affine.for %i8 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] {
|
||||
// CHECK-NEXT: affine.for %i9 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] {
|
||||
// CHECK-NEXT: {{.*}} = load {{.*}}[%i8, %i9, %i7] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: {{.*}} = load {{.*}}[%i8, %i9, %i7] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: {{.*}}= addf {{.*}}, {{.*}} : f32
|
||||
|
@ -80,11 +80,11 @@ TEST_FUNC(builder_dynamic_for_func_args) {
|
||||
|
||||
// clang-format off
|
||||
// CHECK-LABEL: func @builder_dynamic_for_func_args(%arg0: index, %arg1: index) {
|
||||
// CHECK: for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 {
|
||||
// CHECK: affine.for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 {
|
||||
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 * 3)()[%arg0]
|
||||
// CHECK: {{.*}} = affine.apply ()[s0, s1] -> (s1 + s0 * 3)()[%arg0, %arg1]
|
||||
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 + 3)()[%arg0]
|
||||
// CHECK: for %i1 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 2 {
|
||||
// CHECK: affine.for %i1 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 2 {
|
||||
// CHECK: {{.*}} = affine.apply (d0, d1) -> ((d0 + d1 * 3) floordiv 32)(%i0, %i1)
|
||||
// CHECK: {{.*}} = affine.apply (d0, d1) -> (((d0 + d1 * 3) floordiv 32) * 31)(%i0, %i1)
|
||||
// CHECK: {{.*}} = affine.apply (d0, d1) -> ((((d0 + d1 * 3) floordiv 32) * 31) ceildiv 32)(%i0, %i1)
|
||||
@ -119,7 +119,7 @@ TEST_FUNC(builder_dynamic_for) {
|
||||
// CHECK-LABEL: func @builder_dynamic_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
|
||||
// CHECK: %0 = affine.apply ()[s0, s1] -> (s0 - s1)()[%arg0, %arg1]
|
||||
// CHECK-NEXT: %1 = affine.apply ()[s0, s1] -> (s0 + s1)()[%arg2, %arg3]
|
||||
// CHECK-NEXT: for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 {
|
||||
// CHECK-NEXT: affine.for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 {
|
||||
// clang-format on
|
||||
f->print(llvm::outs());
|
||||
}
|
||||
@ -140,7 +140,7 @@ TEST_FUNC(builder_max_min_for) {
|
||||
|
||||
// clang-format off
|
||||
// CHECK-LABEL: func @builder_max_min_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
|
||||
// CHECK: for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) {
|
||||
// CHECK: affine.for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) {
|
||||
// CHECK: return
|
||||
// clang-format on
|
||||
f->print(llvm::outs());
|
||||
@ -344,16 +344,16 @@ TEST_FUNC(builder_helpers) {
|
||||
});
|
||||
|
||||
// CHECK-LABEL: @builder_helpers
|
||||
// CHECK: for %i0 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
|
||||
// CHECK-NEXT: for %i1 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
|
||||
// CHECK-NEXT: for %i2 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
|
||||
// CHECK: affine.for %i0 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
|
||||
// CHECK-NEXT: affine.for %i1 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
|
||||
// CHECK-NEXT: affine.for %i2 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
|
||||
// CHECK-NEXT: [[a:%.*]] = load %arg0[%i0, %i1, %i2] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: [[b:%.*]] = addf {{.*}}, [[a]] : f32
|
||||
// CHECK-NEXT: [[c:%.*]] = load %arg1[%i0, %i1, %i2] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: [[d:%.*]] = addf [[b]], [[c]] : f32
|
||||
// CHECK-NEXT: store [[d]], %arg2[%i0, %i1, %i2] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: for %i3 = (d0) -> (d0)(%c0_1) to (d0) -> (d0)(%2) {
|
||||
// CHECK-NEXT: affine.for %i3 = (d0) -> (d0)(%c0_1) to (d0) -> (d0)(%2) {
|
||||
// CHECK-NEXT: [[a:%.*]] = load %arg1[%i0, %i1, %i3] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: [[b:%.*]] = load %arg0[%i0, %i1, %i3] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: [[c:%.*]] = addf [[b]], [[a]] : f32
|
||||
@ -392,8 +392,8 @@ TEST_FUNC(custom_ops) {
|
||||
});
|
||||
|
||||
// CHECK-LABEL: @custom_ops
|
||||
// CHECK: for %i0 {{.*}}
|
||||
// CHECK: for %i1 {{.*}}
|
||||
// CHECK: affine.for %i0 {{.*}}
|
||||
// CHECK: affine.for %i1 {{.*}}
|
||||
// CHECK: {{.*}} = "my_custom_op"{{.*}} : (index, index) -> index
|
||||
// CHECK: "my_custom_inst_0"{{.*}} : (index, index) -> ()
|
||||
// CHECK: [[TWO:%[a-z0-9]+]] = "my_custom_inst_2"{{.*}} : (index, index) -> (index, index)
|
||||
|
@ -204,24 +204,24 @@ func @illegaltype(i0) // expected-error {{invalid integer width}}
|
||||
// -----
|
||||
|
||||
func @malformed_for_percent() {
|
||||
for i = 1 to 10 { // expected-error {{expected SSA operand}}
|
||||
affine.for i = 1 to 10 { // expected-error {{expected SSA operand}}
|
||||
|
||||
// -----
|
||||
|
||||
func @malformed_for_equal() {
|
||||
for %i 1 to 10 { // expected-error {{expected '='}}
|
||||
affine.for %i 1 to 10 { // expected-error {{expected '='}}
|
||||
|
||||
// -----
|
||||
|
||||
func @malformed_for_to() {
|
||||
for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}}
|
||||
affine.for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}}
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func @incomplete_for() {
|
||||
for %i = 1 to 10 step 2
|
||||
affine.for %i = 1 to 10 step 2
|
||||
} // expected-error {{expected '{' to begin a region}}
|
||||
|
||||
// -----
|
||||
@ -230,19 +230,19 @@ func @incomplete_for() {
|
||||
|
||||
func @reference_to_iv_in_bound() {
|
||||
// expected-error@+1 {{operand use before it's defined}}
|
||||
for %i0 = #map0(%i0) to 10 {
|
||||
affine.for %i0 = #map0(%i0) to 10 {
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func @nonconstant_step(%1 : i32) {
|
||||
for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}}
|
||||
affine.for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}}
|
||||
|
||||
// -----
|
||||
|
||||
func @for_negative_stride() {
|
||||
for %i = 1 to 10 step -1
|
||||
affine.for %i = 1 to 10 step -1
|
||||
} // expected-error@-1 {{expected step to be representable as a positive signed integer}}
|
||||
|
||||
// -----
|
||||
@ -254,7 +254,7 @@ func @non_instruction() {
|
||||
// -----
|
||||
|
||||
func @invalid_if_conditional2() {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if (i)[N] : (i >= ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
|
||||
}
|
||||
}
|
||||
@ -262,7 +262,7 @@ func @invalid_if_conditional2() {
|
||||
// -----
|
||||
|
||||
func @invalid_if_conditional3() {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if (i)[N] : (i == 1) // expected-error {{expected '0' after '=='}}
|
||||
}
|
||||
}
|
||||
@ -270,7 +270,7 @@ func @invalid_if_conditional3() {
|
||||
// -----
|
||||
|
||||
func @invalid_if_conditional4() {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if (i)[N] : (i >= 2) // expected-error {{expected '0' after '>='}}
|
||||
}
|
||||
}
|
||||
@ -278,7 +278,7 @@ func @invalid_if_conditional4() {
|
||||
// -----
|
||||
|
||||
func @invalid_if_conditional5() {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if (i)[N] : (i <= 0 ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
|
||||
}
|
||||
}
|
||||
@ -286,7 +286,7 @@ func @invalid_if_conditional5() {
|
||||
// -----
|
||||
|
||||
func @invalid_if_conditional6() {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if (i) : (i) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
|
||||
}
|
||||
}
|
||||
@ -294,7 +294,7 @@ func @invalid_if_conditional6() {
|
||||
// -----
|
||||
// TODO (support affine.if (1)?
|
||||
func @invalid_if_conditional7() {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if (i) : (1) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
|
||||
}
|
||||
}
|
||||
@ -440,8 +440,8 @@ func @undef() {
|
||||
// -----
|
||||
|
||||
func @duplicate_induction_var() {
|
||||
for %i = 1 to 10 { // expected-error {{previously defined here}}
|
||||
for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}}
|
||||
affine.for %i = 1 to 10 { // expected-error {{previously defined here}}
|
||||
affine.for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}}
|
||||
}
|
||||
}
|
||||
return
|
||||
@ -450,7 +450,7 @@ func @duplicate_induction_var() {
|
||||
// -----
|
||||
|
||||
func @dominance_failure() {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
}
|
||||
"xxx"(%i) : (index)->() // expected-error {{operand #0 does not dominate this use}}
|
||||
return
|
||||
@ -477,7 +477,7 @@ func @return_type_mismatch() -> i32 {
|
||||
// -----
|
||||
|
||||
func @return_inside_loop() {
|
||||
for %i = 1 to 100 {
|
||||
affine.for %i = 1 to 100 {
|
||||
// expected-error@-1 {{op expects body block to not have a terminator}}
|
||||
return
|
||||
}
|
||||
@ -522,7 +522,7 @@ func @referer() {
|
||||
#map1 = (i)[j] -> (i+j)
|
||||
|
||||
func @bound_symbol_mismatch(%N : index) {
|
||||
for %i = #map1(%N) to 100 {
|
||||
affine.for %i = #map1(%N) to 100 {
|
||||
// expected-error@-1 {{symbol operand count and integer set symbol count must match}}
|
||||
}
|
||||
return
|
||||
@ -533,7 +533,7 @@ func @bound_symbol_mismatch(%N : index) {
|
||||
#map1 = (i)[j] -> (i+j)
|
||||
|
||||
func @bound_dim_mismatch(%N : index) {
|
||||
for %i = #map1(%N, %N)[%N] to 100 {
|
||||
affine.for %i = #map1(%N, %N)[%N] to 100 {
|
||||
// expected-error@-1 {{dim operand count and integer set dim count must match}}
|
||||
}
|
||||
return
|
||||
@ -542,7 +542,7 @@ func @bound_dim_mismatch(%N : index) {
|
||||
// -----
|
||||
|
||||
func @large_bound() {
|
||||
for %i = 1 to 9223372036854775810 {
|
||||
affine.for %i = 1 to 9223372036854775810 {
|
||||
// expected-error@-1 {{integer constant out of range for attribute}}
|
||||
}
|
||||
return
|
||||
@ -551,7 +551,7 @@ func @large_bound() {
|
||||
// -----
|
||||
|
||||
func @max_in_upper_bound(%N : index) {
|
||||
for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}}
|
||||
affine.for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}}
|
||||
}
|
||||
return
|
||||
}
|
||||
@ -559,7 +559,7 @@ func @max_in_upper_bound(%N : index) {
|
||||
// -----
|
||||
|
||||
func @step_typo() {
|
||||
for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}}
|
||||
affine.for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}}
|
||||
}
|
||||
return
|
||||
}
|
||||
@ -567,7 +567,7 @@ func @step_typo() {
|
||||
// -----
|
||||
|
||||
func @invalid_bound_map(%N : i32) {
|
||||
for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}}
|
||||
affine.for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}}
|
||||
}
|
||||
return
|
||||
}
|
||||
@ -580,7 +580,7 @@ func @invalid_bound_map(%N : i32) {
|
||||
#set0 = (i)[N] : (i >= 0, N - i >= 0)
|
||||
|
||||
func @invalid_if_operands1(%N : index) {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if #set0(%i) {
|
||||
// expected-error@-1 {{symbol operand count and integer set symbol count must match}}
|
||||
|
||||
@ -588,7 +588,7 @@ func @invalid_if_operands1(%N : index) {
|
||||
#set0 = (i)[N] : (i >= 0, N - i >= 0)
|
||||
|
||||
func @invalid_if_operands2(%N : index) {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if #set0()[%N] {
|
||||
// expected-error@-1 {{dim operand count and integer set dim count must match}}
|
||||
|
||||
@ -596,7 +596,7 @@ func @invalid_if_operands2(%N : index) {
|
||||
#set0 = (i)[N] : (i >= 0, N - i >= 0)
|
||||
|
||||
func @invalid_if_operands3(%N : index) {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if #set0(%i)[%i] {
|
||||
// expected-error@-1 {{operand cannot be used as a symbol}}
|
||||
}
|
||||
@ -751,11 +751,11 @@ func @f(f32) {
|
||||
// -----
|
||||
|
||||
func @f(%m : memref<?x?xf32>) {
|
||||
for %i0 = 0 to 42 {
|
||||
affine.for %i0 = 0 to 42 {
|
||||
// expected-error@+1 {{operand #2 does not dominate this use}}
|
||||
%x = load %m[%i0, %i1] : memref<?x?xf32>
|
||||
}
|
||||
for %i1 = 0 to 42 {
|
||||
affine.for %i1 = 0 to 42 {
|
||||
}
|
||||
return
|
||||
}
|
||||
@ -805,7 +805,7 @@ func @type_alias_unknown(!unknown_alias) -> () { // expected-error {{undefined t
|
||||
|
||||
// Check ill-formed opaque tensor.
|
||||
func @complex_loops() {
|
||||
for %i1 = 1 to 100 {
|
||||
affine.for %i1 = 1 to 100 {
|
||||
// expected-error @+1 {{expected '"' in string literal}}
|
||||
"opaqueIntTensor"(){bar: opaque<"", tensor<2x1x4xi32>, "0x686]>} : () -> ()
|
||||
|
||||
@ -839,7 +839,7 @@ func @invalid_affine_structure() {
|
||||
|
||||
func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
|
||||
// expected-error @+1 {{lower loop bound affine map with multiple results requires 'max' prefix}}
|
||||
for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 {
|
||||
affine.for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 {
|
||||
}
|
||||
return
|
||||
}
|
||||
@ -848,7 +848,7 @@ func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
|
||||
|
||||
func @missing_for_min(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
|
||||
// expected-error @+1 {{upper loop bound affine map with multiple results requires 'min' prefix}}
|
||||
for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] {
|
||||
affine.for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] {
|
||||
}
|
||||
return
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
|
||||
%2 = constant 4 : index loc(callsite("foo" at "mysource.cc":10:8))
|
||||
|
||||
// CHECK: } loc(fused["foo", "mysource.cc":10:8])
|
||||
for %i0 = 0 to 8 {
|
||||
affine.for %i0 = 0 to 8 {
|
||||
} loc(fused["foo", "mysource.cc":10:8])
|
||||
|
||||
// CHECK: } loc(fused<"myPass">["foo", "foo2"])
|
||||
|
@ -208,8 +208,8 @@ func @identity_functor(%a : () -> ()) -> (() -> ()) {
|
||||
func @func_ops_in_loop() {
|
||||
// CHECK: %0 = "foo"() : () -> i64
|
||||
%a = "foo"() : ()->i64
|
||||
// CHECK: for %i0 = 1 to 10 {
|
||||
for %i = 1 to 10 {
|
||||
// CHECK: affine.for %i0 = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
// CHECK: %1 = "doo"() : () -> f32
|
||||
%b = "doo"() : ()->f32
|
||||
// CHECK: "bar"(%0, %1) : (i64, f32) -> ()
|
||||
@ -224,10 +224,10 @@ func @func_ops_in_loop() {
|
||||
|
||||
// CHECK-LABEL: func @loops() {
|
||||
func @loops() {
|
||||
// CHECK: for %i0 = 1 to 100 step 2 {
|
||||
for %i = 1 to 100 step 2 {
|
||||
// CHECK: for %i1 = 1 to 200 {
|
||||
for %j = 1 to 200 {
|
||||
// CHECK: affine.for %i0 = 1 to 100 step 2 {
|
||||
affine.for %i = 1 to 100 step 2 {
|
||||
// CHECK: affine.for %i1 = 1 to 200 {
|
||||
affine.for %j = 1 to 200 {
|
||||
} // CHECK: }
|
||||
} // CHECK: }
|
||||
return // CHECK: return
|
||||
@ -235,14 +235,14 @@ func @loops() {
|
||||
|
||||
// CHECK-LABEL: func @complex_loops() {
|
||||
func @complex_loops() {
|
||||
for %i1 = 1 to 100 { // CHECK: for %i0 = 1 to 100 {
|
||||
for %j1 = 1 to 100 { // CHECK: for %i1 = 1 to 100 {
|
||||
affine.for %i1 = 1 to 100 { // CHECK: affine.for %i0 = 1 to 100 {
|
||||
affine.for %j1 = 1 to 100 { // CHECK: affine.for %i1 = 1 to 100 {
|
||||
// CHECK: "foo"(%i0, %i1) : (index, index) -> ()
|
||||
"foo"(%i1, %j1) : (index,index) -> ()
|
||||
} // CHECK: }
|
||||
"boo"() : () -> () // CHECK: "boo"() : () -> ()
|
||||
for %j2 = 1 to 10 { // CHECK: for %i2 = 1 to 10 {
|
||||
for %k2 = 1 to 10 { // CHECK: for %i3 = 1 to 10 {
|
||||
affine.for %j2 = 1 to 10 { // CHECK: affine.for %i2 = 1 to 10 {
|
||||
affine.for %k2 = 1 to 10 { // CHECK: affine.for %i3 = 1 to 10 {
|
||||
"goo"() : () -> () // CHECK: "goo"() : () -> ()
|
||||
} // CHECK: }
|
||||
} // CHECK: }
|
||||
@ -253,8 +253,8 @@ func @complex_loops() {
|
||||
// CHECK: func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
|
||||
func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
|
||||
%c = constant 0 : i32 // CHECK: %c0_i32 = constant 0 : i32
|
||||
for %i0 = 1 to %arg0 { // CHECK: for %i0 = 1 to %arg0 {
|
||||
for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: for %i1 = #map{{[0-9]+}}(%i0) to %arg0 {
|
||||
affine.for %i0 = 1 to %arg0 { // CHECK: affine.for %i0 = 1 to %arg0 {
|
||||
affine.for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to %arg0 {
|
||||
store %c, %arg1[%i0, %i1] : memref<?x?xi32> // CHECK: store %c0_i32, %arg1[%i0, %i1]
|
||||
} // CHECK: }
|
||||
} // CHECK: }
|
||||
@ -263,8 +263,8 @@ func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
|
||||
|
||||
// CHECK: func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
|
||||
func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
|
||||
// CHECK: for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] {
|
||||
for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] {
|
||||
// CHECK: affine.for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] {
|
||||
affine.for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] {
|
||||
// CHECK: "foo"(%arg2, %i0) : (memref<100xf32>, index) -> ()
|
||||
"foo"(%arg2, %i0) : (memref<100xf32>, index) -> ()
|
||||
} // CHECK: }
|
||||
@ -275,24 +275,24 @@ func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
|
||||
func @loop_bounds(%N : index) {
|
||||
// CHECK: %0 = "foo"(%arg0) : (index) -> index
|
||||
%s = "foo"(%N) : (index) -> index
|
||||
// CHECK: for %i0 = %0 to %arg0
|
||||
for %i = %s to %N {
|
||||
// CHECK: for %i1 = #map{{[0-9]+}}(%i0) to 0
|
||||
for %j = (d0)[]->(d0)(%i)[] to 0 step 1 {
|
||||
// CHECK: affine.for %i0 = %0 to %arg0
|
||||
affine.for %i = %s to %N {
|
||||
// CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to 0
|
||||
affine.for %j = (d0)[]->(d0)(%i)[] to 0 step 1 {
|
||||
// CHECK: %1 = affine.apply #map{{.*}}(%i0, %i1)[%0]
|
||||
%w1 = affine.apply(d0, d1)[s0] -> (d0+d1) (%i, %j) [%s]
|
||||
// CHECK: %2 = affine.apply #map{{.*}}(%i0, %i1)[%0]
|
||||
%w2 = affine.apply(d0, d1)[s0] -> (s0+1) (%i, %j) [%s]
|
||||
// CHECK: for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] {
|
||||
for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] {
|
||||
// CHECK: affine.for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] {
|
||||
affine.for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] {
|
||||
// CHECK: "foo"(%i0, %i1, %i2) : (index, index, index) -> ()
|
||||
"foo"(%i, %j, %k) : (index, index, index)->()
|
||||
// CHECK: %c30 = constant 30 : index
|
||||
%c = constant 30 : index
|
||||
// CHECK: %3 = affine.apply #map{{.*}}(%arg0, %c30)
|
||||
%u = affine.apply (d0, d1)->(d0+d1) (%N, %c)
|
||||
// CHECK: for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] {
|
||||
for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] {
|
||||
// CHECK: affine.for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] {
|
||||
affine.for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] {
|
||||
// CHECK: "bar"(%i3) : (index) -> ()
|
||||
"bar"(%l) : (index) -> ()
|
||||
} // CHECK: }
|
||||
@ -305,7 +305,7 @@ func @loop_bounds(%N : index) {
|
||||
// CHECK-LABEL: func @ifinst(%arg0: index) {
|
||||
func @ifinst(%N: index) {
|
||||
%c = constant 200 : index // CHECK %c200 = constant 200
|
||||
for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 {
|
||||
affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 {
|
||||
affine.if #set0(%i)[%N, %c] { // CHECK affine.if #set0(%i0)[%arg0, %c200] {
|
||||
%x = constant 1 : i32
|
||||
// CHECK: %c1_i32 = constant 1 : i32
|
||||
@ -328,7 +328,7 @@ func @ifinst(%N: index) {
|
||||
// CHECK-LABEL: func @simple_ifinst(%arg0: index) {
|
||||
func @simple_ifinst(%N: index) {
|
||||
%c = constant 200 : index // CHECK %c200 = constant 200
|
||||
for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 {
|
||||
affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 {
|
||||
affine.if #set0(%i)[%N, %c] { // CHECK affine.if #set0(%i0)[%arg0, %c200] {
|
||||
%x = constant 1 : i32
|
||||
// CHECK: %c1_i32 = constant 1 : i32
|
||||
@ -549,18 +549,18 @@ func @funcattrwithblock() -> ()
|
||||
#map_non_simple2 = ()[s0, s1] -> (s0 + s1)
|
||||
#map_non_simple3 = ()[s0] -> (s0 + 3)
|
||||
func @funcsimplemap(%arg0: index, %arg1: index) -> () {
|
||||
for %i0 = 0 to #map_simple0()[] {
|
||||
// CHECK: for %i0 = 0 to 10 {
|
||||
for %i1 = 0 to #map_simple1()[%arg1] {
|
||||
// CHECK: for %i1 = 0 to %arg1 {
|
||||
for %i2 = 0 to #map_non_simple0(%i0)[] {
|
||||
// CHECK: for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) {
|
||||
for %i3 = 0 to #map_non_simple1(%i0)[%arg1] {
|
||||
// CHECK: for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] {
|
||||
for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] {
|
||||
// CHECK: for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] {
|
||||
for %i5 = 0 to #map_non_simple3()[%arg0] {
|
||||
// CHECK: for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] {
|
||||
affine.for %i0 = 0 to #map_simple0()[] {
|
||||
// CHECK: affine.for %i0 = 0 to 10 {
|
||||
affine.for %i1 = 0 to #map_simple1()[%arg1] {
|
||||
// CHECK: affine.for %i1 = 0 to %arg1 {
|
||||
affine.for %i2 = 0 to #map_non_simple0(%i0)[] {
|
||||
// CHECK: affine.for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) {
|
||||
affine.for %i3 = 0 to #map_non_simple1(%i0)[%arg1] {
|
||||
// CHECK: affine.for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] {
|
||||
affine.for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] {
|
||||
// CHECK: affine.for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] {
|
||||
affine.for %i5 = 0 to #map_non_simple3()[%arg0] {
|
||||
// CHECK: affine.for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] {
|
||||
%c42_i32 = constant 42 : i32
|
||||
}
|
||||
}
|
||||
@ -745,9 +745,9 @@ func @sparsevectorattr() -> () {
|
||||
// CHECK-LABEL: func @loops_with_blockids() {
|
||||
func @loops_with_blockids() {
|
||||
^block0:
|
||||
for %i = 1 to 100 step 2 {
|
||||
affine.for %i = 1 to 100 step 2 {
|
||||
^block1:
|
||||
for %j = 1 to 200 {
|
||||
affine.for %j = 1 to 200 {
|
||||
^block2:
|
||||
}
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
|
||||
%3 = constant 4 : index loc(callsite("foo" at callsite("mysource1.cc":10:8 at callsite("mysource2.cc":13:8 at "mysource3.cc":100:10))))
|
||||
|
||||
// CHECK: } ["foo", mysource.cc:10:8]
|
||||
for %i0 = 0 to 8 {
|
||||
affine.for %i0 = 0 to 8 {
|
||||
} loc(fused["foo", "mysource.cc":10:8])
|
||||
|
||||
// CHECK: } <"myPass">["foo", "foo2"]
|
||||
|
@ -6,8 +6,8 @@
|
||||
// CHECK-LABEL: func @materialize_read_1d() {
|
||||
func @materialize_read_1d() {
|
||||
%A = alloc () : memref<7x42xf32>
|
||||
for %i0 = 0 to 7 step 4 {
|
||||
for %i1 = 0 to 42 step 4 {
|
||||
affine.for %i0 = 0 to 7 step 4 {
|
||||
affine.for %i1 = 0 to 42 step 4 {
|
||||
%f1 = vector_transfer_read %A, %i0, %i1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32>
|
||||
%ip1 = affine.apply (d0) -> (d0 + 1) (%i1)
|
||||
%f2 = vector_transfer_read %A, %i0, %ip1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32>
|
||||
@ -29,11 +29,11 @@ func @materialize_read_1d() {
|
||||
// CHECK-LABEL: func @materialize_read_1d_partially_specialized
|
||||
func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) {
|
||||
%A = alloc (%dyn1, %dyn2, %dyn4) : memref<7x?x?x42x?xf32>
|
||||
for %i0 = 0 to 7 {
|
||||
for %i1 = 0 to %dyn1 {
|
||||
for %i2 = 0 to %dyn2 {
|
||||
for %i3 = 0 to 42 step 2 {
|
||||
for %i4 = 0 to %dyn4 {
|
||||
affine.for %i0 = 0 to 7 {
|
||||
affine.for %i1 = 0 to %dyn1 {
|
||||
affine.for %i2 = 0 to %dyn2 {
|
||||
affine.for %i3 = 0 to 42 step 2 {
|
||||
affine.for %i4 = 0 to %dyn4 {
|
||||
%f1 = vector_transfer_read %A, %i0, %i1, %i2, %i3, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32>
|
||||
%i3p1 = affine.apply (d0) -> (d0 + 1) (%i3)
|
||||
%f2 = vector_transfer_read %A, %i0, %i1, %i2, %i3p1, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32>
|
||||
@ -54,19 +54,19 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d
|
||||
// CHECK-LABEL: func @materialize_read(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
|
||||
func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
|
||||
// CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 {
|
||||
// CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 {
|
||||
// CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 {
|
||||
// CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 {
|
||||
// CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 {
|
||||
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 {
|
||||
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 {
|
||||
// CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 {
|
||||
// CHECK: %[[D0:.*]] = dim %0, 0 : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: %[[D1:.*]] = dim %0, 1 : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: %[[D2:.*]] = dim %0, 2 : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: %[[D3:.*]] = dim %0, 3 : memref<?x?x?x?xf32>
|
||||
// CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
|
||||
// CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast %[[ALLOC]] : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
|
||||
// CHECK-NEXT: for %[[I4:.*]] = 0 to 3 {
|
||||
// CHECK-NEXT: for %[[I5:.*]] = 0 to 4 {
|
||||
// CHECK-NEXT: for %[[I6:.*]] = 0 to 5 {
|
||||
// CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 {
|
||||
// CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 {
|
||||
// CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 {
|
||||
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
|
||||
// CHECK-NEXT: %[[C1:.*]] = constant 1 : index
|
||||
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
|
||||
@ -117,10 +117,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
|
||||
// Check that I3 + I6 (of size 5) read from last index load(..., L3) and write into first index store(I6, ...)
|
||||
// Other dimensions are just accessed with I1, I2 resp.
|
||||
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
|
||||
for %i0 = 0 to %M step 3 {
|
||||
for %i1 = 0 to %N {
|
||||
for %i2 = 0 to %O {
|
||||
for %i3 = 0 to %P step 5 {
|
||||
affine.for %i0 = 0 to %M step 3 {
|
||||
affine.for %i1 = 0 to %N {
|
||||
affine.for %i2 = 0 to %O {
|
||||
affine.for %i3 = 0 to %P step 5 {
|
||||
%f = vector_transfer_read %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, 0, d0)} : (memref<?x?x?x?xf32, 0>, index, index, index, index) -> vector<5x4x3xf32>
|
||||
}
|
||||
}
|
||||
@ -133,10 +133,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
|
||||
func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
|
||||
// CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: %cst = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32>
|
||||
// CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 {
|
||||
// CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 step 4 {
|
||||
// CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 {
|
||||
// CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 {
|
||||
// CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 {
|
||||
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 step 4 {
|
||||
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 {
|
||||
// CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 {
|
||||
// CHECK: %[[D0:.*]] = dim %0, 0 : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: %[[D1:.*]] = dim %0, 1 : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: %[[D2:.*]] = dim %0, 2 : memref<?x?x?x?xf32>
|
||||
@ -144,9 +144,9 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
|
||||
// CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
|
||||
// CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast {{.*}} : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
|
||||
// CHECK: store %cst, {{.*}} : memref<1xvector<5x4x3xf32>>
|
||||
// CHECK-NEXT: for %[[I4:.*]] = 0 to 3 {
|
||||
// CHECK-NEXT: for %[[I5:.*]] = 0 to 4 {
|
||||
// CHECK-NEXT: for %[[I6:.*]] = 0 to 5 {
|
||||
// CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 {
|
||||
// CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 {
|
||||
// CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 {
|
||||
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
|
||||
// CHECK-NEXT: %[[C1:.*]] = constant 1 : index
|
||||
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
|
||||
@ -201,10 +201,10 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
|
||||
// Other dimension is just accessed with I2.
|
||||
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
|
||||
%f1 = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32>
|
||||
for %i0 = 0 to %M step 3 {
|
||||
for %i1 = 0 to %N step 4 {
|
||||
for %i2 = 0 to %O {
|
||||
for %i3 = 0 to %P step 5 {
|
||||
affine.for %i0 = 0 to %M step 3 {
|
||||
affine.for %i1 = 0 to %N step 4 {
|
||||
affine.for %i2 = 0 to %O {
|
||||
affine.for %i3 = 0 to %P step 5 {
|
||||
vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : vector<5x4x3xf32>, memref<?x?x?x?xf32, 0>, index, index, index, index
|
||||
}
|
||||
}
|
||||
|
@ -10,10 +10,10 @@
|
||||
func @materialize(%M : index, %N : index, %O : index, %P : index) {
|
||||
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
|
||||
%f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
|
||||
// CHECK: for %i0 = 0 to %arg0 step 4 {
|
||||
// CHECK-NEXT: for %i1 = 0 to %arg1 step 4 {
|
||||
// CHECK-NEXT: for %i2 = 0 to %arg2 {
|
||||
// CHECK-NEXT: for %i3 = 0 to %arg3 step 4 {
|
||||
// CHECK: affine.for %i0 = 0 to %arg0 step 4 {
|
||||
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 4 {
|
||||
// CHECK-NEXT: affine.for %i2 = 0 to %arg2 {
|
||||
// CHECK-NEXT: affine.for %i3 = 0 to %arg3 step 4 {
|
||||
// CHECK-NEXT: %[[a:[0-9]+]] = {{.*}}[[ID1]](%i0)
|
||||
// CHECK-NEXT: %[[b:[0-9]+]] = {{.*}}[[ID1]](%i1)
|
||||
// CHECK-NEXT: %[[c:[0-9]+]] = {{.*}}[[ID1]](%i2)
|
||||
@ -25,10 +25,10 @@ func @materialize(%M : index, %N : index, %O : index, %P : index) {
|
||||
// CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b2]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
|
||||
// CHECK: %[[b3:[0-9]+]] = {{.*}}[[D0P3]](%i1)
|
||||
// CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b3]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
|
||||
for %i0 = 0 to %M step 4 {
|
||||
for %i1 = 0 to %N step 4 {
|
||||
for %i2 = 0 to %O {
|
||||
for %i3 = 0 to %P step 4 {
|
||||
affine.for %i0 = 0 to %M step 4 {
|
||||
affine.for %i1 = 0 to %N step 4 {
|
||||
affine.for %i2 = 0 to %O {
|
||||
affine.for %i3 = 0 to %P step 4 {
|
||||
"vector_transfer_write"(%f1, %A, %i0, %i1, %i2, %i3) {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : (vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>, index, index, index, index) -> ()
|
||||
}
|
||||
}
|
||||
|
@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
||||
%f1 = constant 1.0 : f32
|
||||
%f2 = constant 2.0 : f32
|
||||
// 4x unroll (jammed by construction).
|
||||
// CHECK: for %i0 = 0 to %arg0 {
|
||||
// CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
|
||||
// CHECK: affine.for %i0 = 0 to %arg0 {
|
||||
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 {
|
||||
// CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
@ -34,15 +34,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
||||
// CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}}
|
||||
// CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
//
|
||||
for %i0 = 0 to %M {
|
||||
for %i1 = 0 to %N {
|
||||
affine.for %i0 = 0 to %M {
|
||||
affine.for %i1 = 0 to %N {
|
||||
// non-scoped %f1
|
||||
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
|
||||
}
|
||||
}
|
||||
// 4x unroll (jammed by construction).
|
||||
// CHECK: for %i2 = 0 to %arg0 {
|
||||
// CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
|
||||
// CHECK: affine.for %i2 = 0 to %arg0 {
|
||||
// CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 {
|
||||
// CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
@ -60,15 +60,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
||||
// CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}}
|
||||
// CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
//
|
||||
for %i2 = 0 to %M {
|
||||
for %i3 = 0 to %N {
|
||||
affine.for %i2 = 0 to %M {
|
||||
affine.for %i3 = 0 to %N {
|
||||
// non-scoped %f2
|
||||
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
|
||||
}
|
||||
}
|
||||
// 4x unroll (jammed by construction).
|
||||
// CHECK: for %i4 = 0 to %arg0 {
|
||||
// CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
|
||||
// CHECK: affine.for %i4 = 0 to %arg0 {
|
||||
// CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 {
|
||||
// CHECK-NEXT: {{.*}} = affine.apply
|
||||
// CHECK-NEXT: {{.*}} = affine.apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
@ -110,8 +110,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
||||
// CHECK-NEXT: {{.*}} = affine.apply
|
||||
// CHECK-NEXT: vector_transfer_write
|
||||
//
|
||||
for %i4 = 0 to %M {
|
||||
for %i5 = 0 to %N {
|
||||
affine.for %i4 = 0 to %M {
|
||||
affine.for %i5 = 0 to %N {
|
||||
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
|
||||
%b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
|
||||
%s5 = addf %a5, %b5 : f32
|
||||
|
@@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
// (3x2)x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i1 = 0 to %arg1 step 16 {
// CHECK: affine.for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 16 {
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
@@ -41,26 +41,26 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL50:%.*]] = affine.apply [[D0P2]](%i0)
// CHECK-NEXT: [[VAL51:%.*]] = affine.apply [[D0P8]](%i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL50]], [[VAL51]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
}
}
// (3x2)x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i3 = 0 to %arg1 step 16 {
// CHECK: affine.for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 16 {
// .....
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// non-scoped %f2
// CHECK does (3x4)x unrolling.
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
}
}
// (3x2)x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i5 = 0 to %arg1 step 16 {
// CHECK: affine.for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 16 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
@@ -122,8 +122,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: vector_transfer_write
//
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
%b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
%s5 = addf %a5, %b5 : f32
@@ -13,8 +13,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
// 2x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
// CHECK: affine.for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i0)
@@ -24,15 +24,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32>
//
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
}
}
// 2x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
// CHECK: affine.for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i2)
@@ -42,15 +42,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i3)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32>
//
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// non-scoped %f2
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
}
}
// 2x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
// CHECK: affine.for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
@@ -72,8 +72,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: vector_transfer_write
//
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
%b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
%s5 = addf %a5, %b5 : f32
@@ -9,19 +9,19 @@

// CHECK-LABEL: func @simple()
func @simple() {
for %i0 = 0 to 7 {
affine.for %i0 = 0 to 7 {
%0 = affine.apply (d0) -> (d0) (%i0)
%1 = affine.apply (d0) -> (d0) (%0)
%2 = affine.apply (d0, d1) -> (d0 + d1) (%0, %0)
%3 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0)
}
// CHECK-NEXT: for %i0 = 0 to 7
// CHECK-NEXT: affine.for %i0 = 0 to 7
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i0)
// CHECK-NEXT: {{.*}} affine.apply #[[D0TIMES2]](%i0)
// CHECK-NEXT: {{.*}} affine.apply #[[ZERO]]()

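// Example (a minimal reading of the folding matched above, not an added
// check): composing the identity apply %0 into the two-operand maps gives
//   %2 : (d0, d1) -> (d0 + d1) applied to (%0, %0) == #[[D0TIMES2]](%i0)
//   %3 : (d0, d1) -> (d0 - d1) applied to (%0, %0) == #[[ZERO]]()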
for %i1 = 0 to 7 {
for %i2 = 0 to 42 {
affine.for %i1 = 0 to 7 {
affine.for %i2 = 0 to 42 {
%20 = affine.apply (d0, d1) -> (d1) (%i1, %i2)
%21 = affine.apply (d0, d1) -> (d0) (%i1, %i2)
%22 = affine.apply (d0, d1) -> (d0 + d1) (%20, %21)
@@ -29,15 +29,15 @@ func @simple() {
%24 = affine.apply (d0, d1) -> (-d0 + d1) (%20, %21)
}
}
// CHECK: for %i1 = 0 to 7
// CHECK-NEXT: for %i2 = 0 to 42
// CHECK: affine.for %i1 = 0 to 7
// CHECK-NEXT: affine.for %i2 = 0 to 42
// CHECK-NEXT: {{.*}} affine.apply #[[D0PLUSD1]](%i1, %i2)
// CHECK-NEXT: {{.*}} affine.apply #[[MINSD0PLUSD1]](%i1, %i2)
// CHECK-NEXT: {{.*}} affine.apply #[[D0MINUSD1]](%i1, %i2)

for %i3 = 0 to 16 {
for %i4 = 0 to 47 step 2 {
for %i5 = 0 to 78 step 16 {
affine.for %i3 = 0 to 16 {
affine.for %i4 = 0 to 47 step 2 {
affine.for %i5 = 0 to 78 step 16 {
%50 = affine.apply (d0) -> (d0) (%i3)
%51 = affine.apply (d0) -> (d0) (%i4)
%52 = affine.apply (d0) -> (d0) (%i5)
@@ -47,9 +47,9 @@ func @simple() {
}
}
}
// CHECK: for %i3 = 0 to 16
// CHECK-NEXT: for %i4 = 0 to 47 step 2
// CHECK-NEXT: for %i5 = 0 to 78 step 16
// CHECK: affine.for %i3 = 0 to 16
// CHECK-NEXT: affine.for %i4 = 0 to 47 step 2
// CHECK-NEXT: affine.for %i5 = 0 to 78 step 16
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i3)
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i4)
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i5)
@@ -23,17 +23,17 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
//
// CHECK: for {{.*}} step 128
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
for %i0 = 0 to %M { // vectorized due to scalar -> vector
affine.for %i0 = 0 to %M { // vectorized due to scalar -> vector
%a0 = load %A[%cst0, %cst0] : memref<?x?xf32>
}
//
// CHECK:for {{.*}} [[ARG_M]] {
for %i1 = 0 to %M { // not vectorized
affine.for %i1 = 0 to %M { // not vectorized
%a1 = load %A[%i1, %i1] : memref<?x?xf32>
}
//
// CHECK: for %i{{[0-9]*}} = 0 to [[ARG_M]] {
for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
// CHECK: affine.for %i{{[0-9]*}} = 0 to [[ARG_M]] {
affine.for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
%r2 = affine.apply (d0) -> (d0) (%i2)
%a2 = load %A[%r2#0, %cst0] : memref<?x?xf32>
}
@@ -41,7 +41,7 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK:for [[IV3:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
// CHECK-NEXT: [[APP3:%[a-zA-Z0-9]+]] = affine.apply {{.*}}[[IV3]]
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[APP3]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
for %i3 = 0 to %M { // vectorized
affine.for %i3 = 0 to %M { // vectorized
%r3 = affine.apply (d0) -> (d0) (%i3)
%a3 = load %A[%cst0, %r3#0] : memref<?x?xf32>
}
@@ -51,8 +51,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK-NEXT: [[APP50:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]])
// CHECK-NEXT: [[APP51:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]])
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP50]], [[APP51]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
for %i4 = 0 to %M { // vectorized
for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1
affine.for %i4 = 0 to %M { // vectorized
affine.for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1
%r50 = affine.apply (d0, d1) -> (d1) (%i4, %i5)
%r51 = affine.apply (d0, d1) -> (d0) (%i4, %i5)
%a5 = load %A[%r50, %r51] : memref<?x?xf32>
@@ -61,8 +61,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
//
// CHECK: for [[IV6:%[i0-9]*]] = 0 to [[ARG_M]] {
// CHECK-NEXT: for [[IV7:%[i0-9]*]] = 0 to [[ARG_N]] {
for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
for %i7 = 0 to %N { // not vectorized, can never vectorize
affine.for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
affine.for %i7 = 0 to %N { // not vectorized, can never vectorize
%r70 = affine.apply (d0, d1) -> (d1 + d0) (%i6, %i7)
%r71 = affine.apply (d0, d1) -> (d0) (%i6, %i7)
%a7 = load %A[%r70, %r71] : memref<?x?xf32>
@@ -74,8 +74,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK-NEXT: [[APP9_0:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]])
// CHECK-NEXT: [[APP9_1:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]])
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP9_0]], [[APP9_1]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
for %i8 = 0 to %M { // vectorized
for %i9 = 0 to %N {
affine.for %i8 = 0 to %M { // vectorized
affine.for %i9 = 0 to %N {
%r90 = affine.apply (d0, d1) -> (d1) (%i8, %i9)
%r91 = affine.apply (d0, d1) -> (d0 + d1) (%i8, %i9)
%a9 = load %A[%r90, %r91] : memref<?x?xf32>
@@ -84,8 +84,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
//
// CHECK: for [[IV10:%[i0-9]*]] = 0 to %{{[0-9]*}} {
// CHECK: for [[IV11:%[i0-9]*]] = 0 to %{{[0-9]*}} {
for %i10 = 0 to %M { // not vectorized, need per load transposes
for %i11 = 0 to %N { // not vectorized, need per load transposes
affine.for %i10 = 0 to %M { // not vectorized, need per load transposes
affine.for %i11 = 0 to %N { // not vectorized, need per load transposes
%r11_0 = affine.apply (d0, d1) -> (d0) (%i10, %i11)
%r11_1 = affine.apply (d0, d1) -> (d1) (%i10, %i11)
%a11 = load %A[%r11_0, %r11_1] : memref<?x?xf32>
@@ -98,9 +98,9 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK: for [[IV12:%[i0-9]*]] = 0 to %{{[0-9]*}} {
// CHECK: for [[IV13:%[i0-9]*]] = 0 to %{{[0-9]*}} {
// CHECK: for [[IV14:%[i0-9]+]] = 0 to [[ARG_P]] step 128
for %i12 = 0 to %M { // not vectorized, can never vectorize
for %i13 = 0 to %N { // not vectorized, can never vectorize
for %i14 = 0 to %P { // vectorized
affine.for %i12 = 0 to %M { // not vectorized, can never vectorize
affine.for %i13 = 0 to %N { // not vectorized, can never vectorize
affine.for %i14 = 0 to %P { // vectorized
%r14_0 = affine.apply (d0, d1, d2) -> (d1) (%i12, %i13, %i14)
%r14_1 = affine.apply (d0, d1, d2) -> (d0 + d1) (%i12, %i13, %i14)
%r14_2 = affine.apply (d0, d1, d2) -> (d0 + d2) (%i12, %i13, %i14)
@@ -109,24 +109,24 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
}
}
//
// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
for %i15 = 0 to %M { // not vectorized due to condition below
// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
affine.for %i15 = 0 to %M { // not vectorized due to condition below
affine.if #set0(%i15) {
%a15 = load %A[%cst0, %cst0] : memref<?x?xf32>
}
}
//
// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load
// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
affine.for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load
%a16 = alloc(%M) : memref<?xvector<2xf32>>
%l16 = load %a16[%i16] : memref<?xvector<2xf32>>
}
//
// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
// CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
// CHECK: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : {{.*}} -> vector<128xf32>
for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17
for %i18 = 0 to %M { // vectorized due to scalar -> vector
affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17
affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector
%a18 = load %A[%cst0, %cst0] : memref<?x?xf32>
}
}
@@ -139,24 +139,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%C = alloc (%M, %N) : memref<?x?xf32, 0>
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// CHECK: [[C1:%.*]] = constant splat<vector<128xf32>, 1.000000e+00> : vector<128xf32>
// CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref<?x?xf32>, index, index
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
}
}
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// CHECK: [[C3:%.*]] = constant splat<vector<128xf32>, 2.000000e+00> : vector<128xf32>
// CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref<?x?xf32>, index, index
// non-scoped %f2
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
}
}
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
// CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
// CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
// CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<128xf32>
@@ -188,10 +188,10 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-LABEL: @vec_rejected
func @vec_rejected(%A : memref<?x?xf32>, %C : memref<?x?xf32>) {
%N = dim %A, 0 : memref<?x?xf32>
for %i = 0 to %N {
affine.for %i = 0 to %N {
// CHECK-NOT: vector
%a = load %A[%i, %i] : memref<?x?xf32> // not vectorized
for %j = 0 to %N {
affine.for %j = 0 to %N {
%b = load %A[%i, %j] : memref<?x?xf32> // may be vectorized
// CHECK-NOT: vector
%c = addf %a, %b : f32 // not vectorized because %a wasn't
@@ -11,13 +11,13 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %1 step 32
// CHECK: for {{.*}} = 0 to %2 step 256
// Example:
// for %i0 = 0 to %0 {
// for %i1 = 0 to %1 step 32 {
// for %i2 = 0 to %2 step 256 {
// affine.for %i0 = 0 to %0 {
// affine.for %i1 = 0 to %1 step 32 {
// affine.for %i2 = 0 to %2 step 256 {
// %3 = "vector_transfer_read"(%arg0, %i0, %i1, %i2) : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}
@@ -27,9 +27,9 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=1 --test-fastest-varying=0 no
// vectorization happens because of loop nesting order.
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
}
}
@@ -43,24 +43,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%C = alloc (%M, %N) : memref<?x?xf32, 0>
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// CHECK: [[C1:%.*]] = constant splat<vector<32x256xf32>, 1.000000e+00> : vector<32x256xf32>
// CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref<?x?xf32>, index, index
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
}
}
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// CHECK: [[C3:%.*]] = constant splat<vector<32x256xf32>, 2.000000e+00> : vector<32x256xf32>
// CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref<?x?xf32>, index, index
// non-scoped %f2
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
}
}
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
// CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref<?x?xf32>, index, index) -> vector<32x256xf32>
// CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref<?x?xf32>, index, index) -> vector<32x256xf32>
// CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32>
@@ -7,17 +7,17 @@ func @vec3d(%A : memref<?x?x?xf32>) {
%0 = dim %A, 0 : memref<?x?x?xf32>
%1 = dim %A, 1 : memref<?x?x?xf32>
%2 = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 {
// CHECK: for %i1 = 0 to %0 {
// CHECK: for %i2 = 0 to %0 step 32 {
// CHECK: for %i3 = 0 to %1 step 64 {
// CHECK: for %i4 = 0 to %2 step 256 {
// CHECK: affine.for %i0 = 0 to %0 {
// CHECK: affine.for %i1 = 0 to %0 {
// CHECK: affine.for %i2 = 0 to %0 step 32 {
// CHECK: affine.for %i3 = 0 to %1 step 64 {
// CHECK: affine.for %i4 = 0 to %2 step 256 {
// CHECK: %3 = vector_transfer_read %arg0, %i2, %i3, %i4 {permutation_map: #[[map_proj_d0d1d2_d0d1d2]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x64x256xf32>
for %t0 = 0 to %0 {
for %t1 = 0 to %0 {
for %i0 = 0 to %0 {
for %i1 = 0 to %1 {
for %i2 = 0 to %2 {
affine.for %t0 = 0 to %0 {
affine.for %t1 = 0 to %0 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 {
affine.for %i2 = 0 to %2 {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}
@@ -7,13 +7,13 @@ func @vec2d(%A : memref<?x?x?xf32>) {
%M = dim %A, 0 : memref<?x?x?xf32>
%N = dim %A, 1 : memref<?x?x?xf32>
%P = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 step 32
// CHECK: for %i1 = 0 to %1 {
// CHECK: for %i2 = 0 to %2 step 256
// CHECK: affine.for %i0 = 0 to %0 step 32
// CHECK: affine.for %i1 = 0 to %1 {
// CHECK: affine.for %i2 = 0 to %2 step 256
// CHECK: {{.*}} = vector_transfer_read %arg0, %i0, %i1, %i2 {permutation_map: #[[map_proj_d0d1d2_d0d2]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}
@@ -23,9 +23,9 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=2 --test-fastest-varying=0 no
// vectorization happens because of loop nesting order.
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
}
}
@@ -12,20 +12,20 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=0 --test-fastest-varying=2 no
// vectorization happens because of loop nesting order.
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}
}
// CHECK: for %i3 = 0 to %0 step 32
// CHECK: for %i4 = 0 to %1 step 256
// CHECK: for %i5 = 0 to %2 {
// CHECK: affine.for %i3 = 0 to %0 step 32
// CHECK: affine.for %i4 = 0 to %1 step 256
// CHECK: affine.for %i5 = 0 to %2 {
// CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
}
}
@@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
%0 = dim %A, 0 : memref<?x?x?xf32>
%1 = dim %A, 1 : memref<?x?x?xf32>
%2 = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 step 32 {
// CHECK: for %i1 = 0 to %1 {
// CHECK: for %i2 = 0 to %2 step 256 {
// CHECK: affine.for %i0 = 0 to %0 step 32 {
// CHECK: affine.for %i1 = 0 to %1 {
// CHECK: affine.for %i2 = 0 to %2 step 256 {
// CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i3 = 0 to %1 step 256 {
// CHECK: for %i4 = 0 to %2 {
// CHECK: affine.for %i3 = 0 to %1 step 256 {
// CHECK: affine.for %i4 = 0 to %2 {
// CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i5 = 0 to %2 {
// CHECK: affine.for %i5 = 0 to %2 {
// CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %0 {
for %i1 = 0 to %1 {
for %i2 = 0 to %2 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 {
affine.for %i2 = 0 to %2 {
%a2 = load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
}
}
for %i3 = 0 to %1 {
for %i4 = 0 to %2 {
affine.for %i3 = 0 to %1 {
affine.for %i4 = 0 to %2 {
%a4 = load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
}
for %i5 = 0 to %2 {
affine.for %i5 = 0 to %2 {
%a5 = load %A[%i3, %i5, %i0] : memref<?x?x?xf32>
}
}
@@ -12,20 +12,20 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=0 --test-fastest-varying=1 no
// vectorization happens because of loop nesting order.
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}
}
// CHECK: for %i3 = 0 to %0 step 32
// CHECK: for %i4 = 0 to %1 {
// CHECK: for %i5 = 0 to %2 step 256
// CHECK: affine.for %i3 = 0 to %0 step 32
// CHECK: affine.for %i4 = 0 to %1 {
// CHECK: affine.for %i5 = 0 to %2 step 256
// CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
}
}
@@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
%0 = dim %A, 0 : memref<?x?x?xf32>
%1 = dim %A, 1 : memref<?x?x?xf32>
%2 = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 step 32 {
// CHECK: for %i1 = 0 to %1 step 256 {
// CHECK: for %i2 = 0 to %2 {
// CHECK: affine.for %i0 = 0 to %0 step 32 {
// CHECK: affine.for %i1 = 0 to %1 step 256 {
// CHECK: affine.for %i2 = 0 to %2 {
// CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i3 = 0 to %1 {
// CHECK: for %i4 = 0 to %2 step 256 {
// CHECK: affine.for %i3 = 0 to %1 {
// CHECK: affine.for %i4 = 0 to %2 step 256 {
// CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i5 = 0 to %2 step 256 {
// CHECK: affine.for %i5 = 0 to %2 step 256 {
// CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %0 {
for %i1 = 0 to %1 {
for %i2 = 0 to %2 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 {
affine.for %i2 = 0 to %2 {
%a2 = load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
}
}
for %i3 = 0 to %1 {
for %i4 = 0 to %2 {
affine.for %i3 = 0 to %1 {
affine.for %i4 = 0 to %2 {
%a4 = load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
}
for %i5 = 0 to %2 {
affine.for %i5 = 0 to %2 {
%a5 = load %A[%i3, %i5, %i0] : memref<?x?x?xf32>
}
}
@@ -205,10 +205,10 @@ func @dyn_shape_fold(%L : index, %M : index) -> (memref<? x ? x i32>, memref<? x
// CHECK-NEXT: %2 = alloc() : memref<512x1024xi32>
%c = alloc(%K, %N) : memref<? x ? x i32>

// CHECK: for %i0 =
for %i = 0 to %L {
// CHECK-NEXT: for %i1 =
for %j = 0 to 10 {
// CHECK: affine.for %i0 =
affine.for %i = 0 to %L {
// CHECK-NEXT: affine.for %i1 =
affine.for %j = 0 to 10 {
// CHECK-NEXT: %4 = load %0[%i0, %i1] : memref<?x1024xf32>
// CHECK-NEXT: store %4, %1[%c0, %c0, %i0, %i1, %c0] : memref<4x1024x8x512x?xf32>
%v = load %a[%i, %j] : memref<?x?xf32>
@@ -234,8 +234,8 @@ func @merge_constants() -> (index, index) {
// CHECK-LABEL: func @hoist_constant
func @hoist_constant(%arg0: memref<8xi32>) {
// CHECK-NEXT: %c42_i32 = constant 42 : i32
// CHECK-NEXT: for %i0 = 0 to 8 {
for %i0 = 0 to 8 {
// CHECK-NEXT: affine.for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
// CHECK-NEXT: store %c42_i32, %arg0[%i0]
%c42_i32 = constant 42 : i32
store %c42_i32, %arg0[%i0] : memref<8xi32>
@@ -2,8 +2,8 @@

// CHECK-LABEL: @test(%arg0: memref<f32>) {
func @test(%p : memref<f32>) {
for %i0 = 0 to 128 {
for %i1 = 0 to 8 { // CHECK: for %i1 = 0 to 8 {
affine.for %i0 = 0 to 128 {
affine.for %i1 = 0 to 8 { // CHECK: affine.for %i1 = 0 to 8 {
%0 = constant 4.5 : f32
%1 = constant 1.5 : f32

@@ -113,8 +113,8 @@ func @down_propagate_for() {
// CHECK: %c1_i32 = constant 1 : i32
%0 = constant 1 : i32

// CHECK-NEXT: for %i0 = 0 to 4 {
for %i = 0 to 4 {
// CHECK-NEXT: affine.for %i0 = 0 to 4 {
affine.for %i = 0 to 4 {
// CHECK-NEXT: "foo"(%c1_i32, %c1_i32) : (i32, i32) -> ()
%1 = constant 1 : i32
"foo"(%0, %1) : (i32, i32) -> ()
@@ -145,8 +145,8 @@ func @down_propagate() -> i32 {
/// Check that operation definitions are NOT propagated up the dominance tree.
// CHECK-LABEL: @up_propagate_for
func @up_propagate_for() -> i32 {
// CHECK: for %i0 = 0 to 4 {
for %i = 0 to 4 {
// CHECK: affine.for %i0 = 0 to 4 {
affine.for %i = 0 to 4 {
// CHECK-NEXT: %c1_i32 = constant 1 : i32
// CHECK-NEXT: "foo"(%c1_i32) : (i32) -> ()
%0 = constant 1 : i32
@@ -42,7 +42,7 @@ func @loop_nest_1d() {
// Second DMA transfer.
// CHECK: dma_start %1[%c256], %5[%c0], %c256_0, %6[%c0] : memref<512xf32>, memref<256xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %6[%c0], %c256_0 : memref<1xi32>
// CHECK: for %i0 = 0 to 256 {
// CHECK: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: %7 = load %3[%i0] : memref<256xf32, 2>
// CHECK: %8 = affine.apply [[MAP_PLUS_256]](%i0)
// CHECK: %9 = affine.apply [[MAP_MINUS_256]](%8)
@@ -55,7 +55,7 @@ func @loop_nest_1d() {
// CHECK-NEXT: dealloc %4 : memref<1xi32>
// CHECK-NEXT: dealloc %3 : memref<256xf32, 2>
// CHECK-NEXT: return
for %i = 0 to 256 {
affine.for %i = 0 to 256 {
load %A[%i] : memref<256 x f32>
%idx = affine.apply (d0) -> (d0 + 256)(%i)
load %B[%idx] : memref<512 x f32>
@@ -82,20 +82,20 @@ func @loop_nest_1d() {
// INCOMING DMA for C.
// CHECK-DAG: dma_start %arg2[%c0, %c0], [[BUFC]][%c0, %c0], %c16384_0, [[TAGC]][%c0] : memref<512x32xf32>, memref<512x32xf32, 2>, memref<1xi32>
// CHECK-DAG: dma_wait [[TAGC]][%c0], %c16384_0 : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 32 {
// CHECK-NEXT: for %i1 = 0 to 32 {
// CHECK-NEXT: for %i2 = 0 to 32 {
// CHECK-NEXT: for %i3 = 0 to 16 {
// CHECK-NEXT: affine.for %i0 = 0 to 32 {
// CHECK-NEXT: affine.for %i1 = 0 to 32 {
// CHECK-NEXT: affine.for %i2 = 0 to 32 {
// CHECK-NEXT: affine.for %i3 = 0 to 16 {
// CHECK-NEXT: %7 = affine.apply #map{{[0-9]+}}(%i1, %i3)
// CHECK-NEXT: %8 = load [[BUFB]][%7, %i0] : memref<512x32xf32, 2>
// CHECK-NEXT: "foo"(%8) : (f32) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: for %i4 = 0 to 16 {
// CHECK-NEXT: affine.for %i4 = 0 to 16 {
// CHECK-NEXT: %9 = affine.apply #map{{[0-9]+}}(%i2, %i4)
// CHECK-NEXT: %10 = load [[BUFA]][%9, %i1] : memref<512x32xf32, 2>
// CHECK-NEXT: "bar"(%10) : (f32) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: for %i5 = 0 to 16 {
// CHECK-NEXT: affine.for %i5 = 0 to 16 {
// CHECK-NEXT: %11 = "abc_compute"() : () -> f32
// CHECK-NEXT: %12 = affine.apply #map{{[0-9]+}}(%i2, %i5)
// CHECK-NEXT: %13 = load [[BUFC]][%12, %i0] : memref<512x32xf32, 2>
@@ -123,20 +123,20 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
// DMAs will be performed at this level (jT is the first loop without a stride).
// A and B are read, while C is both read and written. A total of three new buffers
// are allocated and existing loads/stores are replaced by accesses to those buffers.
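// Example (a hedged sketch with hypothetical %bufB/%tagB names, mirroring the
// dma_start/dma_wait CHECK lines earlier in this test): after the pass, the
// %B load in the k-intratile loop below reads a fast-space copy instead:
//   %bufB = alloc() : memref<512x32xf32, 2>
//   %tagB = alloc() : memref<1xi32>
//   dma_start %B[%c0, %c0], %bufB[%c0, %c0], %c16384, %tagB[%c0]
//     : memref<512x32xf32>, memref<512x32xf32, 2>, memref<1xi32>
//   dma_wait %tagB[%c0], %c16384 : memref<1xi32>
//   %v0 = load %bufB[%k, %jT] : memref<512x32xf32, 2>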
for %jT = 0 to 32 {
for %kT = 0 to 32 {
for %iT = 0 to 32 {
for %kk = 0 to 16 { // k intratile
affine.for %jT = 0 to 32 {
affine.for %kT = 0 to 32 {
affine.for %iT = 0 to 32 {
affine.for %kk = 0 to 16 { // k intratile
%k = affine.apply (d0, d1) -> (16*d0 + d1) (%kT, %kk)
%v0 = load %B[%k, %jT] : memref<512 x 32 x f32>
"foo"(%v0) : (f32) -> ()
}
for %ii = 0 to 16 { // i intratile.
affine.for %ii = 0 to 16 { // i intratile.
%i = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii)
%v1 = load %A[%i, %kT] : memref<512 x 32 x f32>
"bar"(%v1) : (f32) -> ()
}
for %ii_ = 0 to 16 { // i intratile.
affine.for %ii_ = 0 to 16 { // i intratile.
%v2 = "abc_compute"() : () -> f32
%i_ = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii_)
%v3 = load %C[%i_, %jT] : memref<512 x 32 x f32>
@@ -155,13 +155,13 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
//
// CHECK-LABEL: func @loop_nest_modulo() {
// CHECK: %0 = alloc() : memref<256x8xf32>
// CHECK-NEXT: for %i0 = 0 to 32 step 4 {
// CHECK-NEXT: affine.for %i0 = 0 to 32 step 4 {
// CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0)
// CHECK-NEXT: %2 = alloc() : memref<1x2xf32, 2>
// CHECK-NEXT: %3 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %0[%1, %c0], %2[%c0, %c0], %c2, %3[%c0] : memref<256x8xf32>, memref<1x2xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %3[%c0], %c2 : memref<1xi32>
// CHECK-NEXT: for %i1 = 0 to 8 {
// CHECK-NEXT: affine.for %i1 = 0 to 8 {
// ...
// ...
// CHECK: }
@@ -171,9 +171,9 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
// CHECK-NEXT: return
func @loop_nest_modulo() {
%A = alloc() : memref<256 x 8 x f32>
for %i = 0 to 32 step 4 {
affine.for %i = 0 to 32 step 4 {
// DMAs will be performed at this level (%j is the first unit stride loop)
for %j = 0 to 8 {
affine.for %j = 0 to 8 {
%idx = affine.apply (d0) -> (d0 mod 2) (%j)
// A buffer of size 32 x 2 will be allocated (original buffer was 256 x 8).
%v = load %A[%i, %idx] : memref<256 x 8 x f32>
@@ -187,17 +187,17 @@ func @loop_nest_modulo() {
// CHECK-LABEL: func @loop_nest_tiled() -> memref<256x1024xf32> {
func @loop_nest_tiled() -> memref<256x1024xf32> {
%0 = alloc() : memref<256x1024xf32>
for %i0 = 0 to 256 step 32 {
for %i1 = 0 to 1024 step 32 {
affine.for %i0 = 0 to 256 step 32 {
affine.for %i1 = 0 to 1024 step 32 {
// CHECK: %3 = alloc() : memref<32x32xf32, 2>
// CHECK-NEXT: %4 = alloc() : memref<1xi32>
// Strided DMA here: 32 x 32 tile in a 256 x 1024 memref.
// CHECK-NEXT: dma_start %0[%1, %2], %3[%c0, %c0], %c1024, %4[%c0], %c1024_0, %c32 : memref<256x1024xf32>, memref<32x32xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait
// CHECK-NEXT: for %i2 = #map
// CHECK-NEXT: for %i3 = #map
for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
// CHECK-NEXT: affine.for %i2 = #map
// CHECK-NEXT: affine.for %i3 = #map
affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
// CHECK-NEXT: %5 = affine.apply [[MAP_INDEX_DIFF_EVEN]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %6 = affine.apply [[MAP_INDEX_DIFF_ODD]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %7 = load %3[%5, %6] : memref<32x32xf32, 2>
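// Reading the strided dma_start matched above (a sketch of the operand
// roles, nothing added to the test): source %0[%1, %2], destination buffer
// %3[%c0, %c0], %c1024 total elements (the 32 x 32 tile), tag %4[%c0], then
// the trailing %c1024_0 and %c32 give the stride in the source memref and
// the number of elements per stride -- 32 contiguous floats copied every
// 1024 elements of the 256 x 1024 source.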
@@ -218,8 +218,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) {
// No strided DMA needed here.
// CHECK: dma_start %arg0[%c1, %c0], %0[%c0, %c0], %c100, %1[%c0] : memref<100x100xf32>, memref<1x100xf32, 2>,
// CHECK-NEXT: dma_wait %1[%c0], %c100 : memref<1xi32>
for %i = 0 to 100 {
for %j = 0 to ()[s0] -> (s0) ()[%N] {
affine.for %i = 0 to 100 {
affine.for %j = 0 to ()[s0] -> (s0) ()[%N] {
// CHECK: %2 = affine.apply [[MAP_D0_MINUS_ONE]](%c1_0, %i1)
// CHECK: %3 = affine.apply [[MAP_D1]](%c1_0, %i1)
// CHECK-NEXT: %4 = load %0[%2, %3] : memref<1x100xf32, 2>
@@ -232,8 +232,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) {
// CHECK-LABEL: func @dma_with_symbolic_accesses
func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) {
%N = constant 9 : index
for %i = 0 to 100 {
for %j = 0 to 100 {
affine.for %i = 0 to 100 {
affine.for %j = 0 to 100 {
%idy = affine.apply (d0, d1) [s0, s1] -> (d1 + s0 + s1)(%i, %j)[%M, %N]
load %A[%i, %idy] : memref<100 x 100 x f32>
}
@@ -243,8 +243,8 @@ func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) {
// CHECK-NEXT: %2 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %arg0[%c0, %0], %1[%c0, %c0], %c10000, %2[%c0]
// CHECK-NEXT: dma_wait %2[%c0], %c10000
// CHECK-NEXT: for %i0 = 0 to 100 {
// CHECK-NEXT: for %i1 = 0 to 100 {
// CHECK-NEXT: affine.for %i0 = 0 to 100 {
// CHECK-NEXT: affine.for %i1 = 0 to 100 {
// CHECK-NEXT: %3 = affine.apply [[MAP_SYM_SHIFT]](%i0, %i1)[%arg1, %c9]
// CHECK-NEXT: %4 = affine.apply [[MAP_3D_D1]](%arg1, %i0, %3)
// CHECK-NEXT: %5 = affine.apply [[MAP_SUB_OFFSET]](%arg1, %i0, %3)
@@ -263,8 +263,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in
// CHECK-NEXT: %1 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %arg0[%c0, %c0], %0[%c0, %c0], %c10000, %1[%c0] : memref<100x100xf32>, memref<100x100xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %1[%c0], %c10000 : memref<1xi32>
for %i = 0 to 100 {
for %j = %M to %N {
affine.for %i = 0 to 100 {
affine.for %j = %M to %N {
%idy = affine.apply (d1) [s0] -> (d1 + s0)(%j)[%K]
load %A[%i, %idy] : memref<100 x 100 x f32>
}
@@ -278,8 +278,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in
func @dma_unknown_size(%arg0: memref<?x?xf32>) {
%M = dim %arg0, 0 : memref<? x ? x f32>
%N = dim %arg0, 0 : memref<? x ? x f32>
for %i = 0 to %M {
for %j = 0 to %N {
affine.for %i = 0 to %M {
affine.for %j = 0 to %N {
// If this loop nest isn't tiled, the access requires a non-constant DMA
// size -- not yet implemented.
// CHECK: %2 = load %arg0[%i0, %i1] : memref<?x?xf32>
@@ -294,9 +294,9 @@ func @dma_unknown_size(%arg0: memref<?x?xf32>) {

// CHECK-LABEL: func @dma_memref_3d
func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) {
for %i = 0 to 1024 {
for %j = 0 to 1024 {
for %k = 0 to 1024 {
affine.for %i = 0 to 1024 {
affine.for %j = 0 to 1024 {
affine.for %k = 0 to 1024 {
%idx = affine.apply (d0) -> (d0 mod 128)(%i)
%idy = affine.apply (d0) -> (d0 mod 128)(%j)
%idz = affine.apply (d0) -> (d0 mod 128)(%k)
@@ -330,8 +330,8 @@ func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) {
// CHECK-LABEL: func @multi_load_store_union() {
func @multi_load_store_union() {
%A = alloc() : memref<512 x 512 x f32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx = affine.apply (d0) -> (d0 + 64)(%i)
%idy = affine.apply (d0) -> (d0 + 128)(%j)
%ishift = affine.apply (d0) -> (d0 + 2)(%i)
@@ -355,8 +355,8 @@ func @multi_load_store_union() {
// CHECK-NEXT: dma_start %0[%c2_1, %c2_2], %1[%c0, %c0], %c170372_3, %2[%c0], %c512_4, %c446_5 : memref<512x512xf32>, memref<382x446xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %2[%c0], %c170372_3 : memref<1xi32>
// CHECK-NEXT: %3 = alloc() : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 256 {
// CHECK-NEXT: for %i1 = 0 to 256 {
// CHECK-NEXT: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: affine.for %i1 = 0 to 256 {
// CHECK-NEXT: %4 = affine.apply [[MAP_PLUS_64]](%i0)
// CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_128]](%i1)
// CHECK-NEXT: %6 = affine.apply [[MAP_PLUS_2]](%i0)
@@ -395,7 +395,7 @@ func @dma_loop_straightline_interspersed() {
%c255 = constant 255 : index
%A = alloc() : memref<256 x f32>
%v = load %A[%c0] : memref<256 x f32>
for %i = 1 to 255 {
affine.for %i = 1 to 255 {
load %A[%i] : memref<256 x f32>
}
%l = load %A[%c255] : memref<256 x f32>
@@ -416,7 +416,7 @@ func @dma_loop_straightline_interspersed() {
// CHECK-NEXT: %5 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %0[%c1_0], %4[%c0], %c254, %5[%c0] : memref<256xf32>, memref<254xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %5[%c0], %c254 : memref<1xi32>
// CHECK-NEXT: for %i0 = 1 to 255 {
// CHECK-NEXT: affine.for %i0 = 1 to 255 {
// CHECK-NEXT: %6 = affine.apply [[MAP_MINUS_ONE]](%i0)
// CHECK-NEXT: %7 = load %4[%6] : memref<254xf32, 2>
// CHECK-NEXT: }
@@ -442,10 +442,10 @@ func @dma_loop_straightline_interspersed() {
func @dma_mixed_loop_blocks() {
%c0 = constant 0 : index
%A = alloc() : memref<256 x 256 x vector<8 x f32>>
for %i = 0 to 256 {
affine.for %i = 0 to 256 {
%v = load %A[%c0, %c0] : memref<256 x 256 x vector<8 x f32>>
"foo"(%v) : (vector<8 x f32>) -> ()
for %j = 0 to 256 {
affine.for %j = 0 to 256 {
%w = load %A[%i, %j] : memref<256 x 256 x vector<8 x f32>>
"bar"(%w) : (vector<8 x f32>) -> ()
}
@@ -457,17 +457,17 @@ func @dma_mixed_loop_blocks() {
// CHECK-DAG: [[TAG:%[0-9]+]] = alloc() : memref<1xi32>
// CHECK: dma_start [[MEM]][%c0, %c0], [[BUF]][%c0, %c0], %c65536, [[TAG]][%c0] : memref<256x256xvector<8xf32>>, memref<256x256xvector<8xf32>, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait [[TAG]][%c0], %c65536 : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 256 {
// CHECK-NEXT: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: %3 = load [[BUF]][%c0_0, %c0_0] : memref<256x256xvector<8xf32>, 2>
// CHECK: for %i1 = 0 to 256 {
// CHECK: affine.for %i1 = 0 to 256 {
// CHECK-NEXT: %4 = load [[BUF]][%i0, %i1] : memref<256x256xvector<8xf32>, 2>

// -----

// CHECK-LABEL: func @relative_loop_bounds
func @relative_loop_bounds(%arg0: memref<1027xf32>) {
for %i0 = 0 to 1024 {
for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) {
affine.for %i0 = 0 to 1024 {
affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) {
%0 = constant 0.0 : f32
store %0, %arg0[%i2] : memref<1027xf32>
}
@@ -476,8 +476,8 @@ func @relative_loop_bounds(%arg0: memref<1027xf32>) {
}
// CHECK: [[BUF:%[0-9]+]] = alloc() : memref<1027xf32, 2>
// CHECK-NEXT: [[MEM:%[0-9]+]] = alloc() : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 1024 {
// CHECK-NEXT: for %i1 = {{#map[0-9]+}}(%i0) to {{#map[0-9]+}}(%i0) {
// CHECK-NEXT: affine.for %i0 = 0 to 1024 {
// CHECK-NEXT: affine.for %i1 = {{#map[0-9]+}}(%i0) to {{#map[0-9]+}}(%i0) {
// CHECK-NEXT: %cst = constant 0.000000e+00 : f32
// CHECK-NEXT: store %cst, [[BUF]][%i1] : memref<1027xf32, 2>
// CHECK-NEXT: }
@@ -487,7 +487,7 @@ func @relative_loop_bounds(%arg0: memref<1027xf32>) {

// ----

// This should create a buffer of size 2 for %arg2.
// This should create a buffer of size 2 affine.for %arg2.

#map_lb = (d0) -> (d0)
#map_ub = (d0) -> (d0 + 3)
@@ -498,9 +498,9 @@ func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>,
%0 = alloc() : memref<64x1xf32>
%1 = alloc() : memref<144x4xf32>
%2 = constant 0.0 : f32
for %i8 = 0 to 9 step 3 {
for %i9 = #map_lb(%i8) to #map_ub(%i8) {
for %i17 = 0 to 64 {
affine.for %i8 = 0 to 9 step 3 {
affine.for %i9 = #map_lb(%i8) to #map_ub(%i8) {
affine.for %i17 = 0 to 64 {
%23 = affine.apply #map_acc(%i9)
%25 = load %arg2[%23] : memref<2xf32>
%26 = affine.apply #map_lb(%i17)
@@ -511,11 +511,11 @@ func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>,
}
return %arg1, %arg2 : memref<144x9xf32>, memref<2xf32>
}
// CHECK: for %i0 = 0 to 9 step 3 {
// CHECK: affine.for %i0 = 0 to 9 step 3 {
// CHECK: [[BUF:%[0-9]+]] = alloc() : memref<2xf32, 2>
// CHECK: dma_start %arg2[%4], [[BUF]]
// CHECK: dma_wait %6[%c0], %c2_0 : memref<1xi32>
// CHECK: for %i1 =
// CHECK: affine.for %i1 =

// -----

@@ -524,17 +524,17 @@ func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>,

// FAST-MEM-16KB-LABEL: func @load_store_same_memref
func @load_store_same_memref(%arg0: memref<256x1024xf32>) {
// FAST-MEM-16KB: for %i0 = 0 to 256 step 4
for %i0 = 0 to 256 step 4 {
// FAST-MEM-16KB: affine.for %i0 = 0 to 256 step 4
affine.for %i0 = 0 to 256 step 4 {
// FAST-MEM-16KB: [[BUF:%[0-9]+]] = alloc() : memref<4x1024xf32, 2>
// FAST-MEM-16KB: dma_start %arg0
// FAST-MEM-16KB-NEXT: dma_wait
// FAST-MEM-16KB: for %i1
for %i1 = 0 to 1024 step 4 {
// FAST-MEM-16KB: for %i2
for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) {
// FAST-MEM-16KB: for %i3
for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 4)(%i1) {
// FAST-MEM-16KB: affine.for %i1
affine.for %i1 = 0 to 1024 step 4 {
// FAST-MEM-16KB: affine.for %i2
affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) {
// FAST-MEM-16KB: affine.for %i3
affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 4)(%i1) {
%3 = load %arg0[%i2, %i3] : memref<256x1024xf32>
%4 = mulf %3, %3 : f32
store %4, %arg0[%i2, %i3] : memref<256x1024xf32>
@@ -560,12 +560,12 @@ func @load_store_same_memref(%arg0: memref<256x1024xf32>) {
#map1 = (d0) -> (d0 + 4)
// FAST-MEM-16KB-LABEL: func @simple_matmul
func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> {
for %i = 0 to 8 step 4 {
for %j = 0 to 8 step 4 {
for %k = 0 to 8 step 4 {
for %ii = #map0(%i) to #map1(%i) {
for %jj = #map0(%j) to #map1(%j) {
for %kk = #map0(%k) to #map1(%k) {
affine.for %i = 0 to 8 step 4 {
affine.for %j = 0 to 8 step 4 {
affine.for %k = 0 to 8 step 4 {
affine.for %ii = #map0(%i) to #map1(%i) {
affine.for %jj = #map0(%j) to #map1(%j) {
affine.for %kk = #map0(%k) to #map1(%k) {
%5 = load %arg0[%ii, %kk] : memref<8x8xvector<64xf32>>
%6 = load %arg1[%kk, %jj] : memref<8x8xvector<64xf32>>
%7 = load %arg2[%ii, %jj] : memref<8x8xvector<64xf32>>
@@ -580,18 +580,18 @@ func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<
}
return %arg2 : memref<8x8xvector<64xf32>>
}
// FAST-MEM-16KB: for %i0 = 0 to 8 step 4 {
// FAST-MEM-16KB: for %i1 = 0 to 8 step 4 {
// FAST-MEM-16KB: affine.for %i0 = 0 to 8 step 4 {
// FAST-MEM-16KB: affine.for %i1 = 0 to 8 step 4 {
// FAST-MEM-16KB: dma_start %arg2
// FAST-MEM-16KB: dma_wait
// FAST-MEM-16KB: for %i2 = 0 to 8 step 4 {
// FAST-MEM-16KB: affine.for %i2 = 0 to 8 step 4 {
// FAST-MEM-16KB: dma_start %arg0
// FAST-MEM-16KB: dma_wait
// FAST-MEM-16KB: dma_start %arg1
// FAST-MEM-16KB: dma_wait
// FAST-MEM-16KB: for %i3 = #map{{[0-9]+}}(%i0) to #map{{[0-9]+}}(%i0) {
// FAST-MEM-16KB-NEXT: for %i4 = #map{{[0-9]+}}(%i1) to #map{{[0-9]+}}(%i1) {
// FAST-MEM-16KB-NEXT: for %i5 = #map{{[0-9]+}}(%i2) to #map{{[0-9]+}}(%i2) {
// FAST-MEM-16KB: affine.for %i3 = #map{{[0-9]+}}(%i0) to #map{{[0-9]+}}(%i0) {
// FAST-MEM-16KB-NEXT: affine.for %i4 = #map{{[0-9]+}}(%i1) to #map{{[0-9]+}}(%i1) {
// FAST-MEM-16KB-NEXT: affine.for %i5 = #map{{[0-9]+}}(%i2) to #map{{[0-9]+}}(%i2) {
// FAST-MEM-16KB: }
// FAST-MEM-16KB: }
// FAST-MEM-16KB: }
File diff suppressed because it is too large
@@ -9,12 +9,12 @@
// CHECK-DAG: [[UB_INTRA_TILE:#map[0-9]+]] = (d0, d1, d2) -> (d2 + 32, s0, 4096 floordiv s1)

// CHECK-LABEL: func @loop_tiling()
// CHECK-NEXT: for %i0 = 0 to 256 step 32 {
// CHECK-NEXT: for %i1 = 0 to 512 step 32 {
// CHECK-NEXT: for %i2 = 0 to 1024 step 32 {
// CHECK-NEXT: for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
// CHECK-NEXT: for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
// CHECK-NEXT: for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
// CHECK-NEXT: affine.for %i0 = 0 to 256 step 32 {
// CHECK-NEXT: affine.for %i1 = 0 to 512 step 32 {
// CHECK-NEXT: affine.for %i2 = 0 to 1024 step 32 {
// CHECK-NEXT: affine.for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
// CHECK-NEXT: affine.for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
// CHECK-NEXT: affine.for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
// CHECK-NEXT: "foo"(%i3, %i4, %i5) : (index, index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -22,32 +22,32 @@
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i6 = 0 to 50 step 32 {
// CHECK-NEXT: for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
// CHECK-NEXT: affine.for %i6 = 0 to 50 step 32 {
// CHECK-NEXT: affine.for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
// CHECK-NEXT: "bar"(%i7, %i7) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i8 = 0 to 21 step 32 {
// CHECK-NEXT: for %i9 = [[IDENTITY]](%i8) to 21 {
// CHECK-NEXT: affine.for %i8 = 0 to 21 step 32 {
// CHECK-NEXT: affine.for %i9 = [[IDENTITY]](%i8) to 21 {
// CHECK-NEXT: "foobar"(%i9) : (index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: return
func @loop_tiling() {
for %i = 0 to 256 {
for %j = 0 to 512 {
for %k = 0 to 1024 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 512 {
affine.for %k = 0 to 1024 {
"foo"(%i, %j, %k) : (index, index, index) -> ()
}
}
}

for %x = 0 to 50 {
affine.for %x = 0 to 50 {
"bar"(%x, %x) : (index, index) -> ()
}

// Intra-tile loop won't need a min expression.
for %y = 0 to 21 {
affine.for %y = 0 to 21 {
"foobar"(%y) : (index) -> ()
}

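// A short reading of the tiled bounds matched above (a sketch, not an added
// check): for the 0..50 range the tile starting at 32 is truncated, so the
// intra-tile upper bound stays "min [[MAP1]](%i6)"; for 0..21 the single
// tile starting at 0 already covers the range, so min(%i8 + 32, 21) folds to
// the constant 21 and no min is emitted.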
@@ -59,12 +59,12 @@ func @loop_tiling() {
// CHECK-LABEL: func @loop_max_min_bound(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
func @loop_max_min_bound(%A : memref<? x i32>, %L : index, %U : index) {
%M = dim %A, 0 : memref<? x i32>
for %iTT = max #lb()[%L] to min #ub()[%M, %U] {
affine.for %iTT = max #lb()[%L] to min #ub()[%M, %U] {
%out = affine.apply (d0) -> (d0) (%iTT)
}
return
// CHECK: for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
// CHECK-NEXT: for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
// CHECK: affine.for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
// CHECK-NEXT: affine.for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
// CHECK-NEXT: %1 = affine.apply [[IDENTITY]](%i1)
// CHECK-NEXT: }
// CHECK-NEXT: }
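// The same idea with symbolic bounds (a reading of [[UB_INTRA_TILE]] above,
// not an added check): the intra-tile loop's upper bound takes the min of
// the tile start plus the tile size 32 and every result of the original min
// upper bound, with the outer induction variable %i0 passed in as the last
// operand.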
@@ -78,9 +78,9 @@ func @loop_max_min_bound(%A : memref<? x i32>, %L : index, %U : index) {

// MODEL-LABEL: func @simple_matmul
func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> {
for %i = 0 to 256 {
for %j = 0 to 256 {
for %k = 0 to 250 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
affine.for %k = 0 to 250 {
%l = load %arg0[%i, %k] : memref<8x8xvector<64xf32>>
%r = load %arg1[%k, %j] : memref<8x8xvector<64xf32>>
%o = load %arg2[%i, %j] : memref<8x8xvector<64xf32>>
@@ -92,6 +92,6 @@ func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<
}
return %arg2 : memref<8x8xvector<64xf32>>
}
// MODEL: for %i0 = 0 to 256 step 4 {
// MODEL-NEXT: for %i1 = 0 to 256 step 4 {
// MODEL-NEXT: for %i2 = 0 to 250 step 5 {
// MODEL: affine.for %i0 = 0 to 256 step 4 {
// MODEL-NEXT: affine.for %i1 = 0 to 256 step 4 {
// MODEL-NEXT: affine.for %i2 = 0 to 250 step 5 {
@@ -24,7 +24,7 @@ func @body(index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @simple_loop() {
for %i = 1 to 42 {
affine.for %i = 1 to 42 {
call @body(%i) : (index) -> ()
}
return
@@ -65,9 +65,9 @@ func @post(index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @imperfectly_nested_loops() {
for %i = 0 to 42 {
affine.for %i = 0 to 42 {
call @pre(%i) : (index) -> ()
for %j = 7 to 56 step 2 {
affine.for %j = 7 to 56 step 2 {
call @body2(%i, %j) : (index, index) -> ()
}
call @post(%i) : (index) -> ()
@@ -122,13 +122,13 @@ func @body3(index, index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @more_imperfectly_nested_loops() {
for %i = 0 to 42 {
affine.for %i = 0 to 42 {
call @pre(%i) : (index) -> ()
for %j = 7 to 56 step 2 {
affine.for %j = 7 to 56 step 2 {
call @body2(%i, %j) : (index, index) -> ()
}
call @mid(%i) : (index) -> ()
for %k = 18 to 37 step 3 {
affine.for %k = 18 to 37 step 3 {
call @body3(%i, %k) : (index, index) -> ()
}
call @post(%i) : (index) -> ()
@@ -161,8 +161,8 @@ func @more_imperfectly_nested_loops() {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @affine_apply_loops_shorthand(%N : index) {
for %i = 0 to %N {
for %j = (d0)[]->(d0)(%i)[] to 42 {
affine.for %i = 0 to %N {
affine.for %j = (d0)[]->(d0)(%i)[] to 42 {
call @body2(%i, %j) : (index, index) -> ()
}
}
@@ -360,7 +360,7 @@ func @if_for() {
// CHECK-NEXT: [[outerEndBB]]:
// CHECK-NEXT: br [[outerLoopInit:\^bb[0-9]+]]
affine.if #set1(%i) {
for %j = 0 to 42 {
affine.for %j = 0 to 42 {
affine.if #set2(%j) {
call @body2(%i, %j) : (index, index) -> ()
}
@@ -397,9 +397,9 @@ func @if_for() {
// CHECK-NEXT: %c1_9 = constant 1 : index
// CHECK-NEXT: %16 = addi %9, %c1_9 : index
// CHECK-NEXT: br [[outerLoopCond]](%16 : index)
for %k = 0 to 42 {
affine.for %k = 0 to 42 {
affine.if #set2(%k) {
for %l = 0 to 42 {
affine.for %l = 0 to 42 {
call @body3(%k, %l) : (index, index) -> ()
}
}
@@ -446,8 +446,8 @@ func @if_for() {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @loop_min_max(%N : index) {
for %i = 0 to 42 {
for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] {
affine.for %i = 0 to 42 {
affine.for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] {
call @body2(%i, %j) : (index, index) -> ()
}
}
@@ -486,7 +486,7 @@ func @loop_min_max(%N : index) {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @min_reduction_tree(%v : index) {
for %i = 0 to min #map_7_values(%v)[] {
affine.for %i = 0 to min #map_7_values(%v)[] {
call @body(%i) : (index) -> ()
}
return
@ -11,8 +11,8 @@ func @test() {
%A = alloc() : memref<9 x 9 x i32>
%B = alloc() : memref<111 x i32>

for %i = -1 to 10 {
for %j = -1 to 10 {
affine.for %i = -1 to 10 {
affine.for %j = -1 to 10 {
%idx0 = affine.apply (d0, d1) -> (d0)(%i, %j)
%idx1 = affine.apply (d0, d1) -> (d1)(%i, %j)
// Out of bound access.
@ -27,7 +27,7 @@ func @test() {
}
}

for %k = 0 to 10 {
affine.for %k = 0 to 10 {
// In bound.
%u = load %B[%zero] : memref<111 x i32>
// Out of bounds.
@ -43,8 +43,8 @@ func @test_mod_floordiv_ceildiv() {
%zero = constant 0 : index
%A = alloc() : memref<128 x 64 x 64 x i32>

for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j)
%idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j)
%idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j)
@ -69,8 +69,8 @@ func @test_no_out_of_bounds() {
%C = alloc() : memref<257 x i32>
%B = alloc() : memref<1 x i32>

for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
// All of these accesses are in bound; check that no errors are emitted.
// CHECK: %3 = affine.apply {{#map.*}}(%i0, %i1)
// CHECK-NEXT: %4 = load %0[%3, %c0] : memref<257x256xi32>
@ -93,8 +93,8 @@ func @mod_div() {
%zero = constant 0 : index
%A = alloc() : memref<128 x 64 x 64 x i32>

for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j)
%idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j)
%idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j)
@ -115,8 +115,8 @@ func @mod_div() {
// CHECK-LABEL: func @mod_floordiv_nested() {
func @mod_floordiv_nested() {
%A = alloc() : memref<256 x 256 x i32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1) -> ((d0 mod 1024) floordiv 4)(%i, %j)
%idx1 = affine.apply (d0, d1) -> ((((d1 mod 128) mod 32) ceildiv 4) * 32)(%i, %j)
load %A[%idx0, %idx1] : memref<256 x 256 x i32> // expected-error {{'std.load' op memref out of upper bound access along dimension #2}}
@ -128,7 +128,7 @@ func @mod_floordiv_nested() {
// CHECK-LABEL: func @test_semi_affine_bailout
func @test_semi_affine_bailout(%N : index) {
%B = alloc() : memref<10 x i32>
for %i = 0 to 10 {
affine.for %i = 0 to 10 {
%idx = affine.apply (d0)[s0] -> (d0 * s0)(%i)[%N]
%y = load %B[%idx] : memref<10 x i32>
// expected-error@-1 {{getMemRefRegion: compose affine map failed}}
@ -139,7 +139,7 @@ func @test_semi_affine_bailout(%N : index) {
// CHECK-LABEL: func @multi_mod_floordiv
func @multi_mod_floordiv() {
%A = alloc() : memref<2x2xi32>
for %ii = 0 to 64 {
affine.for %ii = 0 to 64 {
%idx0 = affine.apply (d0) -> ((d0 mod 147456) floordiv 1152) (%ii)
%idx1 = affine.apply (d0) -> (((d0 mod 147456) mod 1152) floordiv 384) (%ii)
%v = load %A[%idx0, %idx1] : memref<2x2xi32>
@ -154,8 +154,8 @@ func @delinearize_mod_floordiv() {
%out = alloc() : memref<64x9xi32>

// Reshape '%in' into '%out'.
for %ii = 0 to 64 {
for %jj = 0 to 9 {
affine.for %ii = 0 to 64 {
affine.for %jj = 0 to 9 {
%a0 = affine.apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj)
%a10 = affine.apply (d0) ->
(d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0)
@ -190,7 +190,7 @@ func @out_of_bounds() {
%in = alloc() : memref<1xi32>
%c9 = constant 9 : i32

for %i0 = 10 to 11 {
affine.for %i0 = 10 to 11 {
%idy = affine.apply (d0) -> (100 * d0 floordiv 1000) (%i0)
store %c9, %in[%idy] : memref<1xi32> // expected-error {{'std.store' op memref out of upper bound access along dimension #1}}
}
@ -210,8 +210,8 @@ func @out_of_bounds() {
func @test_complex_mod_floordiv(%arg0: memref<4x4x16x1xf32>) {
%c0 = constant 0 : index
%0 = alloc() : memref<1x2x3x3x16x1xf32>
for %i0 = 0 to 64 {
for %i1 = 0 to 9 {
affine.for %i0 = 0 to 64 {
affine.for %i1 = 0 to 9 {
%2 = affine.apply #map3(%i0, %i1)
%3 = affine.apply #map4(%i0, %i1)
%4 = affine.apply #map5(%i0, %i1)
@ -231,8 +231,8 @@ func @test_complex_mod_floordiv(%arg0: memref<4x4x16x1xf32>) {
func @test_mod_bound() {
%0 = alloc() : memref<7 x f32>
%1 = alloc() : memref<6 x f32>
for %i0 = 0 to 4096 {
for %i1 = #map0(%i0) to #map1(%i0) {
affine.for %i0 = 0 to 4096 {
affine.for %i1 = #map0(%i0) to #map1(%i0) {
load %0[%i1] : memref<7 x f32>
load %1[%i1] : memref<6 x f32>
// expected-error@-1 {{'std.load' op memref out of upper bound access along dimension #1}}
@ -253,13 +253,13 @@ func @test_floordiv_bound() {
%1 = alloc() : memref<1026 x f32>
%2 = alloc() : memref<4096 x f32>
%N = constant 2048 : index
for %i0 = 0 to 4096 {
for %i1 = #map0(%i0) to #map1(%i0) {
affine.for %i0 = 0 to 4096 {
affine.for %i1 = #map0(%i0) to #map1(%i0) {
load %0[%i1] : memref<1027 x f32>
load %1[%i1] : memref<1026 x f32>
// expected-error@-1 {{'std.load' op memref out of upper bound access along dimension #1}}
}
for %i2 = 0 to #map2(%N) {
affine.for %i2 = 0 to #map2(%N) {
// Within bounds.
%v = load %2[%i2] : memref<4096 x f32>
}
@ -277,9 +277,9 @@ func @test_floordiv_bound() {

// CHECK-LABEL: func @non_composed_bound_operand
func @non_composed_bound_operand(%arg0: memref<1024xf32>) {
for %i0 = 4 to 1028 step 4 {
affine.for %i0 = 4 to 1028 step 4 {
%i1 = affine.apply (d0) -> (d0 - 4) (%i0)
for %i2 = #map_lb(%i1) to #map_ub(%i1) {
affine.for %i2 = #map_lb(%i1) to #map_ub(%i1) {
%0 = load %arg0[%i2] : memref<1024xf32>
}
}
|
@ -10,14 +10,14 @@
func @simple_store_load() {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
}
return
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: }
// CHECK-NEXT: return
@ -30,7 +30,7 @@ func @multi_store_load() {
%cf8 = constant 8.0 : f32
%cf9 = constant 9.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -45,7 +45,7 @@ func @multi_store_load() {
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %cst_1 = constant 9.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: %1 = mulf %cst_1, %cst_1 : f32
// CHECK-NEXT: }
@ -59,8 +59,8 @@ func @multi_store_load() {
func @store_load_affine_apply() -> memref<10x10xf32> {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10x10xf32>
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%t0 = affine.apply (d0, d1) -> (d1 + 1)(%i0, %i1)
%t1 = affine.apply (d0, d1) -> (d0)(%i0, %i1)
%idx0 = affine.apply (d0, d1) -> (d1) (%t0, %t1)
@ -75,8 +75,8 @@ func @store_load_affine_apply() -> memref<10x10xf32> {
return %m : memref<10x10xf32>
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %0 = alloc() : memref<10x10xf32>
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: for %i1 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i1 = 0 to 10 {
// CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1)
// CHECK-NEXT: %2 = affine.apply [[MAP1]](%i0, %i1)
// CHECK-NEXT: %3 = affine.apply [[MAP2]](%1, %2)
@ -92,17 +92,17 @@ func @store_load_affine_apply() -> memref<10x10xf32> {
func @store_load_nested(%N : index) {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
}
}
return
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: for %i1 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg0 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: }
// CHECK-NEXT: }
@ -117,12 +117,12 @@ func @multi_store_load_nested_no_fwd(%N : index) {
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
store %cf8, %m[%i1] : memref<10xf32>
}
for %i2 = 0 to %N {
affine.for %i2 = 0 to %N {
// CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -138,9 +138,9 @@ func @store_load_store_nested_no_fwd(%N : index) {
%cf7 = constant 7.0 : f32
%cf9 = constant 9.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
// CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -159,16 +159,16 @@ func @multi_store_load_nested_fwd(%N : index) {
%cf9 = constant 9.0 : f32
%cf10 = constant 10.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
store %cf8, %m[%i1] : memref<10xf32>
}
for %i2 = 0 to %N {
affine.for %i2 = 0 to %N {
store %cf9, %m[%i2] : memref<10xf32>
}
store %cf10, %m[%i0] : memref<10xf32>
for %i3 = 0 to %N {
affine.for %i3 = 0 to %N {
// CHECK-NOT: %{{[0-9]+}} = load
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -182,10 +182,10 @@ func @multi_store_load_nested_fwd(%N : index) {
func @store_load_no_fwd() {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to 10 {
for %i2 = 0 to 10 {
affine.for %i1 = 0 to 10 {
affine.for %i2 = 0 to 10 {
// CHECK: load %{{[0-9]+}}
%v0 = load %m[%i2] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -202,9 +202,9 @@ func @store_load_fwd() {
%c0 = constant 0 : index
%m = alloc() : memref<10xf32>
store %cf7, %m[%c0] : memref<10xf32>
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
for %i2 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
affine.for %i2 = 0 to 10 {
// CHECK-NOT: load %{{[0-9]}}+
%v0 = load %m[%c0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -223,9 +223,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 {
%c0 = constant 0 : index
%c1 = constant 1 : index
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
%idx = affine.apply (d0) -> (d0 + 1) (%i0)
@ -236,9 +236,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 {
%v3 = load %m[%c1] : memref<10xf32>
return %v3 : f32
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32>
// CHECK-NEXT: for %i1 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg0 {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: %2 = affine.apply [[MAP4]](%i0)
// CHECK-NEXT: store %cst_0, %0[%2] : memref<10xf32>
|
@ -13,14 +13,14 @@ func @store_may_execute_before_load() {
// ancestor IfOp of the store, dominates the ancestor ForSmt of the load,
// and thus the store "may" conditionally execute before the load.
affine.if #set0(%c0) {
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
// expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}}
}
}
for %i1 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%v0 = load %m[%i1] : memref<10xf32>
// expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}}
// expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}}
@ -37,13 +37,13 @@ func @dependent_loops() {
%cst = constant 7.000000e+00 : f32
// There is a dependence from 0 to 1 at depth 1 (common surrounding loops 0)
// because the first loop with the store dominates the second loop.
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cst, %0[%i0] : memref<10xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
// expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}}
}
for %i1 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%1 = load %0[%i1] : memref<10xf32>
// expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}}
// expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}}
@ -231,7 +231,7 @@ func @store_range_load_after_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -254,7 +254,7 @@ func @store_load_func_symbol(%arg0: index, %arg1: index) {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to %arg1 {
affine.for %i0 = 0 to %arg1 {
%a0 = affine.apply (d0) -> (d0) (%arg0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = [1, +inf]}}
@ -277,7 +277,7 @@ func @store_range_load_last_in_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0) (%i0)
// For dependence from 0 to 1, we do not have a loop carried dependence
// because only the final write in the loop accesses the same element as the
@ -305,7 +305,7 @@ func @store_range_load_before_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c0 = constant 0 : index
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -328,7 +328,7 @@ func @store_range_load_first_in_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c0 = constant 0 : index
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
// Dependence from 0 to 1 at depth 1 is a range because all loads at
// constant index zero are reads after first store at index zero during
@ -353,7 +353,7 @@ func @store_range_load_first_in_range() {
func @store_plus_3() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0 + 3) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -375,7 +375,7 @@ func @store_plus_3() {
func @load_minus_2() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 2 to 11 {
affine.for %i0 = 2 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -397,8 +397,8 @@ func @load_minus_2() {
func @perfectly_nested_loops_loop_independent() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 11 {
for %i1 = 0 to 11 {
affine.for %i0 = 0 to 11 {
affine.for %i1 = 0 to 11 {
// Dependence from access 0 to 1 is loop independent at depth = 3.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@ -428,8 +428,8 @@ func @perfectly_nested_loops_loop_independent() {
func @perfectly_nested_loops_loop_carried_at_depth1() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 9 {
for %i1 = 0 to 9 {
affine.for %i0 = 0 to 9 {
affine.for %i1 = 0 to 9 {
// Dependence from access 0 to 1 is loop carried at depth 1.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@ -459,8 +459,8 @@ func @perfectly_nested_loops_loop_carried_at_depth2() {
func @perfectly_nested_loops_loop_carried_at_depth2() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
// Dependence from access 0 to 1 is loop carried at depth 2.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@ -491,8 +491,8 @@ func @one_common_loop() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
// There is a loop-independent dependence from access 0 to 1 at depth 2.
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
store %c7, %m[%a00, %a01] : memref<10x10xf32>
@ -502,7 +502,7 @@ func @one_common_loop() {
// expected-note@-4 {{dependence from 0 to 1 at depth 1 = false}}
// expected-note@-5 {{dependence from 0 to 1 at depth 2 = true}}
}
for %i2 = 0 to 9 {
affine.for %i2 = 0 to 9 {
%a10 = affine.apply (d0, d1) -> (d0) (%i0, %i2)
%a11 = affine.apply (d0, d1) -> (d1) (%i0, %i2)
%v0 = load %m[%a10, %a11] : memref<10x10xf32>
@ -525,7 +525,7 @@ func @dependence_cycle() {
// Dependences:
// *) loop-independent dependence from access 1 to 2 at depth 2.
// *) loop-carried dependence from access 3 to 0 at depth 1.
for %i0 = 0 to 9 {
affine.for %i0 = 0 to 9 {
%a0 = affine.apply (d0) -> (d0) (%i0)
%v0 = load %m.a[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -575,8 +575,8 @@ func @dependence_cycle() {
func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to %arg0 {
for %i1 = 0 to %arg1 {
affine.for %i0 = 0 to %arg0 {
affine.for %i1 = 0 to %arg1 {
%a00 = affine.apply (d0, d1) -> (d0 - 1) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1 + 1) (%i0, %i1)
%v0 = load %m[%a00, %a01] : memref<10x10xf32>
@ -605,8 +605,8 @@ func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) {
func @war_raw_waw_deps() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0 + 1) (%i1)
%v0 = load %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -633,7 +633,7 @@ func @war_raw_waw_deps() {
func @mod_deps() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0 mod 2) (%i0)
// Results are conservative here since we currently don't have a way to
// represent strided sets in FlatAffineConstraints.
@ -658,8 +658,8 @@ func @loop_nest_depth() {
%0 = alloc() : memref<100x100xf32>
%c7 = constant 7.0 : f32

for %i0 = 0 to 128 {
for %i1 = 0 to 8 {
affine.for %i0 = 0 to 128 {
affine.for %i1 = 0 to 8 {
store %c7, %0[%i0, %i1] : memref<100x100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
@ -667,10 +667,10 @@ func @loop_nest_depth() {
// expected-note@-4 {{dependence from 0 to 1 at depth 1 = true}}
}
}
for %i2 = 0 to 8 {
for %i3 = 0 to 8 {
for %i4 = 0 to 8 {
for %i5 = 0 to 16 {
affine.for %i2 = 0 to 8 {
affine.for %i3 = 0 to 8 {
affine.for %i4 = 0 to 8 {
affine.for %i5 = 0 to 16 {
%8 = affine.apply (d0, d1) -> (d0 * 16 + d1)(%i4, %i5)
%9 = load %0[%8, %i3] : memref<100x100xf32>
// expected-note@-1 {{dependence from 1 to 0 at depth 1 = false}}
@ -693,9 +693,9 @@ func @loop_nest_depth() {
func @mod_div_3d() {
%M = alloc() : memref<2x2x2xi32>
%c0 = constant 0 : i32
for %i0 = 0 to 8 {
for %i1 = 0 to 8 {
for %i2 = 0 to 8 {
affine.for %i0 = 0 to 8 {
affine.for %i1 = 0 to 8 {
affine.for %i2 = 0 to 8 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 floordiv 4) (%i0, %i1, %i2)
%idx1 = affine.apply (d0, d1, d2) -> (d1 mod 2) (%i0, %i1, %i2)
%idx2 = affine.apply (d0, d1, d2) -> (d2 floordiv 4) (%i0, %i1, %i2)
@ -719,12 +719,12 @@ func @delinearize_mod_floordiv() {
%in = alloc() : memref<2x2x3x3x16x1xi32>
%out = alloc() : memref<64x9xi32>

for %i0 = 0 to 2 {
for %i1 = 0 to 2 {
for %i2 = 0 to 3 {
for %i3 = 0 to 3 {
for %i4 = 0 to 16 {
for %i5 = 0 to 1 {
affine.for %i0 = 0 to 2 {
affine.for %i1 = 0 to 2 {
affine.for %i2 = 0 to 3 {
affine.for %i3 = 0 to 3 {
affine.for %i4 = 0 to 16 {
affine.for %i5 = 0 to 1 {
store %val, %in[%i0, %i1, %i2, %i3, %i4, %i5] : memref<2x2x3x3x16x1xi32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
@ -742,8 +742,8 @@ func @delinearize_mod_floordiv() {
}
}

for %ii = 0 to 64 {
for %jj = 0 to 9 {
affine.for %ii = 0 to 64 {
affine.for %jj = 0 to 9 {
%a0 = affine.apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj)
%a10 = affine.apply (d0) ->
(d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0)
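For the conservative result flagged in @mod_deps above: the map (d0) -> (d0 mod 2) sends even iterations to element 0 and odd iterations to element 1, so the store recurs with stride 2 within each parity class. A hedged sketch of the access pattern, using only names that appear in the test:

affine.for %i0 = 0 to 10 {
  %a0 = affine.apply (d0) -> (d0 mod 2) (%i0)
  // %a0 is 0 for %i0 in {0, 2, 4, 6, 8} and 1 for %i0 in {1, 3, 5, 7, 9}:
  // a strided set that FlatAffineConstraints cannot represent exactly,
  // hence the conservative dependence answers reported by the checker.
}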
|
@ -5,11 +5,11 @@ func @loop_nest_3d_outer_two_parallel(%N : index) {
%0 = alloc() : memref<1024 x 1024 x vector<64xf32>>
%1 = alloc() : memref<1024 x 1024 x vector<64xf32>>
%2 = alloc() : memref<1024 x 1024 x vector<64xf32>>
for %i = 0 to %N {
affine.for %i = 0 to %N {
// expected-note@-1 {{parallel loop}}
for %j = 0 to %N {
affine.for %j = 0 to %N {
// expected-note@-1 {{parallel loop}}
for %k = 0 to %N {
affine.for %k = 0 to %N {
%5 = load %0[%i, %k] : memref<1024x1024xvector<64xf32>>
%6 = load %1[%k, %j] : memref<1024x1024xvector<64xf32>>
%7 = load %2[%i, %j] : memref<1024x1024xvector<64xf32>>
|
@ -16,13 +16,13 @@ func @loop_nest_dma() {
%zero = constant 0 : index
%num_elts = constant 128 : index

for %i = 0 to 8 {
affine.for %i = 0 to 8 {
dma_start %A[%i], %Ah[%i], %num_elts, %tag[%zero] : memref<256 x f32>, memref<32 x f32, 1>, memref<1 x f32>
dma_wait %tag[%zero], %num_elts : memref<1 x f32>
%v = load %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1>
%r = "compute"(%v) : (f32) -> (f32)
store %r, %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1>
for %j = 0 to 128 {
affine.for %j = 0 to 128 {
"do_more_compute"(%i, %j) : (index, index) -> ()
}
}
@ -34,7 +34,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %3 = affine.apply [[MOD_2]](%c0)
// CHECK-NEXT: %4 = affine.apply [[MOD_2]](%c0)
// CHECK-NEXT: dma_start %0[%c0], %1[%3, %c0], %c128, %2[%4, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
// CHECK-NEXT: for %i0 = 1 to 8 {
// CHECK-NEXT: affine.for %i0 = 1 to 8 {
// CHECK-NEXT: %5 = affine.apply [[MOD_2]](%i0)
// CHECK-NEXT: %6 = affine.apply [[MOD_2]](%i0)
// CHECK-NEXT: dma_start %0[%i0], %1[%5, %i0], %c128, %2[%6, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
@ -45,7 +45,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %10 = load %1[%9, %7] : memref<2x32xf32, 1>
// CHECK-NEXT: %11 = "compute"(%10) : (f32) -> f32
// CHECK-NEXT: store %11, %1[%9, %7] : memref<2x32xf32, 1>
// CHECK-NEXT: for %i1 = 0 to 128 {
// CHECK-NEXT: affine.for %i1 = 0 to 128 {
// CHECK-NEXT: "do_more_compute"(%7, %i1) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
@ -56,7 +56,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %15 = load %1[%14, %12] : memref<2x32xf32, 1>
// CHECK-NEXT: %16 = "compute"(%15) : (f32) -> f32
// CHECK-NEXT: store %16, %1[%14, %12] : memref<2x32xf32, 1>
// CHECK-NEXT: for %i2 = 0 to 128 {
// CHECK-NEXT: affine.for %i2 = 0 to 128 {
// CHECK-NEXT: "do_more_compute"(%12, %i2) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: dealloc %2 : memref<2x1xf32>
@ -70,7 +70,7 @@ func @loop_step(%arg0: memref<512xf32>,
%arg1: memref<512xf32>) {
%c0 = constant 0 : index
%c4 = constant 4 : index
for %i0 = 0 to 512 step 4 {
affine.for %i0 = 0 to 512 step 4 {
%1 = alloc() : memref<4xf32, 1>
%2 = alloc() : memref<1xi32>
dma_start %arg0[%i0], %1[%c0], %c4, %2[%c0]
@ -84,7 +84,7 @@ func @loop_step(%arg0: memref<512xf32>,
// CHECK: %2 = affine.apply [[FLOOR_MOD_2]](%c0)
// CHECK: %3 = affine.apply [[FLOOR_MOD_2]](%c0)
// CHECK-NEXT: dma_start %arg0[%c0], %0[%2, %c0_0], %c4, [[TAG]][%3, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32>
// CHECK-NEXT: for %i0 = 4 to 512 step 4 {
// CHECK-NEXT: affine.for %i0 = 4 to 512 step 4 {
// CHECK-NEXT: %4 = affine.apply [[FLOOR_MOD_2]](%i0)
// CHECK-NEXT: %5 = affine.apply [[FLOOR_MOD_2]](%i0)
// CHECK-NEXT: dma_start %arg0[%i0], %0[%4, %c0_0], %c4, [[TAG]][%5, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32>
@ -117,8 +117,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK-DAG: [[BUF_ARG2:%[0-9]+]] = alloc() : memref<2x64x4xvector<8xf32>, 2>
// CHECK-DAG: [[TAG_ARG2:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg2[
// CHECK: for %i0 = 1 to 8 {
for %i0 = 0 to 8 {
// CHECK: affine.for %i0 = 1 to 8 {
affine.for %i0 = 0 to 8 {
%6 = affine.apply #map2(%i0)
dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32>
dma_wait %5[%c0], %num_elts : memref<2xi32>
@ -132,8 +132,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: [[TAG_ARG1:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK-NEXT for %i1 = 1 to 8 {
for %i1 = 0 to 8 {
// CHECK-NEXT affine.for %i1 = 1 to 8 {
affine.for %i1 = 0 to 8 {
%7 = affine.apply #map1(%i0, %i1)
%8 = affine.apply #map2(%i1)
dma_start %arg0[%7, %c0], %0[%c0, %c0], %num_elts, %3[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32>
@ -145,8 +145,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: dma_start %arg1[
// CHECK: dma_wait [[TAG_ARG0]]
// CHECK: dma_wait [[TAG_ARG1]]
// CHECK-NEXT: for %i2 = 0 to 4 {
for %i2 = 0 to 4 {
// CHECK-NEXT: affine.for %i2 = 0 to 4 {
affine.for %i2 = 0 to 4 {
"foo"() : () -> ()
}
}
@ -166,16 +166,16 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: [[TAG_ARG1_NESTED:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK: for %i4 = 1 to 8 {
// CHECK: affine.for %i4 = 1 to 8 {
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK: dma_wait [[TAG_ARG0_NESTED]]
// CHECK: dma_wait [[TAG_ARG1_NESTED]]
// CHECK: for %i5 = 0 to 4 {
// CHECK: affine.for %i5 = 0 to 4 {
// CHECK: "foo"() : () -> ()
// CHECK: dma_wait [[TAG_ARG0_NESTED]]
// CHECK: dma_wait [[TAG_ARG1_NESTED]]
// CHECK: for %i6 = 0 to 4 {
// CHECK: affine.for %i6 = 0 to 4 {
}
return
// CHECK: }
@ -202,8 +202,8 @@ func @loop_dma_dependent(%arg2: memref<512x32xvector<8xf32>>) {
// The two DMAs below are dependent (incoming and outgoing on the same
// memref) in the same iteration; so no pipelining here.
// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 8 {
for %i0 = 0 to 8 {
// CHECK: affine.for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
%6 = affine.apply #map2(%i0)
dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>>, memref<64x4xvector<8xf32>, 2>, memref<2xi32>
dma_wait %5[%c0], %num_elts : memref<2xi32>
@ -223,8 +223,8 @@ func @escaping_use(%arg0: memref<512 x 32 x f32>) {
%tag = alloc() : memref<1 x i32>

// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 16 {
for %kTT = 0 to 16 {
// CHECK: affine.for %i0 = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<32 x 32 x f32, 2>, memref<1 x i32>
@ -247,14 +247,14 @@ func @live_out_use(%arg0: memref<512 x 32 x f32>) -> f32 {
%tag = alloc() : memref<1 x i32>

// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 16 {
for %kTT = 0 to 16 {
// CHECK: affine.for %i0 = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<32 x 32 x f32, 2>, memref<1 x i32>
dma_wait %tag[%zero], %num_elt : memref<1 x i32>
}
// Use live out of 'for' inst; no DMA pipelining will be done.
// Use live out of 'affine.for' inst; no DMA pipelining will be done.
%v = load %Av[%zero, %zero] : memref<32 x 32 x f32, 2>
return %v : f32
// CHECK: %{{[0-9]+}} = load %{{[0-9]+}}[%c0, %c0] : memref<32x32xf32, 2>
@ -278,14 +278,14 @@ func @dynamic_shape_dma_buffer(%arg0: memref<512 x 32 x f32>) {
// CHECK: %5 = affine.apply [[MOD_2]](%c0)
// CHECK: %6 = affine.apply [[MOD_2]](%c0)
// CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%5, %c0_0, %c0_0], %c512, %4[%6, %c0_0]
for %kTT = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<? x ? x f32, 2>, memref<1 x i32>
dma_wait %tag[%zero], %num_elt : memref<1 x i32>
}
return
// CHECK-NEXT: for %i0 = 1 to 16 {
// CHECK-NEXT: affine.for %i0 = 1 to 16 {
// CHECK: %7 = affine.apply [[MOD_2]](%i0)
// CHECK: %8 = affine.apply [[MOD_2]](%i0)
// CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%7, %c0_0, %c0_0], %c512, %4[%8, %c0_0]
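The CHECK lines in this file encode the double-buffering rewrite performed by DMA pipelining: each buffer and tag memref gains a leading dimension of 2 (memref<2x32xf32, 1>, memref<2x1xf32>), and every access is steered through an affine.apply of [[MOD_2]], so one slot is computed on while the transfer for the next iteration fills the other. A sketch of the indexing shape, assuming [[MOD_2]] is (d0) -> (d0 mod 2) as its captured uses suggest:

affine.for %i0 = 1 to 8 {
  %slot = affine.apply (d0) -> (d0 mod 2) (%i0)
  // compute reads and writes buffer slot %i0 mod 2, while the overlapped
  // dma_start targets the complementary slot (%i0 + 1) mod 2
}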
|
@ -73,8 +73,8 @@

// CHECK-LABEL: func @test_gaussian_elimination_empty_set0() {
func @test_gaussian_elimination_empty_set0() {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: [[SET_EMPTY_2D]](%i0, %i1)
affine.if (d0, d1) : (2 == 0)(%i0, %i1) {
}
@ -85,8 +85,8 @@ func @test_gaussian_elimination_empty_set0() {

// CHECK-LABEL: func @test_gaussian_elimination_empty_set1() {
func @test_gaussian_elimination_empty_set1() {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: [[SET_EMPTY_2D]](%i0, %i1)
affine.if (d0, d1) : (1 >= 0, -1 >= 0) (%i0, %i1) {
}
@ -97,8 +97,8 @@ func @test_gaussian_elimination_empty_set1() {

// CHECK-LABEL: func @test_gaussian_elimination_non_empty_set2() {
func @test_gaussian_elimination_non_empty_set2() {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set1(%i0, %i1)
affine.if #set2(%i0, %i1) {
}
@ -111,8 +111,8 @@ func @test_gaussian_elimination_non_empty_set2() {
func @test_gaussian_elimination_empty_set3() {
%c7 = constant 7 : index
%c11 = constant 11 : index
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set2(%i0, %i1)[%c7, %c11]
affine.if #set3(%i0, %i1)[%c7, %c11] {
}
@ -125,8 +125,8 @@ func @test_gaussian_elimination_empty_set3() {
func @test_gaussian_elimination_non_empty_set4() {
%c7 = constant 7 : index
%c11 = constant 11 : index
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set3(%i0, %i1)[%c7, %c11]
affine.if #set4(%i0, %i1)[%c7, %c11] {
}
@ -139,8 +139,8 @@ func @test_gaussian_elimination_non_empty_set4() {
func @test_gaussian_elimination_empty_set5() {
%c7 = constant 7 : index
%c11 = constant 11 : index
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set2(%i0, %i1)[%c7, %c11]
affine.if #set5(%i0, %i1)[%c7, %c11] {
}
@ -151,8 +151,8 @@ func @test_gaussian_elimination_empty_set5() {

// CHECK-LABEL: func @test_fuzz_explosion
func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
affine.if #set_fuzz_virus(%i0, %i1, %arg0, %arg1, %arg2, %arg3) {
}
}
@ -163,8 +163,8 @@ func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : i

// CHECK-LABEL: func @test_empty_set(%arg0: index) {
func @test_empty_set(%N : index) {
for %i = 0 to 10 {
for %j = 0 to 10 {
affine.for %i = 0 to 10 {
affine.for %j = 0 to 10 {
// CHECK: affine.if [[SET_EMPTY_2D]](%i0, %i1)
affine.if (d0, d1) : (d0 - d1 >= 0, d1 - d0 - 1 >= 0)(%i, %j) {
"foo"() : () -> ()
@ -198,8 +198,8 @@ func @test_empty_set(%N : index) {
}
}
// The tests below test GCDTightenInequalities().
for %k = 0 to 10 {
for %l = 0 to 10 {
affine.for %k = 0 to 10 {
affine.for %l = 0 to 10 {
// Empty because no multiple of 8 lies between 4 and 7.
// CHECK: affine.if [[SET_EMPTY_1D]](%i2)
affine.if (d0) : (8*d0 - 4 >= 0, -8*d0 + 7 >= 0)(%k) {
@ -226,7 +226,7 @@ func @test_empty_set(%N : index) {
}
}

for %m = 0 to 10 {
affine.for %m = 0 to 10 {
// CHECK: affine.if [[SET_EMPTY_1D]](%i{{[0-9]+}})
affine.if (d0) : (d0 mod 2 - 3 == 0) (%m) {
"foo"() : () -> ()
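The GCDTightenInequalities cases above reduce to simple integer reasoning. For the first one, the constraints 8*d0 - 4 >= 0 and -8*d0 + 7 >= 0 force 4 <= 8*d0 <= 7, i.e. 1/2 <= d0 <= 7/8, which no integer d0 satisfies; the constraint system is therefore empty and, as the CHECK lines expect, the affine.if condition folds to the canonical empty set [[SET_EMPTY_1D]].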
|
@ -10,7 +10,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
%1 = "foo"() : () -> i32 loc("foo")

// CHECK: } loc(unknown)
for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
} loc(fused["foo", "mysource.cc":10:8])

// CHECK: } loc(unknown)
|
@ -8,13 +8,13 @@
// CHECK-LABEL: func @unroll_jam_imperfect_nest() {
func @unroll_jam_imperfect_nest() {
// CHECK: %c100 = constant 100 : index
// CHECK-NEXT: for %i0 = 0 to 100 step 2 {
for %i = 0 to 101 {
// CHECK-NEXT: affine.for %i0 = 0 to 100 step 2 {
affine.for %i = 0 to 101 {
// CHECK: %0 = "addi32"(%i0, %i0) : (index, index) -> i32
// CHECK-NEXT: %1 = affine.apply [[MAP_PLUS_1]](%i0)
// CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
%x = "addi32"(%i, %i) : (index, index) -> i32
for %j = 0 to 17 {
affine.for %j = 0 to 17 {
// CHECK: %3 = "addi32"(%i0, %i0) : (index, index) -> i32
// CHECK-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
// CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_1]](%i0)
@ -30,7 +30,7 @@ func @unroll_jam_imperfect_nest() {
} // CHECK }
// cleanup loop (single iteration)
// CHECK: %11 = "addi32"(%c100, %c100) : (index, index) -> i32
// CHECK-NEXT: for %i2 = 0 to 17 {
// CHECK-NEXT: affine.for %i2 = 0 to 17 {
// CHECK-NEXT: %12 = "addi32"(%c100, %c100) : (index, index) -> i32
// CHECK-NEXT: %13 = "addi32"(%12, %12) : (i32, i32) -> i32
// CHECK-NEXT: }
@ -40,20 +40,20 @@ func @unroll_jam_imperfect_nest() {

// CHECK-LABEL: func @loop_nest_unknown_count_1(%arg0: index) {
func @loop_nest_unknown_count_1(%N : index) {
// CHECK-NEXT: for %i0 = 1 to [[MAP_DIV_OFFSET]]()[%arg0] step 2 {
// CHECK-NEXT: for %i1 = 1 to 100 {
// CHECK-NEXT: affine.for %i0 = 1 to [[MAP_DIV_OFFSET]]()[%arg0] step 2 {
// CHECK-NEXT: affine.for %i1 = 1 to 100 {
// CHECK-NEXT: %0 = "foo"() : () -> i32
// CHECK-NEXT: %1 = "foo"() : () -> i32
// CHECK-NEXT: }
// CHECK-NEXT: }
// A cleanup loop should be generated here.
// CHECK-NEXT: for %i2 = [[MAP_DIV_OFFSET]]()[%arg0] to %arg0 {
// CHECK-NEXT: for %i3 = 1 to 100 {
// CHECK-NEXT: affine.for %i2 = [[MAP_DIV_OFFSET]]()[%arg0] to %arg0 {
// CHECK-NEXT: affine.for %i3 = 1 to 100 {
// CHECK-NEXT: %2 = "foo"() : () -> i32
// CHECK_NEXT: }
// CHECK_NEXT: }
for %i = 1 to %N {
for %j = 1 to 100 {
affine.for %i = 1 to %N {
affine.for %j = 1 to 100 {
%x = "foo"() : () -> i32
}
}
@ -62,8 +62,8 @@ func @loop_nest_unknown_count_1(%N : index) {

// CHECK-LABEL: func @loop_nest_unknown_count_2(%arg0: index) {
func @loop_nest_unknown_count_2(%arg : index) {
// CHECK-NEXT: for %i0 = %arg0 to [[M1]]()[%arg0] step 2 {
// CHECK-NEXT: for %i1 = 1 to 100 {
// CHECK-NEXT: affine.for %i0 = %arg0 to [[M1]]()[%arg0] step 2 {
// CHECK-NEXT: affine.for %i1 = 1 to 100 {
// CHECK-NEXT: %0 = "foo"(%i0) : (index) -> i32
// CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0)
// CHECK-NEXT: %2 = "foo"(%1) : (index) -> i32
@ -71,11 +71,11 @@ func @loop_nest_unknown_count_2(%arg : index) {
// CHECK-NEXT: }
// The cleanup loop is a single iteration one and is promoted.
// CHECK-NEXT: %3 = affine.apply [[M1]]()[%arg0]
// CHECK-NEXT: for %i2 = 1 to 100 {
// CHECK-NEXT: affine.for %i2 = 1 to 100 {
// CHECK-NEXT: %4 = "foo"(%3) : (index) -> i32
// CHECK_NEXT: }
for %i = %arg to ()[s0] -> (s0+9) ()[%arg] {
for %j = 1 to 100 {
affine.for %i = %arg to ()[s0] -> (s0+9) ()[%arg] {
affine.for %j = 1 to 100 {
%x = "foo"(%i) : (index) -> i32
}
}
@ -84,22 +84,22 @@ func @loop_nest_unknown_count_2(%arg : index) {

// CHECK-LABEL: func @loop_nest_symbolic_and_min_upper_bound
func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
for %i = 0 to min ()[s0, s1] -> (s0, s1, 1024)()[%M, %N] {
for %j = 0 to %K {
affine.for %i = 0 to min ()[s0, s1] -> (s0, s1, 1024)()[%M, %N] {
affine.for %j = 0 to %K {
"foo"(%i, %j) : (index, index) -> ()
}
}
return
}
// CHECK-NEXT: for %i0 = 0 to min [[MAP_MULTI_RES]]()[%arg0, %arg1] step 2 {
// CHECK-NEXT: for %i1 = 0 to %arg2 {
// CHECK-NEXT: affine.for %i0 = 0 to min [[MAP_MULTI_RES]]()[%arg0, %arg1] step 2 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg2 {
// CHECK-NEXT: "foo"(%i0, %i1) : (index, index) -> ()
// CHECK-NEXT: %0 = affine.apply #map2(%i0)
// CHECK-NEXT: "foo"(%0, %i1) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i2 = max [[MAP_MULTI_RES]]()[%arg0, %arg1] to min #map9()[%arg0, %arg1] {
// CHECK-NEXT: for %i3 = 0 to %arg2 {
// CHECK-NEXT: affine.for %i2 = max [[MAP_MULTI_RES]]()[%arg0, %arg1] to min #map9()[%arg0, %arg1] {
// CHECK-NEXT: affine.for %i3 = 0 to %arg2 {
// CHECK-NEXT: "foo"(%i2, %i3) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
|
@ -25,13 +25,13 @@
|
||||
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_simplest() {
|
||||
func @loop_nest_simplest() {
|
||||
// UNROLL-FULL: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: affine.for %i0 = 0 to 100 step 2 {
|
||||
affine.for %i = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: %c1_i32 = constant 1 : i32
|
||||
// UNROLL-FULL-NEXT: %c1_i32_0 = constant 1 : i32
|
||||
// UNROLL-FULL-NEXT: %c1_i32_1 = constant 1 : i32
|
||||
// UNROLL-FULL-NEXT: %c1_i32_2 = constant 1 : i32
|
||||
for %j = 0 to 4 {
|
||||
affine.for %j = 0 to 4 {
|
||||
%x = constant 1 : i32
|
||||
}
|
||||
} // UNROLL-FULL: }
|
||||
@ -41,8 +41,8 @@ func @loop_nest_simplest() {
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_simple_iv_use() {
|
||||
func @loop_nest_simple_iv_use() {
|
||||
// UNROLL-FULL: %c0 = constant 0 : index
|
||||
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 step 2 {
|
||||
affine.for %i = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
|
||||
// UNROLL-FULL: %1 = affine.apply [[MAP0]](%c0)
|
||||
// UNROLL-FULL-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
|
||||
@ -50,7 +50,7 @@ func @loop_nest_simple_iv_use() {
|
||||
// UNROLL-FULL-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> i32
|
||||
// UNROLL-FULL: %5 = affine.apply [[MAP2]](%c0)
|
||||
// UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
|
||||
for %j = 0 to 4 {
|
||||
affine.for %j = 0 to 4 {
|
||||
%x = "addi32"(%j, %j) : (index, index) -> i32
|
||||
}
|
||||
} // UNROLL-FULL: }
|
||||
@ -61,8 +61,8 @@ func @loop_nest_simple_iv_use() {
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_body_def_use() {
|
||||
func @loop_nest_body_def_use() {
|
||||
// UNROLL-FULL: %c0 = constant 0 : index
|
||||
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 step 2 {
|
||||
affine.for %i = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: %c0_0 = constant 0 : index
|
||||
%c0 = constant 0 : index
|
||||
// UNROLL-FULL: %0 = affine.apply [[MAP0]](%c0)
|
||||
@ -76,7 +76,7 @@ func @loop_nest_body_def_use() {
|
||||
// UNROLL-FULL-NEXT: %8 = affine.apply [[MAP2]](%c0)
|
||||
// UNROLL-FULL-NEXT: %9 = affine.apply [[MAP0]](%8)
|
||||
// UNROLL-FULL-NEXT: %10 = "addi32"(%9, %c0_0) : (index, index) -> index
|
||||
for %j = 0 to 4 {
|
||||
affine.for %j = 0 to 4 {
|
||||
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "addi32"(%x, %c0) : (index, index) -> index
|
||||
@ -89,14 +89,14 @@ func @loop_nest_body_def_use() {
|
||||
func @loop_nest_strided() {
|
||||
// UNROLL-FULL: %c2 = constant 2 : index
|
||||
// UNROLL-FULL-NEXT: %c2_0 = constant 2 : index
|
||||
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 {
|
||||
affine.for %i = 0 to 100 {
|
||||
// UNROLL-FULL: %0 = affine.apply [[MAP0]](%c2_0)
|
||||
// UNROLL-FULL-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %2 = affine.apply [[MAP1]](%c2_0)
|
||||
// UNROLL-FULL-NEXT: %3 = affine.apply [[MAP0]](%2)
|
||||
// UNROLL-FULL-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> index
|
||||
for %j = 2 to 6 step 2 {
|
||||
affine.for %j = 2 to 6 step 2 {
|
||||
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "addi32"(%x, %x) : (index, index) -> index
|
||||
@ -109,7 +109,7 @@ func @loop_nest_strided() {
|
||||
// UNROLL-FULL-NEXT: %10 = affine.apply [[MAP3]](%c2)
|
||||
// UNROLL-FULL-NEXT: %11 = affine.apply [[MAP0]](%10)
|
||||
// UNROLL-FULL-NEXT: %12 = "addi32"(%11, %11) : (index, index) -> index
|
||||
for %k = 2 to 7 step 2 {
|
||||
affine.for %k = 2 to 7 step 2 {
|
||||
%z = "affine.apply" (%k) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%w = "addi32"(%z, %z) : (index, index) -> index
|
||||
@ -121,8 +121,8 @@ func @loop_nest_strided() {
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_multiple_results() {
|
||||
func @loop_nest_multiple_results() {
|
||||
// UNROLL-FULL: %c0 = constant 0 : index
|
||||
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 {
|
||||
affine.for %i = 0 to 100 {
|
||||
// UNROLL-FULL: %0 = affine.apply [[MAP4]](%i0, %c0)
|
||||
// UNROLL-FULL-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %2 = affine.apply #map{{.*}}(%i0, %c0)
|
||||
@ -132,7 +132,7 @@ func @loop_nest_multiple_results() {
|
||||
// UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %7 = affine.apply #map{{.*}}(%i0, %4)
|
||||
// UNROLL-FULL-NEXT: %8 = "fma"(%7, %5, %5) : (index, index, index) -> (index, index)
|
||||
for %j = 0 to 2 step 1 {
|
||||
affine.for %j = 0 to 2 step 1 {
|
||||
%x = affine.apply (d0, d1) -> (d0 + 1) (%i, %j)
|
||||
%y = "addi32"(%x, %x) : (index, index) -> index
|
||||
%z = affine.apply (d0, d1) -> (d0 + 3) (%i, %j)
|
||||
@ -149,8 +149,8 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
|
||||
// UNROLL-FULL: %c0 = constant 0 : index
|
||||
// UNROLL-FULL-NEXT: %c128 = constant 128 : index
|
||||
%c128 = constant 128 : index
|
||||
// UNROLL-FULL: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-FULL: affine.for %i0 = 0 to 100 {
|
||||
affine.for %i = 0 to 100 {
|
||||
// UNROLL-FULL: %0 = "vld"(%i0) : (index) -> i32
|
||||
%ld = "vld"(%i) : (index) -> i32
|
||||
// UNROLL-FULL: %1 = affine.apply [[MAP0]](%c0)
|
||||
@ -168,7 +168,7 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
|
||||
// UNROLL-FULL-NEXT: %13 = affine.apply [[MAP0]](%12)
|
||||
// UNROLL-FULL-NEXT: %14 = "vmulf"(%12, %13) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %15 = "vaddf"(%14, %14) : (index, index) -> index
|
||||
for %j = 0 to 4 {
|
||||
affine.for %j = 0 to 4 {
|
||||
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "vmulf"(%j, %x) : (index, index) -> index
|
||||
@ -197,7 +197,7 @@ func @loop_nest_seq_multiple() {
|
||||
// UNROLL-FULL-NEXT: %5 = affine.apply [[MAP2]](%c0_0)
|
||||
// UNROLL-FULL-NEXT: %6 = affine.apply [[MAP0]](%5)
|
||||
// UNROLL-FULL-NEXT: "mul"(%6, %6) : (index, index) -> ()
|
||||
for %j = 0 to 4 {
|
||||
affine.for %j = 0 to 4 {
|
||||
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
"mul"(%x, %x) : (index, index) -> ()
|
||||
@ -205,8 +205,8 @@ func @loop_nest_seq_multiple() {
|
||||
|
||||
// UNROLL-FULL: %c99 = constant 99 : index
|
||||
%k = constant 99 : index
|
||||
// UNROLL-FULL: for %i0 = 0 to 100 step 2 {
|
||||
for %m = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: affine.for %i0 = 0 to 100 step 2 {
|
||||
affine.for %m = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: %7 = affine.apply [[MAP0]](%c0)
|
||||
// UNROLL-FULL-NEXT: %8 = affine.apply [[MAP6]](%c0)[%c99]
|
||||
// UNROLL-FULL-NEXT: %9 = affine.apply [[MAP0]](%c0)
|
||||
@ -218,7 +218,7 @@ func @loop_nest_seq_multiple() {
|
||||
// UNROLL-FULL-NEXT: %15 = affine.apply [[MAP2]](%c0)
|
||||
// UNROLL-FULL-NEXT: %16 = affine.apply [[MAP0]](%15)
|
||||
// UNROLL-FULL-NEXT: %17 = affine.apply [[MAP6]](%15)[%c99]
|
||||
for %n = 0 to 4 {
|
||||
affine.for %n = 0 to 4 {
|
||||
%y = "affine.apply" (%n) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%z = "affine.apply" (%n, %k) { map: (d0) [s0] -> (d0 + s0 + 1) } :
|
||||
@ -233,7 +233,7 @@ func @loop_nest_unroll_full() {
|
||||
// UNROLL-FULL-NEXT: %0 = "foo"() : () -> i32
|
||||
// UNROLL-FULL-NEXT: %1 = "bar"() : () -> i32
|
||||
// UNROLL-FULL-NEXT: return
|
||||
for %i = 0 to 1 {
|
||||
affine.for %i = 0 to 1 {
|
||||
%x = "foo"() : () -> i32
|
||||
%y = "bar"() : () -> i32
|
||||
}
|
||||
@ -242,16 +242,16 @@ func @loop_nest_unroll_full() {
|
||||
|
||||
// SHORT-LABEL: func @loop_nest_outer_unroll() {
|
||||
func @loop_nest_outer_unroll() {
|
||||
// SHORT: for %i0 = 0 to 4 {
|
||||
// SHORT: affine.for %i0 = 0 to 4 {
|
||||
// SHORT-NEXT: %0 = affine.apply [[MAP0]](%i0)
|
||||
// SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
|
||||
// SHORT-NEXT: }
|
||||
// SHORT-NEXT: for %i1 = 0 to 4 {
|
||||
// SHORT-NEXT: affine.for %i1 = 0 to 4 {
|
||||
// SHORT-NEXT: %2 = affine.apply [[MAP0]](%i1)
|
||||
// SHORT-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> index
|
||||
// SHORT-NEXT: }
|
||||
for %i = 0 to 2 {
|
||||
for %j = 0 to 4 {
|
||||
affine.for %i = 0 to 2 {
|
||||
affine.for %j = 0 to 4 {
|
||||
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "addi32"(%x, %x) : (index, index) -> index
|
||||
@ -275,33 +275,33 @@ func @loop_nest_seq_long() -> i32 {
|
||||
|
||||
%zero_idx = constant 0 : index
|
||||
|
||||
// CHECK: for %i0 = 0 to 512
|
||||
for %n0 = 0 to 512 {
|
||||
// CHECK: for %i1 = 0 to 8
|
||||
for %n1 = 0 to 8 {
|
||||
// CHECK: affine.for %i0 = 0 to 512
|
||||
affine.for %n0 = 0 to 512 {
|
||||
// CHECK: affine.for %i1 = 0 to 8
|
||||
affine.for %n1 = 0 to 8 {
|
||||
store %one, %A[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
store %two, %B[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
store %zero, %C[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
}
|
||||
}
|
||||
|
||||
for %x = 0 to 2 {
|
||||
for %y = 0 to 2 {
|
||||
// CHECK: for %i2
|
||||
for %i2 = 0 to 8 {
|
||||
// CHECK-NOT: for %i3
|
||||
affine.for %x = 0 to 2 {
|
||||
affine.for %y = 0 to 2 {
|
||||
// CHECK: affine.for %i2
|
||||
affine.for %i2 = 0 to 8 {
|
||||
// CHECK-NOT: affine.for %i3
|
||||
// CHECK: %{{[0-9]+}} = affine.apply
|
||||
%b2 = "affine.apply" (%y, %i2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%z = load %B[%x, %b2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
"op1"(%z) : (i32) -> ()
|
||||
}
|
||||
for %j1 = 0 to 8 {
|
||||
for %j2 = 0 to 8 {
|
||||
affine.for %j1 = 0 to 8 {
|
||||
affine.for %j2 = 0 to 8 {
|
||||
%a2 = "affine.apply" (%y, %j2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%v203 = load %A[%j1, %a2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
"op2"(%v203) : (i32) -> ()
|
||||
}
|
||||
for %k2 = 0 to 8 {
|
||||
affine.for %k2 = 0 to 8 {
|
||||
%s0 = "op3"() : () -> i32
|
||||
%c2 = "affine.apply" (%x, %k2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%s1 = load %C[%j1, %c2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
@ -318,8 +318,8 @@ func @loop_nest_seq_long() -> i32 {
|
||||
|
||||
// UNROLL-BY-4-LABEL: func @unroll_unit_stride_no_cleanup() {
|
||||
func @unroll_unit_stride_no_cleanup() {
|
||||
// UNROLL-BY-4: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
|
||||
affine.for %i = 0 to 100 {
|
||||
// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 {
|
||||
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
|
||||
@ -333,13 +333,13 @@ func @unroll_unit_stride_no_cleanup() {
|
||||
// UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: }
|
||||
for %j = 0 to 8 {
|
||||
affine.for %j = 0 to 8 {
|
||||
%x = "addi32"(%j, %j) : (index, index) -> i32
|
||||
%y = "addi32"(%x, %x) : (i32, i32) -> i32
|
||||
}
|
||||
// empty loop
|
||||
// UNROLL-BY-4: for %i2 = 0 to 8 {
|
||||
for %k = 0 to 8 {
|
||||
// UNROLL-BY-4: affine.for %i2 = 0 to 8 {
|
||||
affine.for %k = 0 to 8 {
|
||||
}
|
||||
}
|
||||
return
|
||||
@@ -347,8 +347,8 @@ func @unroll_unit_stride_no_cleanup() {
 
 // UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
 func @unroll_unit_stride_cleanup() {
-  // UNROLL-BY-4: for %i0 = 0 to 100 {
-  for %i = 0 to 100 {
+  // UNROLL-BY-4: affine.for %i0 = 0 to 100 {
+  affine.for %i = 0 to 100 {
     // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 {
     // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
     // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
@@ -366,7 +366,7 @@ func @unroll_unit_stride_cleanup() {
     // UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
     // UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
     // UNROLL-BY-4-NEXT: }
-    for %j = 0 to 10 {
+    affine.for %j = 0 to 10 {
       %x = "addi32"(%j, %j) : (index, index) -> i32
       %y = "addi32"(%x, %x) : (i32, i32) -> i32
     }
@@ -376,8 +376,8 @@ func @unroll_unit_stride_cleanup() {
 
 // UNROLL-BY-4-LABEL: func @unroll_non_unit_stride_cleanup() {
 func @unroll_non_unit_stride_cleanup() {
-  // UNROLL-BY-4: for %i0 = 0 to 100 {
-  for %i = 0 to 100 {
+  // UNROLL-BY-4: affine.for %i0 = 0 to 100 {
+  affine.for %i = 0 to 100 {
     // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 2 to 42 step 20 {
     // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
     // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
@@ -395,7 +395,7 @@ func @unroll_non_unit_stride_cleanup() {
     // UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
     // UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
     // UNROLL-BY-4-NEXT: }
-    for %j = 2 to 48 step 5 {
+    affine.for %j = 2 to 48 step 5 {
      %x = "addi32"(%j, %j) : (index, index) -> i32
      %y = "addi32"(%x, %x) : (i32, i32) -> i32
    }
@@ -408,8 +408,8 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
 func @loop_nest_single_iteration_after_unroll(%N: index) {
   // UNROLL-BY-4: %c0 = constant 0 : index
   // UNROLL-BY-4: %c4 = constant 4 : index
-  // UNROLL-BY-4: for %i0 = 0 to %arg0 {
-  for %i = 0 to %N {
+  // UNROLL-BY-4: affine.for %i0 = 0 to %arg0 {
+  affine.for %i = 0 to %N {
     // UNROLL-BY-4: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
     // UNROLL-BY-4-NEXT: %1 = affine.apply [[MAP0]](%c0)
     // UNROLL-BY-4-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
@@ -419,7 +419,7 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
     // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
     // UNROLL-BY-4-NEXT: %7 = "addi32"(%c4, %c4) : (index, index) -> i32
     // UNROLL-BY-4-NOT: for
-    for %j = 0 to 5 {
+    affine.for %j = 0 to 5 {
       %x = "addi32"(%j, %j) : (index, index) -> i32
     } // UNROLL-BY-4-NOT: }
   } // UNROLL-BY-4: }
@@ -431,8 +431,8 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
 // No cleanup will be generated here.
 // UNROLL-BY-4-LABEL: func @loop_nest_operand1() {
 func @loop_nest_operand1() {
-  // UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
-  // UNROLL-BY-4-NEXT: for %i1 = 0 to #map{{[0-9]+}}(%i0) step 4
+  // UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
+  // UNROLL-BY-4-NEXT: affine.for %i1 = 0 to #map{{[0-9]+}}(%i0) step 4
   // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
   // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
   // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
@@ -440,8 +440,8 @@ func @loop_nest_operand1() {
   // UNROLL-BY-4-NEXT: }
   // UNROLL-BY-4-NEXT: }
   // UNROLL-BY-4-NEXT: return
-  for %i = 0 to 100 step 2 {
-    for %j = 0 to (d0) -> (d0 - d0 mod 4) (%i) {
+  affine.for %i = 0 to 100 step 2 {
+    affine.for %j = 0 to (d0) -> (d0 - d0 mod 4) (%i) {
      %x = "foo"() : () -> i32
    }
  }
@@ -451,8 +451,8 @@ func @loop_nest_operand1() {
 // No cleanup will be generated here.
 // UNROLL-BY-4-LABEL: func @loop_nest_operand2() {
 func @loop_nest_operand2() {
-  // UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
-  // UNROLL-BY-4-NEXT: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
+  // UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
+  // UNROLL-BY-4-NEXT: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
   // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
   // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
   // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
@@ -460,8 +460,8 @@ func @loop_nest_operand2() {
   // UNROLL-BY-4-NEXT: }
   // UNROLL-BY-4-NEXT: }
   // UNROLL-BY-4-NEXT: return
-  for %i = 0 to 100 step 2 {
-    for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) {
+  affine.for %i = 0 to 100 step 2 {
+    affine.for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) {
      %x = "foo"() : () -> i32
    }
  }
@@ -472,16 +472,16 @@ func @loop_nest_operand2() {
 // factor. The cleanup loop happens to be a single iteration one and is promoted.
 // UNROLL-BY-4-LABEL: func @loop_nest_operand3() {
 func @loop_nest_operand3() {
-  // UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
-  for %i = 0 to 100 step 2 {
-    // UNROLL-BY-4: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
+  // UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
+  affine.for %i = 0 to 100 step 2 {
+    // UNROLL-BY-4: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
     // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: }
     // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
-    for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) {
+    affine.for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) {
      %x = "foo"() : () -> i32
    }
  } // UNROLL-BY-4: }
@@ -490,19 +490,19 @@ func @loop_nest_operand3() {
 
 // UNROLL-BY-4-LABEL: func @loop_nest_symbolic_bound(%arg0: index) {
 func @loop_nest_symbolic_bound(%N : index) {
-  // UNROLL-BY-4: for %i0 = 0 to 100 {
-  for %i = 0 to 100 {
-    // UNROLL-BY-4: for %i1 = 0 to #map{{[0-9]+}}()[%arg0] step 4 {
+  // UNROLL-BY-4: affine.for %i0 = 0 to 100 {
+  affine.for %i = 0 to 100 {
+    // UNROLL-BY-4: affine.for %i1 = 0 to #map{{[0-9]+}}()[%arg0] step 4 {
     // UNROLL-BY-4: %0 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: }
     // A cleanup loop will be generated here.
-    // UNROLL-BY-4-NEXT: for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
+    // UNROLL-BY-4-NEXT: affine.for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
     // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: }
-    for %j = 0 to %N {
+    affine.for %j = 0 to %N {
      %x = "foo"() : () -> i32
    }
  }
@@ -511,18 +511,18 @@ func @loop_nest_symbolic_bound(%N : index) {
 
 // UNROLL-BY-4-LABEL: func @loop_nest_symbolic_and_min_upper_bound
 func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
-  for %i = %M to min ()[s0, s1] -> (s0, s1, 1024)()[%N, %K] {
+  affine.for %i = %M to min ()[s0, s1] -> (s0, s1, 1024)()[%N, %K] {
     "foo"() : () -> ()
   }
   return
 }
-// CHECK-NEXT: for %i0 = %arg0 to min [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] step 4 {
+// CHECK-NEXT: affine.for %i0 = %arg0 to min [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] step 4 {
 // CHECK-NEXT: "foo"() : () -> ()
 // CHECK-NEXT: "foo"() : () -> ()
 // CHECK-NEXT: "foo"() : () -> ()
 // CHECK-NEXT: "foo"() : () -> ()
 // CHECK-NEXT: }
-// CHECK-NEXT: for %i1 = max [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] to min #map28()[%arg1, %arg2] {
+// CHECK-NEXT: affine.for %i1 = max [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] to min #map28()[%arg1, %arg2] {
 // CHECK-NEXT: "foo"() : () -> ()
 // CHECK-NEXT: }
 // CHECK-NEXT: return
@@ -533,22 +533,22 @@ func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index)
 func @loop_nest_non_trivial_multiple_unroll_factor(%M : index, %N : index) {
   %T = affine.apply (d0) -> (4*d0 + 1)(%M)
   %K = affine.apply (d0) -> (d0 - 1) (%T)
-  for %i = 0 to min (d0, d1) -> (4 * d0, d1, 1024)(%N, %K) {
+  affine.for %i = 0 to min (d0, d1) -> (4 * d0, d1, 1024)(%N, %K) {
     "foo"() : () -> ()
   }
   return
 }
-// UNROLL-BY-4: for %i0 = 0 to min
+// UNROLL-BY-4: affine.for %i0 = 0 to min
 // UNROLL-BY-4-NOT: for
 // UNROLL-BY-4: return
 
 // UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_unroll_factor_2
 func @loop_nest_non_trivial_multiple_unroll_factor_2(%M : index, %N : index) {
   %K = affine.apply (d0) -> (4*d0) (%M)
-  for %i = 0 to min ()[s0, s1] -> (4 * s0, s1, 1024)()[%N, %K] {
+  affine.for %i = 0 to min ()[s0, s1] -> (4 * s0, s1, 1024)()[%N, %K] {
     "foo"() : () -> ()
   }
-  // UNROLL-BY-4: for %i0 = 0 to min
+  // UNROLL-BY-4: affine.for %i0 = 0 to min
   // UNROLL-BY-4-NEXT: "foo"
   // UNROLL-BY-4-NEXT: "foo"
   // UNROLL-BY-4-NEXT: "foo"
@@ -560,7 +560,7 @@ func @loop_nest_non_trivial_multiple_unroll_factor_2(%M : index, %N : index) {
 
 // UNROLL-BY-1-LABEL: func @unroll_by_one_should_promote_single_iteration_loop()
 func @unroll_by_one_should_promote_single_iteration_loop() {
-  for %i = 0 to 1 {
+  affine.for %i = 0 to 1 {
     %x = "foo"(%i) : (index) -> i32
   }
   return
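
Every hunk above applies the same mechanical rewrite: the bare 'for' spelling becomes the dialect-qualified 'affine.for', while loop bodies, bounds, maps, and the surrounding FileCheck directives are left intact. A minimal before/after sketch of the rename on a hypothetical function (not part of this commit):

// Before: the unqualified spelling.
func @example() {
  for %i = 0 to 100 step 2 {
    %x = "foo"() : () -> i32
  }
  return
}

// After: the dialect-qualified spelling.
func @example() {
  affine.for %i = 0 to 100 step 2 {
    %x = "foo"() : () -> i32
  }
  return
}

Note that FileCheck matches substrings, so directives such as '// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 {' in @unroll_unit_stride_no_cleanup still match the new 'affine.for' output and therefore appear above as unchanged context lines.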
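The UNROLL-BY-4 expectations throughout this file follow one piece of arithmetic: unrolling by a factor of 4 rewrites a loop so the main loop steps at 4x the original step over the largest trip-count prefix divisible by 4, and a cleanup loop covers the remaining 0-3 iterations. When the remainder is a single iteration it is promoted (as in @loop_nest_operand3), and when the trip count is already a multiple of 4 no cleanup is generated at all (as in @loop_nest_operand1). A sketch of the shape checked by @loop_nest_symbolic_bound, with the bound maps written inline for readability (the real output uses printer-assigned #map ids, so the maps shown here are illustrative, not the pass's verbatim output):

// Original inner loop with symbolic upper bound %N:
affine.for %j = 0 to %N {
  %x = "foo"() : () -> i32
}

// After unroll-by-4: a main loop over the multiple-of-4 prefix...
affine.for %j = 0 to ()[s0] -> (s0 - s0 mod 4)()[%N] step 4 {
  // ...body repeated four times, for iterations %j, %j+1, %j+2, %j+3...
}
// ...followed by a cleanup loop over the remainder.
affine.for %j = ()[s0] -> (s0 - s0 mod 4)()[%N] to %N {
  // ...body once per leftover iteration...
}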