NFC: Rename the 'for' operation in the AffineOps dialect to 'affine.for' and set the namespace of the AffineOps dialect to 'affine'.
PiperOrigin-RevId: 240165792
This commit is contained in:
parent 8f5fa56623
commit 832567b379
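For orientation before the hunks: the rename is mechanical. In the custom assembly form the loop keyword gains the dialect prefix, and in the generic form the quoted op name is qualified by the new 'affine' namespace. A minimal before/after sketch in MLIR, distilled from the hunks below (loop bodies elided):

```mlir
// Custom form: before and after the rename.
for %i = 1 to 10 {
}
affine.for %i = 1 to 10 {
}

// Generic form: the op name now carries the 'affine' dialect namespace.
"for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)} : () -> ()
"affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)} : () -> ()
```

Everything else in the patch follows from these two spellings: the dialect's name prefix, the printer string, docs, comments, and test expectations.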
@@ -56,11 +56,11 @@ class EdscTest(unittest.TestCase):
 code = str(fun)
 # TODO(zinenko,ntv): use FileCheck for these tests
 self.assertIn(
-' "for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)} : () -> () {\n',
+' "affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)} : () -> () {\n',
 code)
 self.assertIn(" ^bb1(%i0: index):", code)
 self.assertIn(
-' "for"(%c42, %2) {lower_bound: (d0) -> (d0), step: 2 : index, upper_bound: (d0) -> (d0)} : (index, index) -> () {\n',
+' "affine.for"(%c42, %2) {lower_bound: (d0) -> (d0), step: 2 : index, upper_bound: (d0) -> (d0)} : (index, index) -> () {\n',
 code)
 self.assertIn(" ^bb2(%i1: index):", code)
 self.assertIn(
@@ -76,19 +76,19 @@ class EdscTest(unittest.TestCase):

 code = str(fun)
 self.assertIn(
-' "for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (5)} : () -> () {\n',
+' "affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (5)} : () -> () {\n',
 code)
 self.assertIn(" ^bb1(%i0: index):", code)
 self.assertIn(
-' "for"() {lower_bound: () -> (1), step: 3 : index, upper_bound: () -> (15)} : () -> () {\n',
+' "affine.for"() {lower_bound: () -> (1), step: 3 : index, upper_bound: () -> (15)} : () -> () {\n',
 code)
 self.assertIn(" ^bb2(%i1: index):", code)
 self.assertIn(
-' "for"() {lower_bound: () -> (2), step: 5 : index, upper_bound: () -> (25)} : () -> () {\n',
+' "affine.for"() {lower_bound: () -> (2), step: 5 : index, upper_bound: () -> (25)} : () -> () {\n',
 code)
 self.assertIn(" ^bb3(%i2: index):", code)
 self.assertIn(
-' "for"() {lower_bound: () -> (3), step: 7 : index, upper_bound: () -> (35)} : () -> () {\n',
+' "affine.for"() {lower_bound: () -> (3), step: 7 : index, upper_bound: () -> (35)} : () -> () {\n',
 code)
 self.assertIn(" ^bb4(%i3: index):", code)
 self.assertIn(
@@ -342,10 +342,10 @@ class EdscTest(unittest.TestCase):

 code = str(fun)
 self.assertIn(
-'"for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (10)}',
+'"affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (10)}',
 code)
 self.assertIn(
-'"for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)}',
+'"affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)}',
 code)
 self.assertIn("%0 = load %arg0[%i0, %i1] : memref<10x42xf32>", code)
 self.assertIn("%1 = addf %0, %cst : f32", code)
@@ -367,7 +367,7 @@ class EdscTest(unittest.TestCase):

 code = str(fun)
 self.assertIn(
-'"for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (32)} : () -> ()',
+'"affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (32)} : () -> ()',
 code)
 self.assertIn("%0 = load %arg0[%i0, %i2] : memref<32x32xf32>", code)
 self.assertIn("%1 = load %arg1[%i2, %i1] : memref<32x32xf32>", code)

@@ -15,7 +15,7 @@ loops and if instructions), the result of a
 [`affine.apply` operation](#'affine.apply'-operation) that recursively takes as
 arguments any symbolic identifiers. Dimensions may be bound not only to anything
 that a symbol is bound to, but also to induction variables of enclosing
-[`for` operations](#'for'-operation), and the result of an
+[`affine.for` operations](#'affine.for'-operation), and the result of an
 [`affine.apply` operation](#'affine.apply'-operation) (which recursively may use
 other dimensions and symbols).
@@ -47,12 +47,12 @@ Example:
 %2 = affine.apply (i)[s0] -> (i+s0) (%42)[%n]
 ```

-#### 'for' operation {#'for'-operation}
+#### 'affine.for' operation {#'affine.for'-operation}

 Syntax:

 ``` {.ebnf}
-operation ::= `for` ssa-id `=` lower-bound `to` upper-bound
+operation ::= `affine.for` ssa-id `=` lower-bound `to` upper-bound
 (`step` integer-literal)? `{` inst* `}`

 lower-bound ::= `max`? affine-map dim-and-symbol-use-list | shorthand-bound
@@ -60,17 +60,17 @@ upper-bound ::= `min`? affine-map dim-and-symbol-use-list | shorthand-bound
 shorthand-bound ::= ssa-id | `-`? integer-literal
 ```

-The `for` operation represents an affine loop nest, defining an SSA value for
-its induction variable. This SSA value always has type
+The `affine.for` operation represents an affine loop nest, defining an SSA value
+for its induction variable. This SSA value always has type
 [`index`](LangRef.md#index-type), which is the size of the machine word.

-The `for` operation executes its body a number of times iterating from a lower
-bound to an upper bound by a stride. The stride, represented by `step`, is a
-positive constant integer which defaults to "1" if not present. The lower and
+The `affine.for` operation executes its body a number of times iterating from a
+lower bound to an upper bound by a stride. The stride, represented by `step`, is
+a positive constant integer which defaults to "1" if not present. The lower and
 upper bounds specify a half-open range: the range includes the lower bound but
 does not include the upper bound.

-The lower and upper bounds of a `for` operation are represented as an
+The lower and upper bounds of a `affine.for` operation are represented as an
 application of an affine mapping to a list of SSA values passed to the map. The
 [same restrictions](#restrictions-on-dimensions-and-symbols) hold for these SSA
 values as for all bindings of SSA values to dimensions and symbols.
@@ -94,8 +94,8 @@ Example showing reverse iteration of the inner loop:

 func @simple_example(%A: memref<?x?xf32>, %B: memref<?x?xf32>) {
 %N = dim %A, 0 : memref<?x?xf32>
-for %i = 0 to %N step 1 {
-for %j = 0 to %N { // implicitly steps by 1
+affine.for %i = 0 to %N step 1 {
+affine.for %j = 0 to %N { // implicitly steps by 1
 %0 = affine.apply #map57(%j)[%N]
 %tmp = call @F1(%A, %i, %0) : (memref<?x?xf32>, index, index)->(f32)
 call @F2(%tmp, %B, %i, %0) : (f32, memref<?x?xf32>, index, index)->()
@@ -130,8 +130,8 @@ Example:
 #set = (d0, d1)[s0]: (d0 - 10 >= 0, s0 - d0 - 9 >= 0,
 d1 - 10 >= 0, s0 - d1 - 9 >= 0)
 func @reduced_domain_example(%A, %X, %N) : (memref<10xi32>, i32, i32) {
-for %i = 0 to %N {
-for %j = 0 to %N {
+affine.for %i = 0 to %N {
+affine.for %j = 0 to %N {
 %0 = affine.apply #map42(%j)
 %tmp = call @S1(%X, %i, %0)
 affine.if #set(%i, %j)[%N] {

@@ -23,8 +23,8 @@ Examples:
 // pad with %f0 to handle the boundary case:
 %f0 = constant 0.0f : f32
 for %i0 = 0 to %0 {
-for %i1 = 0 to %1 step 256 {
-for %i2 = 0 to %2 step 32 {
+affine.for %i1 = 0 to %1 step 256 {
+affine.for %i2 = 0 to %2 step 32 {
 %v = vector_transfer_read %A, %i0, %i1, %i2, %f0
 {permutation_map: (d0, d1, d2) -> (d2, d1)} :
 (memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32>
@@ -34,7 +34,7 @@ for %i0 = 0 to %0 {
 // vector<128xf32>. The underlying implementation will require a 1-D vector
 // broadcast:
 for %i0 = 0 to %0 {
-for %i1 = 0 to %1 {
+affine.for %i1 = 0 to %1 {
 %3 = vector_transfer_read %A, %i0, %i1
 {permutation_map: (d0, d1) -> (0)} :
 (memref<?x?xf32>, index, index) -> vector<128xf32>
@@ -81,8 +81,8 @@ A notional lowering of vector_transfer_read could generate code resembling:
 %tmp = alloc() : vector<3x4x5xf32>
 %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
 for %i = 0 to 3 {
-for %j = 0 to 4 {
-for %k = 0 to 5 {
+affine.for %j = 0 to 4 {
+affine.for %k = 0 to 5 {
 %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32>
 store %tmp[%i, %j, %k] : vector<3x4x5xf32>
 }}}
@@ -102,7 +102,7 @@ lowered code would resemble:
 %tmp = alloc() : vector<3x4x5xf32>
 %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
 for %i = 0 to 3 {
-for %k = 0 to 5 {
+affine.for %k = 0 to 5 {
 %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32>
 store %tmp[%i, 0, %k] : vector<3x4x5xf32>
 }}
@@ -130,9 +130,9 @@ Examples:
 ```mlir {.mlir}
 // write vector<16x32x64xf32> into the slice `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`:
 for %i0 = 0 to %0 {
-for %i1 = 0 to %1 step 32 {
-for %i2 = 0 to %2 step 64 {
-for %i3 = 0 to %3 step 16 {
+affine.for %i1 = 0 to %1 step 32 {
+affine.for %i2 = 0 to %2 step 64 {
+affine.for %i3 = 0 to %3 step 16 {
 %val = `ssa-value` : vector<16x32x64xf32>
 vector_transfer_write %val, %A, %i0, %i1, %i2, %i3
 {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} :

@@ -40,10 +40,10 @@ which means that values are defined before use and have scope defined by their
 dominance relations. Operations may produce zero or more results, and each is a
 distinct SSA value with its own type defined by the [type system](#type-system).

-MLIR incorporates polyhedral compiler concepts, including `for` and `affine.if`
-operations defined by the [affine dialect](Dialects/Affine.md), which model
-affine loops and affine conditionals. It also includes affine maps integrated
-into the type system - they are key to the representation of data and
+MLIR incorporates polyhedral compiler concepts, including `affine.for` and
+`affine.if` operations defined by the [affine dialect](Dialects/Affine.md),
+which model affine loops and affine conditionals. It also includes affine maps
+integrated into the type system - they are key to the representation of data and
 [MemRefs](#memref-type), which are the representation for tensors in addressable
 memory. MLIR also supports a first-class Tensor type allowing it to concisely
 represent operations on N-dimensional arrays.
@@ -99,10 +99,10 @@ func @multiply(%A: memref<100x?xf32>, %B: memref<?x50xf32>)
 %C = alloc() : memref<100x50xf32>

 // Multiplication loop nest.
-for %i = 0 to 100 {
-for %j = 0 to 50 {
+affine.for %i = 0 to 100 {
+affine.for %j = 0 to 50 {
 store 0 to %C[%i, %j] : memref<100x50xf32>
-for %k = 0 to %n {
+affine.for %k = 0 to %n {
 %a_v = load %A[%i, %k] : memref<100x?xf32>
 %b_v = load %B[%k, %j] : memref<?x50xf32>
 %prod = mulf %a_v, %b_v : f32
@@ -1697,8 +1697,8 @@ The arity of indices is the rank of the memref (i.e., if the memref loaded from
 is of rank 3, then 3 indices are required for the load following the memref
 identifier).

-In an `affine.if` or `for` body, the indices of a load are restricted to SSA
-values bound to surrounding loop induction variables,
+In an `affine.if` or `affine.for` body, the indices of a load are restricted to
+SSA values bound to surrounding loop induction variables,
 [symbols](#dimensions-and-symbols), results of a
 [`constant` operation](#'constant'-operation), or the result of an
 `affine.apply` operation that can in turn take as arguments all of the
@@ -1719,10 +1719,10 @@ Example:

 **Context:** The `load` and `store` instructions are specifically crafted to
 fully resolve a reference to an element of a memref, and (in affine `affine.if`
-and `for` instructions) the compiler can follow use-def chains (e.g. through
-[`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) operations) to
-precisely analyze references at compile-time using polyhedral techniques. This
-is possible because of the
+and `affine.for` instructions) the compiler can follow use-def chains (e.g.
+through [`affine.apply`](Dialects/Affine.md#'affine.apply'-operation)
+operations) to precisely analyze references at compile-time using polyhedral
+techniques. This is possible because of the
 [restrictions on dimensions and symbols](Dialects/Affine.md#restrictions-on-dimensions-and-symbols)
 in these contexts.

@@ -1755,10 +1755,11 @@ store %100, %A[%1, 1023] : memref<4x?xf32, #layout, hbm>

 **Context:** The `load` and `store` instructions are specifically crafted to
 fully resolve a reference to an element of a memref, and (in polyhedral
-`affine.if` and `for` instructions) the compiler can follow use-def chains (e.g.
-through [`affine.apply`](Dialects/Affine.md#'affine.apply'-operation)
-operations) to precisely analyze references at compile-time using polyhedral
-techniques. This is possible because of the
+`affine.if` and `affine.for` instructions) the compiler can follow use-def
+chains (e.g. through
+[`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) operations) to
+precisely analyze references at compile-time using polyhedral techniques. This
+is possible because of the
 [restrictions on dimensions and symbols](Dialect/Affine.md#restrictions-on-dimensions-and-symbols)
 in these contexts.

@@ -39,9 +39,9 @@ These restrictions may be lifted in the future.

 ### Output IR

-Functions with `for` and `affine.if` instructions eliminated. These functions
-may contain operations from the Standard dialect in addition to those already
-present before the pass.
+Functions with `affine.for` and `affine.if` instructions eliminated. These
+functions may contain operations from the Standard dialect in addition to those
+already present before the pass.

 ### Invariants

@@ -95,10 +95,10 @@ Input
 ```mlir
 func @loop_nest_tiled() -> memref<256x1024xf32> {
 %0 = alloc() : memref<256x1024xf32>
-for %i0 = 0 to 256 step 32 {
-for %i1 = 0 to 1024 step 32 {
-for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
-for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
+affine.for %i0 = 0 to 256 step 32 {
+affine.for %i1 = 0 to 1024 step 32 {
+affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
+affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
 %1 = load %0[%i2, %i3] : memref<256x1024xf32>
 }
 }
@@ -119,16 +119,16 @@ func @loop_nest_tiled() -> memref<256x1024xf32> {
 %c32 = constant 32 : index
 %c0 = constant 0 : index
 %0 = alloc() : memref<256x1024xf32>
-for %i0 = 0 to 256 step 32 {
-for %i1 = 0 to 1024 step 32 {
+affine.for %i0 = 0 to 256 step 32 {
+affine.for %i1 = 0 to 1024 step 32 {
 %1 = affine.apply #map1(%i0)
 %2 = affine.apply #map1(%i1)
 %3 = alloc() : memref<32x32xf32, 1>
 %4 = alloc() : memref<1xi32>
 dma_start %0[%1, %2], %3[%c0, %c0], %c1024, %4[%c0], %c1024, %c32 : memref<256x1024xf32>, memref<32x32xf32, 1>, memref<1xi32>
 dma_wait %4[%c0], %c1024 : memref<1xi32>
-for %i2 = #map1(%i0) to #map2(%i0) {
-for %i3 = #map1(%i1) to #map2(%i1) {
+affine.for %i2 = #map1(%i0) to #map2(%i0) {
+affine.for %i3 = #map1(%i1) to #map2(%i1) {
 %5 = affine.apply #map3(%i0, %i2)
 %6 = affine.apply #map3(%i1, %i3)
 %7 = load %3[%5, %6] : memref<32x32xf32, 1>
@@ -194,8 +194,8 @@ Input
 func @store_load_affine_apply() -> memref<10x10xf32> {
 %cf7 = constant 7.0 : f32
 %m = alloc() : memref<10x10xf32>
-for %i0 = 0 to 10 {
-for %i1 = 0 to 10 {
+affine.for %i0 = 0 to 10 {
+affine.for %i1 = 0 to 10 {
 %t0 = affine.apply (d0, d1) -> (d1 + 1)(%i0, %i1)
 %t1 = affine.apply (d0, d1) -> (d0)(%i0, %i1)
 %idx0 = affine.apply (d0, d1) -> (d1) (%t0, %t1)
@@ -217,8 +217,8 @@ Output
 func @store_load_affine_apply() -> memref<10x10xf32> {
 %cst = constant 7.000000e+00 : f32
 %0 = alloc() : memref<10x10xf32>
-for %i0 = 0 to 10 {
-for %i1 = 0 to 10 {
+affine.for %i0 = 0 to 10 {
+affine.for %i1 = 0 to 10 {
 %3 = affine.apply #map1(%1, %2)
 %4 = affine.apply #map2(%1, %2)
 store %cst, %0[%3, %4] : memref<10x10xf32>
@@ -258,7 +258,7 @@ Input
 %2 = alloc() : memref<1xf32>
 %c0 = constant 0 : index
 %c128 = constant 128 : index
-for %i0 = 0 to 8 {
+affine.for %i0 = 0 to 8 {
 dma_start %0[%i0], %1[%i0], %c128, %2[%c0] : memref<256xf32>, memref<32xf32, 1>, memref<1xf32>
 dma_wait %2[%c0], %c128 : memref<1xf32>
 %3 = load %1[%i0] : memref<32xf32, 1>
@@ -282,7 +282,7 @@ Output
 %1 = alloc() : memref<2x32xf32, 1>
 %2 = alloc() : memref<2x1xf32>
 dma_start %0[%c0], %1[%c0, %c0], %c128, %2[%c0, %c0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
-for %i0 = 1 to 8 {
+affine.for %i0 = 1 to 8 {
 %3 = affine.apply #map2(%i0)
 %4 = affine.apply #map2(%i0)
 dma_start %0[%i0], %1[%3, %i0], %c128, %2[%4, %c0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>

@@ -150,8 +150,8 @@ func bar(%A : memref<8x?xf32, #lmap>) {
 // dynamically using dim instruction.
 %N = dim %A, 1 : memref<8x?xf32, #lmap>

-for %i = 0 to 8 {
-for %j = 0 to %N {
+affine.for %i = 0 to 8 {
+affine.for %j = 0 to %N {
 // A[i,j] += 1
 %s1 = load %A [%i, %j] : memref<8x?xf32, #lmap>
 %s2 = add %s1, 1
@@ -548,7 +548,7 @@ nested in an outer function that using affine loops.
 func @search(memref<?x?xi32 %A, <?xi32> %S, i32 %key) {
 %ni = dim %A, 0 : memref<?x?xi32>
 // This loop can be parallelized
-for %i = 0 to %ni {
+affine.for %i = 0 to %ni {
 call @search_body (%A, %S, %i) : (memref<?x?xi32>, memref<?xi32>, i32)
 }
 return
@@ -582,9 +582,9 @@ func @search_body(%A: memref<?x?xi32>, %S: memref<?xi32>, %key: i32) {

 As per the [MLIR spec](LangRef.md), the restrictions on dimensions and symbol
 identifiers to be used with the affine.apply instruction only apply to accesses
-inside `for` and `affine.if` instructions. However, an analysis of accesses
-inside the called function (`@search_body`) is necessary to determine if the
-`%i` loop could be parallelized: such function access analysis is calling
+inside `affine.for` and `affine.if` instructions. However, an analysis of
+accesses inside the called function (`@search_body`) is necessary to determine
+if the `%i` loop could be parallelized: such function access analysis is calling
 context sensitive.

 ### Non-affine loop bounds {#non-affine-loop-bounds}
@@ -604,8 +604,8 @@ for (i=0; i <N; i++)

 ```mlir {.mlir}
 func @outer_nest(%n) : (i32) {
-for %i = 0 to %n {
-for %j = 0 to %n {
+affine.for %i = 0 to %n {
+affine.for %j = 0 to %n {
 call @inner_nest(%i, %j, %n)
 }
 }
@@ -620,8 +620,8 @@ func @inner_nest(%i: i32, %j: i32, %n: i32) {
 }

 func @inner_nest2(%m, %n) -> i32 {
-for %k = 0 to %m {
-for %l = 0 to %n {
+affine.for %k = 0 to %m {
+affine.for %l = 0 to %n {
 ...
 }
 }
@@ -663,13 +663,13 @@ in a dilated convolution.
 func @conv2d(memref<16x1024x1024x3xf32, #lm0, vmem> %input,
 memref<5x5x3x32xf32, #lm0, vmem> %kernel,
 memref<16x512x512x32xf32, #lm0, vmem> %output) {
-for %b = 0 to %batch {
-for %oh = 0 to %output_height {
-for %ow = 0 to %output_width {
-for %of = 0 to %output_feature {
-for %kh = 0 to %kernel_height {
-for %kw = 0 to %kernel_width {
-for %if = 0 to %input_feature {
+affine.for %b = 0 to %batch {
+affine.for %oh = 0 to %output_height {
+affine.for %ow = 0 to %output_width {
+affine.for %of = 0 to %output_feature {
+affine.for %kh = 0 to %kernel_height {
+affine.for %kw = 0 to %kernel_width {
+affine.for %if = 0 to %input_feature {
 // Calculate input indices.
 %1_0 = affine.apply #map1_0 (%0#1, %0#2, %0#4, %0#5)
 [%h_stride, %w_stride, %h_kernel_dilation, %w_kernel_dilation,
@@ -913,10 +913,10 @@ func @dma_hbm_to_vmem(memref<1024 x f32, #layout_map0, hbm> %a,
 representation. 2(b) requires no change, but impacts how cost models look at
 index and layout maps.

-### `affine.if` and `for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"}
+### `affine.if` and `affine.for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"}

 We considered providing a representation for SSA values that are live out of
-`if/else` conditional bodies and loop carried in `for` loops. We
+`if/else` conditional bodies and loop carried in `affine.for` loops. We
 ultimately abandoned this approach due to its complexity. In the current design
 of MLIR, scalar variables cannot escape for loops or if instructions. In
 situations, where escaping is necessary, we use zero-dimensional tensors and
@@ -948,7 +948,7 @@ Example:
 // Return sum of elements in 1-dimensional mref A
 func int32 @sum(%A : memref<?xi32>, %N : i32) -> (i32) {
 %init = 0
-%result = for %i = 0 to N with %tmp(%init) {
+%result = affine.for %i = 0 to N with %tmp(%init) {
 %value = load %A[%i]
 %sum = %value + %tmp
 yield %sum
@@ -978,7 +978,7 @@ Example:
 // Compute sum of half of the array
 func int32 @sum_half(%A, %N) {
 %s0 = 0
-%s1 = for %i = 1 ... N step 1 with %s2 (%s0) {
+%s1 = affine.for %i = 1 ... N step 1 with %s2 (%s0) {
 %s3 = if (%i >= %N / 2) {
 %v0 = load %A[%i]
 %s4 = %s2 + %v0

@@ -184,8 +184,8 @@ Our simple example above would be represented as:

 ```mlir
 mlfunc @simple_example(... %N) {
-for %i = 0 ... %N step 1 {
-for %j = 0 ... %N step 1 {
+affine.for %i = 0 ... %N step 1 {
+affine.for %j = 0 ... %N step 1 {
 // identity noop in this case, but can exist in general.
 %0,%1 = affine.apply #57(%i, %j)

@@ -203,8 +203,8 @@ The example with the reduced domain would be represented with an if instruction:

 ```mlir
 mlfunc @reduced_domain_example(... %N) {
-for %i = 0 ... %N step 1 {
-for %j = 0 ... %N step 1 {
+affine.for %i = 0 ... %N step 1 {
+affine.for %j = 0 ... %N step 1 {
 // identity noop in this case, but can exist in general.
 %0,%1 = affinecall #57(%i, %j)

@@ -233,8 +233,8 @@ that transformations call into):

 ```mlir
 mlfunc @skewed_domain_example(... %N) {
-for %t1 = 0 ... 2*N-2 step 1 {
-for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 {
+affine.for %t1 = 0 ... 2*N-2 step 1 {
+affine.for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 {
 (%i, %j) = (%t1-%t2, %t2)
 ...
 }
@@ -373,7 +373,7 @@ mlfunc's (if we support them) will also have to have domains.
 ### Lack of redundancy in IR

 The traditional form has multiple encodings for the same sorts of behavior: you
-end up having bits on `for` loops to specify whether codegen should use
+end up having bits on `affine.for` loops to specify whether codegen should use
 "atomic/separate" policies, unroll loops, etc. Instructions can be split or can
 generate multiple copies of their instruction because of overlapping domains,
 etc.

@@ -88,15 +88,15 @@ public:
 MLIRContext *context);
 };

-/// The "for" instruction represents an affine loop nest, defining an SSA value
-/// for its induction variable. The induction variable is represented as a
+/// The "affine.for" instruction represents an affine loop nest, defining an SSA
+/// value for its induction variable. The induction variable is represented as a
 /// BlockArgument to the entry block of the body. The body and induction
-/// variable can be created automatically for new "for" ops with 'createBody'.
-/// This SSA value always has type index, which is the size of the machine word.
-/// The stride, represented by step, is a positive constant integer which
-/// defaults to "1" if not present. The lower and upper bounds specify a
-/// half-open range: the range includes the lower bound but does not include the
-/// upper bound.
+/// variable can be created automatically for new "affine.for" ops with
+/// 'createBody'. This SSA value always has type index, which is the size of the
+/// machine word. The stride, represented by step, is a positive constant
+/// integer which defaults to "1" if not present. The lower and upper bounds
+/// specify a half-open range: the range includes the lower bound but does not
+/// include the upper bound.
 ///
 /// The lower and upper bounds of a for operation are represented as an
 /// application of an affine mapping to a list of SSA values passed to the map.
@@ -108,7 +108,7 @@ public:
 ///
 /// Example:
 ///
-/// for %i = 1 to 10 {
+/// affine.for %i = 1 to 10 {
 /// ...
 /// }
 ///
@@ -131,7 +131,7 @@ public:
 static void getCanonicalizationPatterns(OwningRewritePatternList &results,
 MLIRContext *context);

-static StringRef getOperationName() { return "for"; }
+static StringRef getOperationName() { return "affine.for"; }
 static StringRef getStepAttrName() { return "step"; }
 static StringRef getLowerBoundAttrName() { return "lower_bound"; }
 static StringRef getUpperBoundAttrName() { return "upper_bound"; }
@@ -268,10 +268,10 @@ public:
 operand_range getOperands() { return {operand_begin(), operand_end()}; }

 private:
-// 'for' instruction that contains this bound.
+// 'affine.for' instruction that contains this bound.
 AffineForOp inst;
 // Start and end positions of this affine bound operands in the list of
-// the containing 'for' instruction operands.
+// the containing 'affine.for' instruction operands.
 unsigned opStart, opEnd;
 // Affine map for this bound.
 AffineMap map;

@@ -376,14 +376,15 @@ public:

 AffineExpr toAffineExpr(unsigned idx, MLIRContext *context);

-/// Adds constraints (lower and upper bounds) for the specified 'for'
+/// Adds constraints (lower and upper bounds) for the specified 'affine.for'
 /// instruction's Value using IR information stored in its bound maps. The
 /// right identifier is first looked up using forOp's Value. Asserts if the
-/// Value corresponding to the 'for' instruction isn't found in the constraint
-/// system. Returns failure for the yet unimplemented/unsupported cases. Any
-/// new identifiers that are found in the bound operands of the 'for'
-/// instruction are added as trailing identifiers (either dimensional or
-/// symbolic depending on whether the operand is a valid ML Function symbol).
+/// Value corresponding to the 'affine.for' instruction isn't found in the
+/// constraint system. Returns failure for the yet unimplemented/unsupported
+/// cases. Any new identifiers that are found in the bound operands of the
+/// 'affine.for' instruction are added as trailing identifiers (either
+/// dimensional or symbolic depending on whether the operand is a valid ML
+/// Function symbol).
 // TODO(bondhugula): add support for non-unit strides.
 LogicalResult addAffineForOpDomain(AffineForOp forOp);

@@ -45,7 +45,7 @@ class Instruction;
 class Value;

 /// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
-/// the outermost 'for' instruction to the innermost one.
+/// the outermost 'affine.for' instruction to the innermost one.
 // TODO(bondhugula): handle 'affine.if' inst's.
 void getLoopIVs(Instruction &inst, SmallVectorImpl<AffineForOp> *loops);

@@ -113,8 +113,8 @@ AffineForOp insertBackwardComputationSlice(Instruction *srcOpInst,
 /// surrounding such op's.
 // For example, the memref region for a load operation at loop depth = 1:
 //
-// for %i = 0 to 32 {
-// for %ii = %i to (d0) -> (d0 + 8) (%i) {
+// affine.for %i = 0 to 32 {
+// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
 // load %A[%ii]
 // }
 // }
@@ -146,8 +146,8 @@ struct MemRefRegion {
 /// For example, the memref region for this operation at loopDepth = 1 will
 /// be:
 ///
-/// for %i = 0 to 32 {
-/// for %ii = %i to (d0) -> (d0 + 8) (%i) {
+/// affine.for %i = 0 to 32 {
+/// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
 /// load %A[%ii]
 /// }
 /// }

@@ -76,9 +76,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 /// The following MLIR snippet:
 ///
 /// ```mlir
-/// for %i3 = 0 to %0 {
-/// for %i4 = 0 to %1 {
-/// for %i5 = 0 to %2 {
+/// affine.for %i3 = 0 to %0 {
+/// affine.for %i4 = 0 to %1 {
+/// affine.for %i5 = 0 to %2 {
 /// %a5 = load %arg0[%i4, %i5, %i3] : memref<?x?x?xf32>
 /// }}}
 /// ```
@@ -86,9 +86,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 /// may vectorize with {permutation_map: (d0, d1, d2) -> (d2, d1)} into:
 ///
 /// ```mlir
-/// for %i3 = 0 to %0 step 32 {
-/// for %i4 = 0 to %1 {
-/// for %i5 = 0 to %2 step 256 {
+/// affine.for %i3 = 0 to %0 step 32 {
+/// affine.for %i4 = 0 to %1 {
+/// affine.for %i5 = 0 to %2 step 256 {
 /// %4 = vector_transfer_read %arg0, %i4, %i5, %i3
 /// {permutation_map: (d0, d1, d2) -> (d2, d1)} :
 /// (memref<?x?x?xf32>, index, index) -> vector<32x256xf32>
@@ -103,7 +103,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 ///
 /// ```mlir
 /// %cst0 = constant 0 : index
-/// for %i0 = 0 to %0 {
+/// affine.for %i0 = 0 to %0 {
 /// %a0 = load %arg0[%cst0, %cst0] : memref<?x?xf32>
 /// }
 /// ```
@@ -111,7 +111,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 /// may vectorize with {permutation_map: (d0) -> (0)} into:
 ///
 /// ```mlir
-/// for %i0 = 0 to %0 step 128 {
+/// affine.for %i0 = 0 to %0 step 128 {
 /// %3 = vector_transfer_read %arg0, %c0_0, %c0_0
 /// {permutation_map: (d0, d1) -> (0)} :
 /// (memref<?x?xf32>, index, index) -> vector<128xf32>

@@ -341,7 +341,8 @@ protected:
 /// (e.g. vectorValue = load(vectorView, zero)).
 ///
 /// Only ExprKind::StmtBlockLikeExpr have `enclosedStmts`, these comprise:
-/// 1. `For`-loops for which the `lhs` binds to the induction variable, `rhs`
+/// 1. `affine.for`-loops for which the `lhs` binds to the induction variable,
+/// `rhs`
 /// binds to an Expr of kind `ExprKind::For` with lower-bound, upper-bound and
 /// step respectively.
 // TODO(zinenko): this StmtBlockLikeExpr should be retired in favor of Expr
@@ -647,7 +648,7 @@ Stmt For(llvm::ArrayRef<Expr> indices, llvm::ArrayRef<Expr> lbs,
 llvm::ArrayRef<Expr> ubs, llvm::ArrayRef<Expr> steps,
 llvm::ArrayRef<Stmt> enclosedStmts);

-/// Define a 'for' loop from with multi-valued bounds.
+/// Define a 'affine.for' loop from with multi-valued bounds.
 ///
 /// for max(lbs...) to min(ubs...) {}
 ///

@@ -73,9 +73,10 @@ void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
 SmallVectorImpl<Value *> *operands,
 FuncBuilder *builder);

-/// Skew the instructions in the body of a 'for' instruction with the specified
-/// instruction-wise shifts. The shifts are with respect to the original
-/// execution order, and are multiplied by the loop 'step' before being applied.
+/// Skew the instructions in the body of a 'affine.for' instruction with the
+/// specified instruction-wise shifts. The shifts are with respect to the
+/// original execution order, and are multiplied by the loop 'step' before being
+/// applied.
 LLVM_NODISCARD
 LogicalResult instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
 bool unrollPrologueEpilogue = false);

@@ -95,14 +95,14 @@ Instruction *createComposedAffineApplyOp(FuncBuilder *builder, Location loc,
 ///
 /// Before
 ///
-/// for %i = 0 to #map(%N)
+/// affine.for %i = 0 to #map(%N)
 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 /// send %A[%idx], ...
 /// %v = "compute"(%idx, ...)
 ///
 /// After
 ///
-/// for %i = 0 to #map(%N)
+/// affine.for %i = 0 to #map(%N)
 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 /// send %A[%idx], ...
 /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)

@@ -36,7 +36,7 @@ using llvm::dbgs;
 //===----------------------------------------------------------------------===//

 AffineOpsDialect::AffineOpsDialect(MLIRContext *context)
-: Dialect(/*namePrefix=*/"", context) {
+: Dialect(/*namePrefix=*/"affine", context) {
 addOperations<AffineApplyOp, AffineForOp, AffineIfOp>();
 }

@@ -69,7 +69,7 @@ bool mlir::isValidDim(Value *value) {
 return isTopLevelSymbol(dimOp->getOperand());
 return false;
 }
-// This value is a block argument (which also includes 'for' loop IVs).
+// This value is a block argument (which also includes 'affine.for' loop IVs).
 return true;
 }

@@ -969,7 +969,7 @@ static void printBound(AffineBound bound, const char *prefix, OpAsmPrinter *p) {
 }

 void AffineForOp::print(OpAsmPrinter *p) {
-*p << "for ";
+*p << "affine.for ";
 p->printOperand(getBody()->getArgument(0));
 *p << " = ";
 printBound(getLowerBound(), "max", p);

@@ -708,8 +708,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
 // For example, given the following MLIR code with with "source" and
 // "destination" accesses to the same memref labled, and symbols %M, %N, %K:
 //
-// for %i0 = 0 to 100 {
-// for %i1 = 0 to 50 {
+// affine.for %i0 = 0 to 100 {
+// affine.for %i1 = 0 to 50 {
 // %a0 = affine.apply
 // (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
 // // Source memref access.
@@ -717,8 +717,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
 // }
 // }
 //
-// for %i2 = 0 to 100 {
-// for %i3 = 0 to 50 {
+// affine.for %i2 = 0 to 100 {
+// affine.for %i3 = 0 to 50 {
 // %a1 = affine.apply
 // (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
 // // Destination memref access.

@@ -15,7 +15,7 @@
 // limitations under the License.
 // =============================================================================
 //
-// This file implements a pass to detect parallel affine 'for' ops.
+// This file implements a pass to detect parallel affine 'affine.for' ops.
 //
 //===----------------------------------------------------------------------===//

@@ -40,7 +40,8 @@ FunctionPassBase *mlir::createParallelismDetectionTestPass() {
 return new TestParallelismDetection();
 }

-// Walks the function and emits a note for all 'for' ops detected as parallel.
+// Walks the function and emits a note for all 'affine.for' ops detected as
+// parallel.
 void TestParallelismDetection::runOnFunction() {
 Function *f = getFunction();
 FuncBuilder b(f);

@@ -38,11 +38,11 @@ using namespace mlir;
 using llvm::SmallDenseMap;

 /// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
-/// the outermost 'for' instruction to the innermost one.
+/// the outermost 'affine.for' instruction to the innermost one.
 void mlir::getLoopIVs(Instruction &inst, SmallVectorImpl<AffineForOp> *loops) {
 auto *currInst = inst.getParentInst();
 AffineForOp currAffineForOp;
-// Traverse up the hierarchy collecing all 'for' instruction while
+// Traverse up the hierarchy collecing all 'affine.for' instruction while
 // skipping over 'affine.if' instructions.
 while (currInst && ((currAffineForOp = currInst->dyn_cast<AffineForOp>()) ||
 currInst->isa<AffineIfOp>())) {
@@ -162,8 +162,8 @@ LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
 // For example, the memref region for this load operation at loopDepth = 1 will
 // be as below:
 //
-// for %i = 0 to 32 {
-// for %ii = %i to (d0) -> (d0 + 8) (%i) {
+// affine.for %i = 0 to 32 {
+// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
 // load %A[%ii]
 // }
 // }
@@ -683,7 +683,7 @@ static Optional<int64_t> getMemoryFootprintBytes(Block &block,
 int memorySpace) {
 SmallDenseMap<Value *, std::unique_ptr<MemRefRegion>, 4> regions;

-// Walk this 'for' instruction to gather all memory regions.
+// Walk this 'affine.for' instruction to gather all memory regions.
 bool error = false;
 block.walk(start, end, [&](Instruction *opInst) {
 if (!opInst->isa<LoadOp>() && !opInst->isa<StoreOp>()) {

@@ -146,7 +146,8 @@ Value *mlir::edsc::MLIREmitter::emitExpr(Expr e) {
 if (auto expr = e.dyn_cast<StmtBlockLikeExpr>()) {
 if (expr.getKind() == ExprKind::For) {
 auto exprGroups = expr.getAllArgumentGroups();
-assert(exprGroups.size() == 3 && "expected 3 expr groups in `for`");
+assert(exprGroups.size() == 3 &&
+"expected 3 expr groups in `affine.for`");
 assert(!exprGroups[0].empty() && "expected at least one lower bound");
 assert(!exprGroups[1].empty() && "expected at least one upper bound");
 assert(exprGroups[2].size() == 1 &&

@@ -526,8 +526,8 @@ Stmt mlir::edsc::For(llvm::ArrayRef<Expr> idxs, llvm::ArrayRef<MaxExpr> lbs,
 Stmt mlir::edsc::MaxMinFor(const Bindable &idx, ArrayRef<Expr> lbs,
 ArrayRef<Expr> ubs, Expr step,
 ArrayRef<Stmt> enclosedStmts) {
-assert(!lbs.empty() && "'for' loop must have lower bounds");
-assert(!ubs.empty() && "'for' loop must have upper bounds");
+assert(!lbs.empty() && "'affine.for' loop must have lower bounds");
+assert(!ubs.empty() && "'affine.for' loop must have upper bounds");

 // Use a null expression as a sentinel between lower and upper bound
 // expressions in the list of children.
@@ -964,7 +964,7 @@ void mlir::edsc::Expr::print(raw_ostream &os) const {
 } else if (auto stmtLikeExpr = this->dyn_cast<StmtBlockLikeExpr>()) {
 switch (stmtLikeExpr.getKind()) {
 // We only print the lb, ub and step here, which are the StmtBlockLike
-// part of the `for` StmtBlockLikeExpr.
+// part of the `affine.for` StmtBlockLikeExpr.
 case ExprKind::For: {
 auto exprGroups = stmtLikeExpr.getAllArgumentGroups();
 assert(exprGroups.size() == 3 &&

@@ -343,7 +343,7 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
 auto fastMemRefType = top.getMemRefType(
 fastBufferShape, memRefType.getElementType(), {}, fastMemorySpace);

-// Create the fast memory space buffer just before the 'for'
+// Create the fast memory space buffer just before the 'affine.for'
 // instruction.
 fastMemRef = prologue.create<AllocOp>(loc, fastMemRefType)->getResult();
 // Record it.
@@ -472,7 +472,7 @@ bool DmaGeneration::runOnBlock(Block *block) {
 // approach is conservative in some cases at the moment, we do a check later
 // and report an error with location info.
 // TODO(bondhugula): An 'affine.if' instruction is being treated similar to an
-// operation instruction. 'affine.if''s could have 'for's in them;
+// operation instruction. 'affine.if''s could have 'affine.for's in them;
 // treat them separately.

 // Get to the first load, store, or for op.
@@ -494,7 +494,7 @@ bool DmaGeneration::runOnBlock(Block *block) {
 fastMemCapacityBytes);
 };

-// If the memory footprint of the 'for' loop is higher than fast
+// If the memory footprint of the 'affine.for' loop is higher than fast
 // memory capacity (when provided), we recurse to DMA at an inner level
 // until we find a depth at which footprint fits in fast mem capacity. If
 // the footprint can't be calculated, we assume for now it fits. Recurse
@@ -507,7 +507,7 @@ bool DmaGeneration::runOnBlock(Block *block) {
 runOnBlock(/*begin=*/curBegin, /*end=*/it);
 // Recurse onto the body of this loop.
 runOnBlock(forOp->getBody());
-// The next region starts right after the 'for' instruction.
+// The next region starts right after the 'affine.for' instruction.
 curBegin = std::next(it);
 } else {
 // We have enough capacity, i.e., DMAs will be computed for the portion
@@ -698,7 +698,8 @@ uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) {
 [&](const SmallMapVector<Value *, std::unique_ptr<MemRefRegion>, 4>
 &regions) {
 for (const auto &regionEntry : regions) {
-// For each region, hoist DMA transfer past all invariant 'for's.
+// For each region, hoist DMA transfer past all invariant
+// 'affine.for's.
 Block::iterator dmaPlacementReadStart, dmaPlacementWriteStart;
 Block *dmaPlacementBlock;
 findHighestBlockForPlacement(

@@ -641,7 +641,8 @@ bool MemRefDependenceGraph::init(Function *f) {
 // all loads and store accesses it contains.
 LoopNestStateCollector collector;
 collector.collect(&inst);
-// Return false if a non 'for' region was found (not currently supported).
+// Return false if a non 'affine.for' region was found (not currently
+// supported).
 if (collector.hasNonForRegion)
 return false;
 Node node(nextNodeId++, &inst);

@@ -258,7 +258,8 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef<AffineForOp> band,
 // Returns all maximal outermost perfect loop nests to tile.
 static void getTileableBands(Function *f,
 std::vector<SmallVector<AffineForOp, 6>> *bands) {
-// Get maximal perfect nest of 'for' insts starting from root (inclusive).
+// Get maximal perfect nest of 'affine.for' insts starting from root
+// (inclusive).
 auto getMaximalPerfectLoopNest = [&](AffineForOp root) {
 SmallVector<AffineForOp, 6> band;
 AffineForOp currInst = root;

@@ -158,8 +158,8 @@ void LoopUnroll::runOnFunction() {
 }
 }

-/// Unrolls a 'for' inst. Returns success if the loop was unrolled, failure
-/// otherwise. The default unroll factor is 4.
+/// Unrolls a 'affine.for' inst. Returns success if the loop was unrolled,
+/// failure otherwise. The default unroll factor is 4.
 LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
 // Use the function callback if one was provided.
 if (getUnrollFactor) {

@@ -96,7 +96,7 @@ void LoopUnrollAndJam::runOnFunction() {
 runOnAffineForOp(forOp);
 }

-/// Unroll and jam a 'for' inst. Default unroll jam factor is
+/// Unroll and jam a 'affine.for' inst. Default unroll jam factor is
 /// kDefaultUnrollJamFactor. Return failure if nothing was done.
 LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) {
 // Unroll and jam by the factor that was passed if any.

@@ -276,7 +276,7 @@ static Value *buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate,
 return value;
 }

-// Convert a "for" loop to a flow of blocks. Return `false` on success.
+// Convert a "affine.for" loop to a flow of blocks. Return `false` on success.
 //
 // Create an SESE region for the loop (including its body) and append it to the
 // end of the current region. The loop region consists of the initialization
@@ -323,8 +323,9 @@ bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) {
 auto loc = forOp->getLoc();
 auto *forInst = forOp->getInstruction();

-// Start by splitting the block containing the 'for' into two parts. The part
-// before will get the init code, the part after will be the end point.
+// Start by splitting the block containing the 'affine.for' into two parts.
+// The part before will get the init code, the part after will be the end
+// point.
 auto *initBlock = forInst->getBlock();
 auto *endBlock = initBlock->splitBlock(forInst);

@@ -56,9 +56,9 @@
 /// // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into
 /// // vector<32x256xf32> and pad with %f0 to handle the boundary case:
 /// %f0 = constant 0.0f : f32
-/// for %i0 = 0 to %0 {
-/// for %i1 = 0 to %1 step 256 {
-/// for %i2 = 0 to %2 step 32 {
+/// affine.for %i0 = 0 to %0 {
+/// affine.for %i1 = 0 to %1 step 256 {
+/// affine.for %i2 = 0 to %2 step 32 {
 /// %v = vector_transfer_read %A, %i0, %i1, %i2, %f0
 /// {permutation_map: (d0, d1, d2) -> (d2, d1)} :
 /// (memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32>
@@ -70,8 +70,8 @@
 /// abstraction):
 ///
 /// ```mlir {.mlir}
-/// for %d2 = 0 to 256 {
-/// for %d1 = 0 to 32 {
+/// affine.for %d2 = 0 to 256 {
+/// affine.for %d1 = 0 to 32 {
 /// %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32
 /// %tmp[%d2, %d1] = %s
 /// }
@@ -100,10 +100,10 @@
 /// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
 /// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
 /// %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> :
-/// vector<4x4x4xf32> for %i0 = 0 to %M step 4 {
-/// for %i1 = 0 to %N step 4 {
-/// for %i2 = 0 to %O {
-/// for %i3 = 0 to %P step 4 {
+/// vector<4x4x4xf32> affine.for %i0 = 0 to %M step 4 {
+/// affine.for %i1 = 0 to %N step 4 {
+/// affine.for %i2 = 0 to %O {
+/// affine.for %i3 = 0 to %P step 4 {
 /// vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3
 /// {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} :
 /// vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>,
@@ -119,10 +119,10 @@
 /// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
 /// %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
 /// %f1 = constant splat<vector<4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
-/// for %i0 = 0 to %arg0 step 4 {
-/// for %i1 = 0 to %arg1 step 4 {
-/// for %i2 = 0 to %arg2 {
-/// for %i3 = 0 to %arg3 step 4 {
+/// affine.for %i0 = 0 to %arg0 step 4 {
+/// affine.for %i1 = 0 to %arg1 step 4 {
+/// affine.for %i2 = 0 to %arg2 {
+/// affine.for %i3 = 0 to %arg3 step 4 {
 /// %1 = affine.apply (d0, d1, d2, d3) -> (d0, d1, d2, d3)
 /// (%i0, %i1, %i2, %i3)
 /// vector_transfer_write f1, %0, %1#0, %1#1, %1#2, %1#3
@@ -286,10 +286,10 @@ static Value *substitute(Value *v, VectorType hwVectorType,
 /// super-vectorization has been applied:
 ///
 /// ```mlir
-/// for %i0 = 0 to %M {
-/// for %i1 = 0 to %N step 3 {
-/// for %i2 = 0 to %O {
-/// for %i3 = 0 to %P step 32 {
+/// affine.for %i0 = 0 to %M {
+/// affine.for %i1 = 0 to %N step 3 {
+/// affine.for %i2 = 0 to %O {
+/// affine.for %i3 = 0 to %P step 32 {
 /// %r = vector_transfer_read(%A, map(%i..)#0, map(%i..)#1, map(%i..)#2)
 /// -> vector<3x32xf32>
 /// ...

@@ -19,7 +19,7 @@
 // potentially getting rid of intermediate memref's entirely.
 // TODO(mlir-team): In the future, similar techniques could be used to eliminate
 // dead memref store's and perform more complex forwarding when support for
-// SSA scalars live out of 'for'/'affine.if' statements is available.
+// SSA scalars live out of 'affine.for'/'affine.if' statements is available.
 //===----------------------------------------------------------------------===//

 #include "mlir/Analysis/AffineAnalysis.h"
@@ -55,7 +55,7 @@ namespace {
 //
 // (* A dependence being satisfied at a block: a dependence that is satisfied by
 // virtue of the destination instruction appearing textually / lexically after
-// the source instruction within the body of a 'for' instruction; thus, a
+// the source instruction within the body of a 'affine.for' instruction; thus, a
 // dependence is always either satisfied by a loop or by a block).
 //
 // The above conditions are simple to check, sufficient, and powerful for most
@@ -139,8 +139,8 @@ void MemRefDataFlowOpt::forwardStoreToLoad(LoadOp loadOp) {
 // Check if this store is a candidate for forwarding; we only forward if
 // the dependence from the store is carried by the *body* of innermost
 // common surrounding loop. As an example this filters out cases like:
-// for %i0
-// for %i1
+// affine.for %i0
+// affine.for %i1
 // %idx = affine.apply (d0) -> (d0 + 1) (%i0)
 // store %A[%idx]
 // load %A[%i0]

@@ -66,11 +66,11 @@ static unsigned getTagMemRefPos(Instruction &dmaInst) {
 return 0;
 }

-/// Doubles the buffer of the supplied memref on the specified 'for' instruction
-/// by adding a leading dimension of size two to the memref. Replaces all uses
-/// of the old memref by the new one while indexing the newly added dimension by
-/// the loop IV of the specified 'for' instruction modulo 2. Returns false if
-/// such a replacement cannot be performed.
+/// Doubles the buffer of the supplied memref on the specified 'affine.for'
+/// instruction by adding a leading dimension of size two to the memref.
+/// Replaces all uses of the old memref by the new one while indexing the newly
+/// added dimension by the loop IV of the specified 'affine.for' instruction
+/// modulo 2. Returns false if such a replacement cannot be performed.
 static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
 auto *forBody = forOp->getBody();
 FuncBuilder bInner(forBody, forBody->begin());
@@ -104,7 +104,7 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
 dynamicDimCount++));
 }

-// Create and place the alloc right before the 'for' instruction.
+// Create and place the alloc right before the 'affine.for' instruction.
 Value *newMemRef =
 bOuter.create<AllocOp>(forInst->getLoc(), newMemRefType, allocOperands);

@@ -139,9 +139,9 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
 /// Returns success if the IR is in a valid state.
 void PipelineDataTransfer::runOnFunction() {
 // Do a post order walk so that inner loop DMAs are processed first. This is
-// necessary since 'for' instructions nested within would otherwise become
-// invalid (erased) when the outer loop is pipelined (the pipelined one gets
-// deleted and replaced by a prologue, a new steady-state loop and an
+// necessary since 'affine.for' instructions nested within would otherwise
+// become invalid (erased) when the outer loop is pipelined (the pipelined one
+// gets deleted and replaced by a prologue, a new steady-state loop and an
 // epilogue).
 forOps.clear();
 getFunction()->walkPostOrder<AffineForOp>(

@@ -71,7 +71,7 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
 auto lb = b->create<AffineApplyOp>(forOp->getLoc(), lbMap, lbOperands);

 // For each upper bound expr, get the range.
-// Eg: for %i = lb to min (ub1, ub2),
+// Eg: affine.for %i = lb to min (ub1, ub2),
 // where tripCountExprs yield (tr1, tr2), we create affine.apply's:
 // lb + tr1 - tr1 % ufactor, lb + tr2 - tr2 % ufactor; the results of all
 // these affine.apply's make up the cleanup loop lower bound.
@@ -161,8 +161,8 @@ void mlir::promoteSingleIterationLoops(Function *f) {
 [](AffineForOp forOp) { promoteIfSingleIteration(forOp); });
 }

-/// Generates a 'for' inst with the specified lower and upper bounds while
-/// generating the right IV remappings for the shifted instructions. The
+/// Generates a 'affine.for' inst with the specified lower and upper bounds
+/// while generating the right IV remappings for the shifted instructions. The
 /// instruction blocks that go into the loop are specified in instGroupQueue
 /// starting from the specified offset, and in that order; the first element of
 /// the pair specifies the shift applied to that group of instructions; note
@@ -216,10 +216,10 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
 return loopChunk;
 }

-/// Skew the instructions in the body of a 'for' instruction with the specified
-/// instruction-wise shifts. The shifts are with respect to the original
-/// execution order, and are multiplied by the loop 'step' before being applied.
-/// A shift of zero for each instruction will lead to no change.
+/// Skew the instructions in the body of a 'affine.for' instruction with the
+/// specified instruction-wise shifts. The shifts are with respect to the
+/// original execution order, and are multiplied by the loop 'step' before being
+/// applied. A shift of zero for each instruction will lead to no change.
 // The skewing of instructions with respect to one another can be used for
 // example to allow overlap of asynchronous operations (such as DMA
 // communication) with computation, or just relative shifting of instructions
@@ -267,7 +267,7 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,

 // An array of instruction groups sorted by shift amount; each group has all
 // instructions with the same shift in the order in which they appear in the
-// body of the 'for' inst.
+// body of the 'affine.for' inst.
 std::vector<std::vector<Instruction *>> sortedInstGroups(maxShift + 1);
 unsigned pos = 0;
 for (auto &inst : *forOp->getBody()) {
@@ -499,7 +499,7 @@ void mlir::sinkLoop(AffineForOp forOp, unsigned loopDepth) {
 // bounds, the resulting IR resembles:
 //
 // ```mlir
-// for %i = max (`iv, ...) to min (`iv` + `offset`) {
+// affine.for %i = max (`iv, ...) to min (`iv` + `offset`) {
 // ...
 // }
 // ```

@@ -199,14 +199,14 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef,
 ///
 /// Before
 ///
-/// for %i = 0 to #map(%N)
+/// affine.for %i = 0 to #map(%N)
 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 /// "send"(%idx, %A, ...)
 /// "compute"(%idx)
 ///
 /// After
 ///
-/// for %i = 0 to #map(%N)
+/// affine.for %i = 0 to #map(%N)
 /// %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 /// "send"(%idx, %A, ...)
 /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)

@ -113,7 +113,7 @@ using namespace mlir;
|
||||
///
|
||||
/// At a high level, a vectorized load in a loop will resemble:
|
||||
/// ```mlir
|
||||
/// for %i = ? to ? step ? {
|
||||
/// affine.for %i = ? to ? step ? {
|
||||
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
|
||||
/// vector<128xf32>
|
||||
/// }
|
||||
@ -309,7 +309,7 @@ using namespace mlir;
|
||||
/// ```mlir
|
||||
/// mlfunc @fill(%A : memref<128xf32>) -> () {
|
||||
/// %f1 = constant 1.0 : f32
|
||||
/// for %i0 = 0 to 32 {
|
||||
/// affine.for %i0 = 0 to 32 {
|
||||
/// store %f1, %A[%i0] : memref<128xf32, 0>
|
||||
/// }
|
||||
/// return
|
||||
@ -322,7 +322,7 @@ using namespace mlir;
|
||||
/// is still subject to exploratory tradeoffs. In particular, say we want to
|
||||
/// vectorize by a factor 128, we want to transform the following input:
|
||||
/// ```mlir
|
||||
/// for %i = %M to %N {
|
||||
/// affine.for %i = %M to %N {
|
||||
/// %a = load A[%i] : memref<?xf32>
|
||||
/// }
|
||||
/// ```
|
||||
@ -331,8 +331,8 @@ using namespace mlir;
|
||||
/// memory promotion etc) say after stripmining (and potentially unrolling in
|
||||
/// the case of LLVM's SLP vectorizer):
|
||||
/// ```mlir
|
||||
/// for %i = floor(%M, 128) to ceil(%N, 128) {
|
||||
/// for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) {
|
||||
/// affine.for %i = floor(%M, 128) to ceil(%N, 128) {
|
||||
/// affine.for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) {
|
||||
/// %a = load A[%ii] : memref<?xf32>
|
||||
/// }
|
||||
/// }
|
||||
@ -341,7 +341,7 @@ using namespace mlir;
|
||||
/// Instead, we seek to vectorize early and freeze vector types before
|
||||
/// scheduling, so we want to generate a pattern that resembles:
|
||||
/// ```mlir
|
||||
/// for %i = ? to ? step ? {
|
||||
/// affine.for %i = ? to ? step ? {
|
||||
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
|
||||
/// vector<128xf32>
|
||||
/// }
|
||||
@ -362,7 +362,7 @@ using namespace mlir;
|
||||
/// For the simple strawman example above, vectorizing for a 1-D vector
|
||||
/// abstraction of size 128 returns code similar to:
|
||||
/// ```mlir
|
||||
/// for %i = %M to %N step 128 {
|
||||
/// affine.for %i = %M to %N step 128 {
|
||||
/// %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
|
||||
/// vector<128xf32>
|
||||
/// }
|
||||
@ -391,20 +391,20 @@ using namespace mlir;
|
||||
/// %C = alloc (%M, %N) : memref<?x?xf32, 0>
|
||||
/// %f1 = constant 1.0 : f32
|
||||
/// %f2 = constant 2.0 : f32
|
||||
/// for %i0 = 0 to %M {
|
||||
/// for %i1 = 0 to %N {
|
||||
/// affine.for %i0 = 0 to %M {
|
||||
/// affine.for %i1 = 0 to %N {
|
||||
/// // non-scoped %f1
|
||||
/// store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
|
||||
/// }
|
||||
/// }
|
||||
/// for %i2 = 0 to %M {
|
||||
/// for %i3 = 0 to %N {
|
||||
/// affine.for %i2 = 0 to %M {
|
||||
/// affine.for %i3 = 0 to %N {
|
||||
/// // non-scoped %f2
|
||||
/// store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
|
||||
/// }
|
||||
/// }
|
||||
/// for %i4 = 0 to %M {
|
||||
/// for %i5 = 0 to %N {
|
||||
/// affine.for %i4 = 0 to %M {
|
||||
/// affine.for %i5 = 0 to %N {
|
||||
/// %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
|
||||
/// %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
|
||||
/// %s5 = addf %a5, %b5 : f32
|
||||
@ -438,24 +438,24 @@ using namespace mlir;
|
||||
/// %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
|
||||
/// %cst = constant 1.0 : f32
|
||||
/// %cst_0 = constant 2.0 : f32
|
||||
/// for %i0 = 0 to %arg0 {
|
||||
/// for %i1 = 0 to %arg1 step 256 {
|
||||
/// affine.for %i0 = 0 to %arg0 {
|
||||
/// affine.for %i1 = 0 to %arg1 step 256 {
|
||||
/// %cst_1 = constant splat<vector<256xf32>, 1.0> :
|
||||
/// vector<256xf32>
|
||||
/// "vector_transfer_write"(%cst_1, %0, %i0, %i1) :
|
||||
/// (vector<256xf32>, memref<?x?xf32>, index, index) -> ()
|
||||
/// }
|
||||
/// }
|
||||
/// for %i2 = 0 to %arg0 {
|
||||
/// for %i3 = 0 to %arg1 step 256 {
|
||||
/// affine.for %i2 = 0 to %arg0 {
|
||||
/// affine.for %i3 = 0 to %arg1 step 256 {
|
||||
/// %cst_2 = constant splat<vector<256xf32>, 2.0> :
|
||||
/// vector<256xf32>
|
||||
/// "vector_transfer_write"(%cst_2, %1, %i2, %i3) :
|
||||
/// (vector<256xf32>, memref<?x?xf32>, index, index) -> ()
|
||||
/// }
|
||||
/// }
|
||||
/// for %i4 = 0 to %arg0 {
|
||||
/// for %i5 = 0 to %arg1 step 256 {
|
||||
/// affine.for %i4 = 0 to %arg0 {
|
||||
/// affine.for %i5 = 0 to %arg1 step 256 {
|
||||
/// %3 = "vector_transfer_read"(%0, %i4, %i5) :
|
||||
/// (memref<?x?xf32>, index, index) -> vector<256xf32>
|
||||
/// %4 = "vector_transfer_read"(%1, %i4, %i5) :
|
||||
@ -494,24 +494,24 @@ using namespace mlir;
|
||||
/// %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
|
||||
/// %cst = constant 1.0 : f32
|
||||
/// %cst_0 = constant 2.0 : f32
|
||||
/// for %i0 = 0 to %arg0 step 32 {
|
||||
/// for %i1 = 0 to %arg1 step 256 {
|
||||
/// affine.for %i0 = 0 to %arg0 step 32 {
|
||||
/// affine.for %i1 = 0 to %arg1 step 256 {
|
||||
/// %cst_1 = constant splat<vector<32x256xf32>, 1.0> :
|
||||
/// vector<32x256xf32>
|
||||
/// "vector_transfer_write"(%cst_1, %0, %i0, %i1) :
|
||||
/// (vector<32x256xf32>, memref<?x?xf32>, index, index) -> ()
|
||||
/// }
|
||||
/// }
|
||||
/// for %i2 = 0 to %arg0 step 32 {
|
||||
/// for %i3 = 0 to %arg1 step 256 {
|
||||
/// affine.for %i2 = 0 to %arg0 step 32 {
|
||||
/// affine.for %i3 = 0 to %arg1 step 256 {
|
||||
/// %cst_2 = constant splat<vector<32x256xf32>, 2.0> :
|
||||
/// vector<32x256xf32>
|
||||
/// "vector_transfer_write"(%cst_2, %1, %i2, %i3) :
|
||||
/// (vector<32x256xf32>, memref<?x?xf32>, index, index) -> ()
|
||||
/// }
|
||||
/// }
|
||||
/// for %i4 = 0 to %arg0 step 32 {
|
||||
/// for %i5 = 0 to %arg1 step 256 {
|
||||
/// affine.for %i4 = 0 to %arg0 step 32 {
|
||||
/// affine.for %i5 = 0 to %arg1 step 256 {
|
||||
/// %3 = "vector_transfer_read"(%0, %i4, %i5) :
|
||||
/// (memref<?x?xf32>, index, index) -> vector<32x256xf32>
|
||||
/// %4 = "vector_transfer_read"(%1, %i4, %i5) :
|
||||
|
@ -47,7 +47,7 @@
|
||||
func @compose_affine_maps_1dto2d_no_symbols() {
|
||||
%0 = alloc() : memref<4x4xf32>
|
||||
|
||||
for %i0 = 0 to 15 {
|
||||
affine.for %i0 = 0 to 15 {
|
||||
// Test load[%x, %x]
|
||||
|
||||
%x0 = affine.apply (d0) -> (d0 - 1) (%i0)
|
||||
@ -93,7 +93,7 @@ func @compose_affine_maps_1dto2d_no_symbols() {
|
||||
func @compose_affine_maps_1dto2d_with_symbols() {
|
||||
%0 = alloc() : memref<4x4xf32>
|
||||
|
||||
for %i0 = 0 to 15 {
|
||||
affine.for %i0 = 0 to 15 {
|
||||
// Test load[%x0, %x0] with symbol %c4
|
||||
%c4 = constant 4 : index
|
||||
%x0 = affine.apply (d0)[s0] -> (d0 - s0) (%i0)[%c4]
|
||||
@ -134,13 +134,13 @@ func @compose_affine_maps_2d_tile() {
|
||||
%c4 = constant 4 : index
|
||||
%c8 = constant 8 : index
|
||||
|
||||
for %i0 = 0 to 3 {
|
||||
affine.for %i0 = 0 to 3 {
|
||||
%x0 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i0)[%c4]
|
||||
for %i1 = 0 to 3 {
|
||||
affine.for %i1 = 0 to 3 {
|
||||
%x1 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i1)[%c8]
|
||||
for %i2 = 0 to 3 {
|
||||
affine.for %i2 = 0 to 3 {
|
||||
%x2 = affine.apply (d0)[s0] -> (d0 mod s0) (%i2)[%c4]
|
||||
for %i3 = 0 to 3 {
|
||||
affine.for %i3 = 0 to 3 {
|
||||
%x3 = affine.apply (d0)[s0] -> (d0 mod s0) (%i3)[%c8]
|
||||
|
||||
%x40 = affine.apply (d0, d1, d2, d3)[s0, s1] ->
|
||||
@ -166,9 +166,9 @@ func @compose_affine_maps_dependent_loads() {
|
||||
%0 = alloc() : memref<16x32xf32>
|
||||
%1 = alloc() : memref<16x32xf32>
|
||||
|
||||
for %i0 = 0 to 3 {
|
||||
for %i1 = 0 to 3 {
|
||||
for %i2 = 0 to 3 {
|
||||
affine.for %i0 = 0 to 3 {
|
||||
affine.for %i1 = 0 to 3 {
|
||||
affine.for %i2 = 0 to 3 {
|
||||
%c3 = constant 3 : index
|
||||
%c7 = constant 7 : index
|
||||
|
||||
@ -212,7 +212,7 @@ func @compose_affine_maps_dependent_loads() {
|
||||
func @compose_affine_maps_diamond_dependency() {
|
||||
%0 = alloc() : memref<4x4xf32>
|
||||
|
||||
for %i0 = 0 to 15 {
|
||||
affine.for %i0 = 0 to 15 {
|
||||
%a = affine.apply (d0) -> (d0 - 1) (%i0)
|
||||
%b = affine.apply (d0) -> (d0 + 7) (%a)
|
||||
%c = affine.apply (d0) -> (d0 * 4) (%a)
|
||||
@ -232,8 +232,8 @@ func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) {
|
||||
%c9 = constant 9 : index
|
||||
%1 = alloc() : memref<100x100xf32, 1>
|
||||
%2 = alloc() : memref<1xi32>
|
||||
for %i0 = 0 to 100 {
|
||||
for %i1 = 0 to 100 {
|
||||
affine.for %i0 = 0 to 100 {
|
||||
affine.for %i1 = 0 to 100 {
|
||||
%3 = affine.apply (d0, d1)[s0, s1] -> (d1 + s0 + s1)
|
||||
(%i0, %i1)[%arg1, %c9]
|
||||
%4 = affine.apply (d0, d1, d3) -> (d3 - (d0 + d1))
|
||||
@ -253,7 +253,7 @@ func @trivial_maps() {
|
||||
%0 = alloc() : memref<10xf32>
|
||||
%c0 = constant 0 : index
|
||||
%cst = constant 0.000000e+00 : f32
|
||||
for %i1 = 0 to 10 {
|
||||
affine.for %i1 = 0 to 10 {
|
||||
%1 = affine.apply ()[s0] -> (s0)()[%c0]
|
||||
store %cst, %0[%1] : memref<10xf32>
|
||||
%2 = load %0[%c0] : memref<10xf32>
|
||||
@ -380,7 +380,7 @@ func @mix_dims_and_symbols_g(%M: index, %N: index) -> (index, index, index) {
|
||||
// CHECK-LABEL: func @symbolic_semi_affine(%arg0: index, %arg1: index, %arg2: memref<?xf32>) {
|
||||
func @symbolic_semi_affine(%M: index, %N: index, %A: memref<?xf32>) {
|
||||
%f1 = constant 1.0 : f32
|
||||
for %i0 = 1 to 100 {
|
||||
affine.for %i0 = 1 to 100 {
|
||||
%1 = affine.apply ()[s0] -> (s0 + 1) ()[%M]
|
||||
%2 = affine.apply (d0)[s0] -> (d0 floordiv s0) (%i0)[%1]
|
||||
// CHECK-DAG: {{.*}} = affine.apply [[symbolic_semi_affine]](%i0)[%arg0]
|
||||
@ -404,20 +404,20 @@ func @constant_fold_bounds(%N : index) {
|
||||
%c3 = affine.apply (d0, d1) -> (d0 + d1) (%c1, %c2)
|
||||
%l = "foo"() : () -> index
|
||||
|
||||
// CHECK: for %i0 = 5 to 7 {
|
||||
for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) {
|
||||
// CHECK: affine.for %i0 = 5 to 7 {
|
||||
affine.for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) {
|
||||
"foo"(%i, %c3) : (index, index) -> ()
|
||||
}
|
||||
|
||||
// Bound takes a non-constant argument but can still be folded.
|
||||
// CHECK: for %i1 = 1 to 7 {
|
||||
for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) {
|
||||
// CHECK: affine.for %i1 = 1 to 7 {
|
||||
affine.for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) {
|
||||
"foo"(%j, %c3) : (index, index) -> ()
|
||||
}
|
||||
|
||||
// None of the bounds can be folded.
|
||||
// CHECK: for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] {
|
||||
for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] {
|
||||
// CHECK: affine.for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] {
|
||||
affine.for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] {
|
||||
"foo"(%k, %c3) : (index, index) -> ()
|
||||
}
|
||||
return
|
||||
|
@ -5,7 +5,7 @@
|
||||
#map = (d0)[s0] -> (d0 + s0)
|
||||
|
||||
func @affine_apply_invalid_dim(%arg : index) {
|
||||
for %n0 = 0 to 7 {
|
||||
affine.for %n0 = 0 to 7 {
|
||||
%dim = addi %arg, %arg : index
|
||||
|
||||
// expected-error@+1 {{operand cannot be used as a dimension id}}
|
||||
@ -19,7 +19,7 @@ func @affine_apply_invalid_dim(%arg : index) {
|
||||
#map0 = (d0)[s0] -> (d0 + s0)
|
||||
|
||||
func @affine_apply_invalid_sym() {
|
||||
for %i0 = 0 to 7 {
|
||||
affine.for %i0 = 0 to 7 {
|
||||
// expected-error@+1 {{operand cannot be used as a symbol}}
|
||||
%0 = affine.apply #map0(%i0)[%i0]
|
||||
}
|
||||
@ -31,11 +31,11 @@ func @affine_apply_invalid_sym() {
|
||||
#map = (d0)[s0] -> (d0 + s0)
|
||||
|
||||
func @affine_for_lower_bound_invalid_dim(%arg : index) {
|
||||
for %n0 = 0 to 7 {
|
||||
affine.for %n0 = 0 to 7 {
|
||||
%dim = addi %arg, %arg : index
|
||||
|
||||
// expected-error@+1 {{operand cannot be used as a dimension id}}
|
||||
for %n1 = 0 to #map(%dim)[%arg] {
|
||||
affine.for %n1 = 0 to #map(%dim)[%arg] {
|
||||
}
|
||||
}
|
||||
return
|
||||
@ -46,11 +46,11 @@ func @affine_for_lower_bound_invalid_dim(%arg : index) {
|
||||
#map = (d0)[s0] -> (d0 + s0)
|
||||
|
||||
func @affine_for_upper_bound_invalid_dim(%arg : index) {
|
||||
for %n0 = 0 to 7 {
|
||||
affine.for %n0 = 0 to 7 {
|
||||
%dim = addi %arg, %arg : index
|
||||
|
||||
// expected-error@+1 {{operand cannot be used as a dimension id}}
|
||||
for %n1 = #map(%dim)[%arg] to 7 {
|
||||
affine.for %n1 = #map(%dim)[%arg] to 7 {
|
||||
}
|
||||
}
|
||||
return
|
||||
@ -61,9 +61,9 @@ func @affine_for_upper_bound_invalid_dim(%arg : index) {
|
||||
#map0 = (d0)[s0] -> (d0 + s0)
|
||||
|
||||
func @affine_for_lower_bound_invalid_sym() {
|
||||
for %i0 = 0 to 7 {
|
||||
affine.for %i0 = 0 to 7 {
|
||||
// expected-error@+1 {{operand cannot be used as a symbol}}
|
||||
for %n0 = #map0(%i0)[%i0] to 7 {
|
||||
affine.for %n0 = #map0(%i0)[%i0] to 7 {
|
||||
}
|
||||
}
|
||||
return
|
||||
@ -74,9 +74,9 @@ func @affine_for_lower_bound_invalid_sym() {
|
||||
#map0 = (d0)[s0] -> (d0 + s0)
|
||||
|
||||
func @affine_for_upper_bound_invalid_sym() {
|
||||
for %i0 = 0 to 7 {
|
||||
affine.for %i0 = 0 to 7 {
|
||||
// expected-error@+1 {{operand cannot be used as a symbol}}
|
||||
for %n0 = 0 to #map0(%i0)[%i0] {
|
||||
affine.for %n0 = 0 to #map0(%i0)[%i0] {
|
||||
}
|
||||
}
|
||||
return
|
||||
@ -87,7 +87,7 @@ func @affine_for_upper_bound_invalid_sym() {
|
||||
#set0 = (i)[N] : (i >= 0, N - i >= 0)
|
||||
|
||||
func @affine_if_invalid_dim(%arg : index) {
|
||||
for %n0 = 0 to 7 {
|
||||
affine.for %n0 = 0 to 7 {
|
||||
%dim = addi %arg, %arg : index
|
||||
|
||||
// expected-error@+1 {{operand cannot be used as a dimension id}}
|
||||
@ -101,7 +101,7 @@ func @affine_if_invalid_dim(%arg : index) {
|
||||
#set0 = (i)[N] : (i >= 0, N - i >= 0)
|
||||
|
||||
func @affine_if_invalid_sym() {
|
||||
for %i0 = 0 to 7 {
|
||||
affine.for %i0 = 0 to 7 {
|
||||
// expected-error@+1 {{operand cannot be used as a symbol}}
|
||||
affine.if #set0(%i0)[%i0] {}
|
||||
}
|
||||
@ -113,7 +113,7 @@ func @affine_if_invalid_sym() {
|
||||
#set0 = (i)[N] : (i >= 0, N - i >= 0)
|
||||
|
||||
func @affine_if_invalid_dimop_dim(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
|
||||
for %n0 = 0 to 7 {
|
||||
affine.for %n0 = 0 to 7 {
|
||||
%0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
|
||||
%dim = dim %0, 0 : memref<?x?x?x?xf32>
|
||||
|
||||
|
@ -2,9 +2,9 @@
|
||||
|
||||
// Check that the attributes for the affine operations are round-tripped.
|
||||
func @attributes() {
|
||||
// CHECK: for %i
|
||||
// CHECK: affine.for %i
|
||||
// CHECK-NEXT: } {some_attr: true}
|
||||
for %i = 0 to 10 {
|
||||
affine.for %i = 0 to 10 {
|
||||
} {some_attr: true}
|
||||
|
||||
// CHECK: if
|
||||
|
@ -143,7 +143,7 @@ TEST_FUNC(cond_branch) {
|
||||
f->print(llvm::outs());
|
||||
}
|
||||
|
||||
// Inject a EDSC-constructed `for` loop with bounds coming from function
|
||||
// Inject a EDSC-constructed `affine.for` loop with bounds coming from function
|
||||
// arguments.
|
||||
TEST_FUNC(dynamic_for_func_args) {
|
||||
auto indexType = IndexType::get(&globalContext());
|
||||
@ -164,7 +164,7 @@ TEST_FUNC(dynamic_for_func_args) {
|
||||
|
||||
// clang-format off
|
||||
// CHECK-LABEL: func @dynamic_for_func_args(%arg0: index, %arg1: index) {
|
||||
// CHECK: for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 {
|
||||
// CHECK: affine.for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 {
|
||||
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 * 3)()[%arg0]
|
||||
// CHECK: {{.*}} = affine.apply ()[s0, s1] -> (s1 + s0 * 3)()[%arg0, %arg1]
|
||||
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 + 3)()[%arg0]
|
||||
@ -172,7 +172,7 @@ TEST_FUNC(dynamic_for_func_args) {
|
||||
f->print(llvm::outs());
|
||||
}
|
||||
|
||||
// Inject a EDSC-constructed `for` loop with non-constant bounds that are
|
||||
// Inject a EDSC-constructed `affine.for` loop with non-constant bounds that are
|
||||
// obtained from AffineApplyOp (also constructed using EDSC operator
|
||||
// overloads).
|
||||
TEST_FUNC(dynamic_for) {
|
||||
@ -200,12 +200,12 @@ TEST_FUNC(dynamic_for) {
|
||||
// CHECK-LABEL: func @dynamic_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
|
||||
// CHECK: %0 = affine.apply ()[s0, s1] -> (s0 - s1)()[%arg0, %arg1]
|
||||
// CHECK-NEXT: %1 = affine.apply ()[s0, s1] -> (s0 + s1)()[%arg2, %arg3]
|
||||
// CHECK-NEXT: for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 {
|
||||
// CHECK-NEXT: affine.for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 {
|
||||
// clang-format on
|
||||
f->print(llvm::outs());
|
||||
}
|
||||
|
||||
// Inject a EDSC-constructed empty `for` loop with max/min bounds that
|
||||
// Inject a EDSC-constructed empty `affine.for` loop with max/min bounds that
|
||||
// corresponds to
|
||||
//
|
||||
// for max(%arg0, %arg1) to (%arg2, %arg3) step 1
|
||||
@ -234,7 +234,7 @@ TEST_FUNC(max_min_for) {
|
||||
|
||||
// clang-format off
|
||||
// CHECK-LABEL: func @max_min_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
|
||||
// CHECK: for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) {
|
||||
// CHECK: affine.for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) {
|
||||
// clang-format on
|
||||
f->print(llvm::outs());
|
||||
}
|
||||
@ -334,7 +334,7 @@ TEST_FUNC(assignments_1) {
|
||||
|
||||
// clang-format off
|
||||
// CHECK-LABEL: func @assignments(%arg0: memref<4xf32>, %arg1: memref<4xf32>, %arg2: memref<4xf32>) {
|
||||
// CHECK: for %[[iv:.*]] = 0 to 4 {
|
||||
// CHECK: affine.for %[[iv:.*]] = 0 to 4 {
|
||||
// CHECK: %[[a:.*]] = load %arg0[%[[iv]]] : memref<4xf32>
|
||||
// CHECK: %[[b:.*]] = load %arg1[%[[iv]]] : memref<4xf32>
|
||||
// CHECK: %[[tmp:.*]] = mulf %[[a]], %[[b]] : f32
|
||||
@ -348,7 +348,7 @@ TEST_FUNC(assignments_2) {
|
||||
|
||||
// clang-format off
|
||||
// CHECK-LABEL: func @assignments(%arg0: memref<?xf32>, %arg1: memref<?xf32>, %arg2: memref<?xf32>) {
|
||||
// CHECK: for %[[iv:.*]] = {{.*}} to {{.*}} {
|
||||
// CHECK: affine.for %[[iv:.*]] = {{.*}} to {{.*}} {
|
||||
// CHECK: %[[a:.*]] = load %arg0[%[[iv]]] : memref<?xf32>
|
||||
// CHECK: %[[b:.*]] = load %arg1[%[[iv]]] : memref<?xf32>
|
||||
// CHECK: %[[tmp:.*]] = mulf %[[a]], %[[b]] : f32
|
||||
@ -405,13 +405,13 @@ TEST_FUNC(tile_2d) {
|
||||
// CHECK: %[[M:[0-9]+]] = dim %arg0, 0 : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: %[[N:[0-9]+]] = dim %arg0, 1 : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: %[[P:[0-9]+]] = dim %arg0, 2 : memref<?x?x?xf32>
|
||||
// CHECK: for %i0 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[M]]) step 512 {
|
||||
// CHECK-NEXT: for %i1 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[N]]) step 1024 {
|
||||
// CHECK-NEXT: for %i2 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) {
|
||||
// CHECK-NEXT: for %i3 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] step 16 {
|
||||
// CHECK-NEXT: for %i4 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] step 32 {
|
||||
// CHECK-NEXT: for %i5 = max (d0, d1)[s0] -> (s0, d0, d1)(%i1, %i4)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 1024, d1 + 32)(%i1, %i4)[%[[N]]] {
|
||||
// CHECK-NEXT: for %i6 = max (d0, d1)[s0] -> (s0, d0, d1)(%i0, %i3)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 512, d1 + 16)(%i0, %i3)[%[[M]]] {
|
||||
// CHECK: affine.for %i0 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[M]]) step 512 {
|
||||
// CHECK-NEXT: affine.for %i1 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[N]]) step 1024 {
|
||||
// CHECK-NEXT: affine.for %i2 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) {
|
||||
// CHECK-NEXT: affine.for %i3 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] step 16 {
|
||||
// CHECK-NEXT: affine.for %i4 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] step 32 {
|
||||
// CHECK-NEXT: affine.for %i5 = max (d0, d1)[s0] -> (s0, d0, d1)(%i1, %i4)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 1024, d1 + 32)(%i1, %i4)[%[[N]]] {
|
||||
// CHECK-NEXT: affine.for %i6 = max (d0, d1)[s0] -> (s0, d0, d1)(%i0, %i3)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 512, d1 + 16)(%i0, %i3)[%[[M]]] {
|
||||
// CHECK-NEXT: {{.*}} = load {{.*}}[%i6, %i5, %i2] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: {{.*}} = load {{.*}}[%i6, %i5, %i2] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: {{.*}} = addf {{.*}}, {{.*}} : f32
|
||||
@ -421,9 +421,9 @@ TEST_FUNC(tile_2d) {
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: for %i7 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) {
|
||||
// CHECK-NEXT: for %i8 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] {
|
||||
// CHECK-NEXT: for %i9 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] {
|
||||
// CHECK-NEXT: affine.for %i7 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) {
|
||||
// CHECK-NEXT: affine.for %i8 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] {
|
||||
// CHECK-NEXT: affine.for %i9 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] {
|
||||
// CHECK-NEXT: {{.*}} = load {{.*}}[%i8, %i9, %i7] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: {{.*}} = load {{.*}}[%i8, %i9, %i7] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: {{.*}}= addf {{.*}}, {{.*}} : f32
|
||||
|
@ -80,11 +80,11 @@ TEST_FUNC(builder_dynamic_for_func_args) {
|
||||
|
||||
// clang-format off
|
||||
// CHECK-LABEL: func @builder_dynamic_for_func_args(%arg0: index, %arg1: index) {
|
||||
// CHECK: for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 {
|
||||
// CHECK: affine.for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 {
|
||||
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 * 3)()[%arg0]
|
||||
// CHECK: {{.*}} = affine.apply ()[s0, s1] -> (s1 + s0 * 3)()[%arg0, %arg1]
|
||||
// CHECK: {{.*}} = affine.apply ()[s0] -> (s0 + 3)()[%arg0]
|
||||
// CHECK: for %i1 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 2 {
|
||||
// CHECK: affine.for %i1 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 2 {
|
||||
// CHECK: {{.*}} = affine.apply (d0, d1) -> ((d0 + d1 * 3) floordiv 32)(%i0, %i1)
|
||||
// CHECK: {{.*}} = affine.apply (d0, d1) -> (((d0 + d1 * 3) floordiv 32) * 31)(%i0, %i1)
|
||||
// CHECK: {{.*}} = affine.apply (d0, d1) -> ((((d0 + d1 * 3) floordiv 32) * 31) ceildiv 32)(%i0, %i1)
|
||||
@ -119,7 +119,7 @@ TEST_FUNC(builder_dynamic_for) {
|
||||
// CHECK-LABEL: func @builder_dynamic_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
|
||||
// CHECK: %0 = affine.apply ()[s0, s1] -> (s0 - s1)()[%arg0, %arg1]
|
||||
// CHECK-NEXT: %1 = affine.apply ()[s0, s1] -> (s0 + s1)()[%arg2, %arg3]
|
||||
// CHECK-NEXT: for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 {
|
||||
// CHECK-NEXT: affine.for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 {
|
||||
// clang-format on
|
||||
f->print(llvm::outs());
|
||||
}
|
||||
@ -140,7 +140,7 @@ TEST_FUNC(builder_max_min_for) {
|
||||
|
||||
// clang-format off
|
||||
// CHECK-LABEL: func @builder_max_min_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
|
||||
// CHECK: for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) {
|
||||
// CHECK: affine.for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) {
|
||||
// CHECK: return
|
||||
// clang-format on
|
||||
f->print(llvm::outs());
|
||||
@ -344,16 +344,16 @@ TEST_FUNC(builder_helpers) {
|
||||
});
|
||||
|
||||
// CHECK-LABEL: @builder_helpers
|
||||
// CHECK: for %i0 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
|
||||
// CHECK-NEXT: for %i1 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
|
||||
// CHECK-NEXT: for %i2 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
|
||||
// CHECK: affine.for %i0 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
|
||||
// CHECK-NEXT: affine.for %i1 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
|
||||
// CHECK-NEXT: affine.for %i2 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) {
|
||||
// CHECK-NEXT: [[a:%.*]] = load %arg0[%i0, %i1, %i2] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: [[b:%.*]] = addf {{.*}}, [[a]] : f32
|
||||
// CHECK-NEXT: [[c:%.*]] = load %arg1[%i0, %i1, %i2] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: [[d:%.*]] = addf [[b]], [[c]] : f32
|
||||
// CHECK-NEXT: store [[d]], %arg2[%i0, %i1, %i2] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: for %i3 = (d0) -> (d0)(%c0_1) to (d0) -> (d0)(%2) {
|
||||
// CHECK-NEXT: affine.for %i3 = (d0) -> (d0)(%c0_1) to (d0) -> (d0)(%2) {
|
||||
// CHECK-NEXT: [[a:%.*]] = load %arg1[%i0, %i1, %i3] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: [[b:%.*]] = load %arg0[%i0, %i1, %i3] : memref<?x?x?xf32>
|
||||
// CHECK-NEXT: [[c:%.*]] = addf [[b]], [[a]] : f32
|
||||
@ -392,8 +392,8 @@ TEST_FUNC(custom_ops) {
|
||||
});
|
||||
|
||||
// CHECK-LABEL: @custom_ops
|
||||
// CHECK: for %i0 {{.*}}
|
||||
// CHECK: for %i1 {{.*}}
|
||||
// CHECK: affine.for %i0 {{.*}}
|
||||
// CHECK: affine.for %i1 {{.*}}
|
||||
// CHECK: {{.*}} = "my_custom_op"{{.*}} : (index, index) -> index
|
||||
// CHECK: "my_custom_inst_0"{{.*}} : (index, index) -> ()
|
||||
// CHECK: [[TWO:%[a-z0-9]+]] = "my_custom_inst_2"{{.*}} : (index, index) -> (index, index)
|
||||
|
@ -204,24 +204,24 @@ func @illegaltype(i0) // expected-error {{invalid integer width}}
|
||||
// -----
|
||||
|
||||
func @malformed_for_percent() {
|
||||
for i = 1 to 10 { // expected-error {{expected SSA operand}}
|
||||
affine.for i = 1 to 10 { // expected-error {{expected SSA operand}}
|
||||
|
||||
// -----
|
||||
|
||||
func @malformed_for_equal() {
|
||||
for %i 1 to 10 { // expected-error {{expected '='}}
|
||||
affine.for %i 1 to 10 { // expected-error {{expected '='}}
|
||||
|
||||
// -----
|
||||
|
||||
func @malformed_for_to() {
|
||||
for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}}
|
||||
affine.for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}}
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func @incomplete_for() {
|
||||
for %i = 1 to 10 step 2
|
||||
affine.for %i = 1 to 10 step 2
|
||||
} // expected-error {{expected '{' to begin a region}}
|
||||
|
||||
// -----
|
||||
@ -230,19 +230,19 @@ func @incomplete_for() {
|
||||
|
||||
func @reference_to_iv_in_bound() {
|
||||
// expected-error@+1 {{operand use before it's defined}}
|
||||
for %i0 = #map0(%i0) to 10 {
|
||||
affine.for %i0 = #map0(%i0) to 10 {
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func @nonconstant_step(%1 : i32) {
|
||||
for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}}
|
||||
affine.for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}}
|
||||
|
||||
// -----
|
||||
|
||||
func @for_negative_stride() {
|
||||
for %i = 1 to 10 step -1
|
||||
affine.for %i = 1 to 10 step -1
|
||||
} // expected-error@-1 {{expected step to be representable as a positive signed integer}}
|
||||
|
||||
// -----
|
||||
@ -254,7 +254,7 @@ func @non_instruction() {
|
||||
// -----
|
||||
|
||||
func @invalid_if_conditional2() {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if (i)[N] : (i >= ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
|
||||
}
|
||||
}
|
||||
@ -262,7 +262,7 @@ func @invalid_if_conditional2() {
|
||||
// -----
|
||||
|
||||
func @invalid_if_conditional3() {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if (i)[N] : (i == 1) // expected-error {{expected '0' after '=='}}
|
||||
}
|
||||
}
|
||||
@ -270,7 +270,7 @@ func @invalid_if_conditional3() {
|
||||
// -----
|
||||
|
||||
func @invalid_if_conditional4() {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if (i)[N] : (i >= 2) // expected-error {{expected '0' after '>='}}
|
||||
}
|
||||
}
|
||||
@ -278,7 +278,7 @@ func @invalid_if_conditional4() {
|
||||
// -----
|
||||
|
||||
func @invalid_if_conditional5() {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if (i)[N] : (i <= 0 ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
|
||||
}
|
||||
}
|
||||
@ -286,7 +286,7 @@ func @invalid_if_conditional5() {
|
||||
// -----
|
||||
|
||||
func @invalid_if_conditional6() {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if (i) : (i) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
|
||||
}
|
||||
}
|
||||
@ -294,7 +294,7 @@ func @invalid_if_conditional6() {
|
||||
// -----
|
||||
// TODO (support affine.if (1)?
|
||||
func @invalid_if_conditional7() {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if (i) : (1) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
|
||||
}
|
||||
}
|
||||
@ -440,8 +440,8 @@ func @undef() {
|
||||
// -----
|
||||
|
||||
func @duplicate_induction_var() {
|
||||
for %i = 1 to 10 { // expected-error {{previously defined here}}
|
||||
for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}}
|
||||
affine.for %i = 1 to 10 { // expected-error {{previously defined here}}
|
||||
affine.for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}}
|
||||
}
|
||||
}
|
||||
return
|
||||
@ -450,7 +450,7 @@ func @duplicate_induction_var() {
|
||||
// -----
|
||||
|
||||
func @dominance_failure() {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
}
|
||||
"xxx"(%i) : (index)->() // expected-error {{operand #0 does not dominate this use}}
|
||||
return
|
||||
@ -477,7 +477,7 @@ func @return_type_mismatch() -> i32 {
|
||||
// -----
|
||||
|
||||
func @return_inside_loop() {
|
||||
for %i = 1 to 100 {
|
||||
affine.for %i = 1 to 100 {
|
||||
// expected-error@-1 {{op expects body block to not have a terminator}}
|
||||
return
|
||||
}
|
||||
@ -522,7 +522,7 @@ func @referer() {
|
||||
#map1 = (i)[j] -> (i+j)
|
||||
|
||||
func @bound_symbol_mismatch(%N : index) {
|
||||
for %i = #map1(%N) to 100 {
|
||||
affine.for %i = #map1(%N) to 100 {
|
||||
// expected-error@-1 {{symbol operand count and integer set symbol count must match}}
|
||||
}
|
||||
return
|
||||
@ -533,7 +533,7 @@ func @bound_symbol_mismatch(%N : index) {
|
||||
#map1 = (i)[j] -> (i+j)
|
||||
|
||||
func @bound_dim_mismatch(%N : index) {
|
||||
for %i = #map1(%N, %N)[%N] to 100 {
|
||||
affine.for %i = #map1(%N, %N)[%N] to 100 {
|
||||
// expected-error@-1 {{dim operand count and integer set dim count must match}}
|
||||
}
|
||||
return
|
||||
@ -542,7 +542,7 @@ func @bound_dim_mismatch(%N : index) {
|
||||
// -----
|
||||
|
||||
func @large_bound() {
|
||||
for %i = 1 to 9223372036854775810 {
|
||||
affine.for %i = 1 to 9223372036854775810 {
|
||||
// expected-error@-1 {{integer constant out of range for attribute}}
|
||||
}
|
||||
return
|
||||
@ -551,7 +551,7 @@ func @large_bound() {
|
||||
// -----
|
||||
|
||||
func @max_in_upper_bound(%N : index) {
|
||||
for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}}
|
||||
affine.for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}}
|
||||
}
|
||||
return
|
||||
}
|
||||
@ -559,7 +559,7 @@ func @max_in_upper_bound(%N : index) {
|
||||
// -----
|
||||
|
||||
func @step_typo() {
|
||||
for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}}
|
||||
affine.for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}}
|
||||
}
|
||||
return
|
||||
}
|
||||
@ -567,7 +567,7 @@ func @step_typo() {
|
||||
// -----
|
||||
|
||||
func @invalid_bound_map(%N : i32) {
|
||||
for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}}
|
||||
affine.for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}}
|
||||
}
|
||||
return
|
||||
}
|
||||
@ -580,7 +580,7 @@ func @invalid_bound_map(%N : i32) {
|
||||
#set0 = (i)[N] : (i >= 0, N - i >= 0)
|
||||
|
||||
func @invalid_if_operands1(%N : index) {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if #set0(%i) {
|
||||
// expected-error@-1 {{symbol operand count and integer set symbol count must match}}
|
||||
|
||||
@ -588,7 +588,7 @@ func @invalid_if_operands1(%N : index) {
|
||||
#set0 = (i)[N] : (i >= 0, N - i >= 0)
|
||||
|
||||
func @invalid_if_operands2(%N : index) {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if #set0()[%N] {
|
||||
// expected-error@-1 {{dim operand count and integer set dim count must match}}
|
||||
|
||||
@ -596,7 +596,7 @@ func @invalid_if_operands2(%N : index) {
|
||||
#set0 = (i)[N] : (i >= 0, N - i >= 0)
|
||||
|
||||
func @invalid_if_operands3(%N : index) {
|
||||
for %i = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
affine.if #set0(%i)[%i] {
|
||||
// expected-error@-1 {{operand cannot be used as a symbol}}
|
||||
}
|
||||
@ -751,11 +751,11 @@ func @f(f32) {
|
||||
// -----
|
||||
|
||||
func @f(%m : memref<?x?xf32>) {
|
||||
for %i0 = 0 to 42 {
|
||||
affine.for %i0 = 0 to 42 {
|
||||
// expected-error@+1 {{operand #2 does not dominate this use}}
|
||||
%x = load %m[%i0, %i1] : memref<?x?xf32>
|
||||
}
|
||||
for %i1 = 0 to 42 {
|
||||
affine.for %i1 = 0 to 42 {
|
||||
}
|
||||
return
|
||||
}
|
||||
@ -805,7 +805,7 @@ func @type_alias_unknown(!unknown_alias) -> () { // expected-error {{undefined t
|
||||
|
||||
// Check ill-formed opaque tensor.
|
||||
func @complex_loops() {
|
||||
for %i1 = 1 to 100 {
|
||||
affine.for %i1 = 1 to 100 {
|
||||
// expected-error @+1 {{expected '"' in string literal}}
|
||||
"opaqueIntTensor"(){bar: opaque<"", tensor<2x1x4xi32>, "0x686]>} : () -> ()
|
||||
|
||||
@ -839,7 +839,7 @@ func @invalid_affine_structure() {
|
||||
|
||||
func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
|
||||
// expected-error @+1 {{lower loop bound affine map with multiple results requires 'max' prefix}}
|
||||
for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 {
|
||||
affine.for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 {
|
||||
}
|
||||
return
|
||||
}
|
||||
@ -848,7 +848,7 @@ func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
|
||||
|
||||
func @missing_for_min(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
|
||||
// expected-error @+1 {{upper loop bound affine map with multiple results requires 'min' prefix}}
|
||||
for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] {
|
||||
affine.for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] {
|
||||
}
|
||||
return
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
|
||||
%2 = constant 4 : index loc(callsite("foo" at "mysource.cc":10:8))
|
||||
|
||||
// CHECK: } loc(fused["foo", "mysource.cc":10:8])
|
||||
for %i0 = 0 to 8 {
|
||||
affine.for %i0 = 0 to 8 {
|
||||
} loc(fused["foo", "mysource.cc":10:8])
|
||||
|
||||
// CHECK: } loc(fused<"myPass">["foo", "foo2"])
|
||||
|
@ -208,8 +208,8 @@ func @identity_functor(%a : () -> ()) -> (() -> ()) {
|
||||
func @func_ops_in_loop() {
|
||||
// CHECK: %0 = "foo"() : () -> i64
|
||||
%a = "foo"() : ()->i64
|
||||
// CHECK: for %i0 = 1 to 10 {
|
||||
for %i = 1 to 10 {
|
||||
// CHECK: affine.for %i0 = 1 to 10 {
|
||||
affine.for %i = 1 to 10 {
|
||||
// CHECK: %1 = "doo"() : () -> f32
|
||||
%b = "doo"() : ()->f32
|
||||
// CHECK: "bar"(%0, %1) : (i64, f32) -> ()
|
||||
@ -224,10 +224,10 @@ func @func_ops_in_loop() {
|
||||
|
||||
// CHECK-LABEL: func @loops() {
|
||||
func @loops() {
|
||||
// CHECK: for %i0 = 1 to 100 step 2 {
|
||||
for %i = 1 to 100 step 2 {
|
||||
// CHECK: for %i1 = 1 to 200 {
|
||||
for %j = 1 to 200 {
|
||||
// CHECK: affine.for %i0 = 1 to 100 step 2 {
|
||||
affine.for %i = 1 to 100 step 2 {
|
||||
// CHECK: affine.for %i1 = 1 to 200 {
|
||||
affine.for %j = 1 to 200 {
|
||||
} // CHECK: }
|
||||
} // CHECK: }
|
||||
return // CHECK: return
|
||||
@ -235,14 +235,14 @@ func @loops() {
|
||||
|
||||
// CHECK-LABEL: func @complex_loops() {
|
||||
func @complex_loops() {
|
||||
for %i1 = 1 to 100 { // CHECK: for %i0 = 1 to 100 {
|
||||
for %j1 = 1 to 100 { // CHECK: for %i1 = 1 to 100 {
|
||||
affine.for %i1 = 1 to 100 { // CHECK: affine.for %i0 = 1 to 100 {
|
||||
affine.for %j1 = 1 to 100 { // CHECK: affine.for %i1 = 1 to 100 {
|
||||
// CHECK: "foo"(%i0, %i1) : (index, index) -> ()
|
||||
"foo"(%i1, %j1) : (index,index) -> ()
|
||||
} // CHECK: }
|
||||
"boo"() : () -> () // CHECK: "boo"() : () -> ()
|
||||
for %j2 = 1 to 10 { // CHECK: for %i2 = 1 to 10 {
|
||||
for %k2 = 1 to 10 { // CHECK: for %i3 = 1 to 10 {
|
||||
affine.for %j2 = 1 to 10 { // CHECK: affine.for %i2 = 1 to 10 {
|
||||
affine.for %k2 = 1 to 10 { // CHECK: affine.for %i3 = 1 to 10 {
|
||||
"goo"() : () -> () // CHECK: "goo"() : () -> ()
|
||||
} // CHECK: }
|
||||
} // CHECK: }
|
||||
@ -253,8 +253,8 @@ func @complex_loops() {
|
||||
// CHECK: func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
|
||||
func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
|
||||
%c = constant 0 : i32 // CHECK: %c0_i32 = constant 0 : i32
|
||||
for %i0 = 1 to %arg0 { // CHECK: for %i0 = 1 to %arg0 {
|
||||
for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: for %i1 = #map{{[0-9]+}}(%i0) to %arg0 {
|
||||
affine.for %i0 = 1 to %arg0 { // CHECK: affine.for %i0 = 1 to %arg0 {
|
||||
affine.for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to %arg0 {
|
||||
store %c, %arg1[%i0, %i1] : memref<?x?xi32> // CHECK: store %c0_i32, %arg1[%i0, %i1]
|
||||
} // CHECK: }
|
||||
} // CHECK: }
|
||||
@ -263,8 +263,8 @@ func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
|
||||
|
||||
// CHECK: func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
|
||||
func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
|
||||
// CHECK: for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] {
|
||||
for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] {
|
||||
// CHECK: affine.for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] {
|
||||
affine.for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] {
|
||||
// CHECK: "foo"(%arg2, %i0) : (memref<100xf32>, index) -> ()
|
||||
"foo"(%arg2, %i0) : (memref<100xf32>, index) -> ()
|
||||
} // CHECK: }
|
||||
@ -275,24 +275,24 @@ func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
|
||||
func @loop_bounds(%N : index) {
|
||||
// CHECK: %0 = "foo"(%arg0) : (index) -> index
|
||||
%s = "foo"(%N) : (index) -> index
|
||||
// CHECK: for %i0 = %0 to %arg0
|
||||
for %i = %s to %N {
|
||||
// CHECK: for %i1 = #map{{[0-9]+}}(%i0) to 0
|
||||
for %j = (d0)[]->(d0)(%i)[] to 0 step 1 {
|
||||
// CHECK: affine.for %i0 = %0 to %arg0
|
||||
affine.for %i = %s to %N {
|
||||
// CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to 0
|
||||
affine.for %j = (d0)[]->(d0)(%i)[] to 0 step 1 {
|
||||
// CHECK: %1 = affine.apply #map{{.*}}(%i0, %i1)[%0]
|
||||
%w1 = affine.apply(d0, d1)[s0] -> (d0+d1) (%i, %j) [%s]
|
||||
// CHECK: %2 = affine.apply #map{{.*}}(%i0, %i1)[%0]
|
||||
%w2 = affine.apply(d0, d1)[s0] -> (s0+1) (%i, %j) [%s]
|
||||
// CHECK: for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] {
|
||||
for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] {
|
||||
// CHECK: affine.for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] {
|
||||
affine.for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] {
|
||||
// CHECK: "foo"(%i0, %i1, %i2) : (index, index, index) -> ()
|
||||
"foo"(%i, %j, %k) : (index, index, index)->()
|
||||
// CHECK: %c30 = constant 30 : index
|
||||
%c = constant 30 : index
|
||||
// CHECK: %3 = affine.apply #map{{.*}}(%arg0, %c30)
|
||||
%u = affine.apply (d0, d1)->(d0+d1) (%N, %c)
|
||||
// CHECK: for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] {
|
||||
for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] {
|
||||
// CHECK: affine.for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] {
|
||||
affine.for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] {
|
||||
// CHECK: "bar"(%i3) : (index) -> ()
|
||||
"bar"(%l) : (index) -> ()
|
||||
} // CHECK: }
|
||||
@ -305,7 +305,7 @@ func @loop_bounds(%N : index) {
|
||||
// CHECK-LABEL: func @ifinst(%arg0: index) {
|
||||
func @ifinst(%N: index) {
|
||||
%c = constant 200 : index // CHECK %c200 = constant 200
|
||||
for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 {
|
||||
affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 {
|
||||
affine.if #set0(%i)[%N, %c] { // CHECK affine.if #set0(%i0)[%arg0, %c200] {
|
||||
%x = constant 1 : i32
|
||||
// CHECK: %c1_i32 = constant 1 : i32
|
||||
@ -328,7 +328,7 @@ func @ifinst(%N: index) {
|
||||
// CHECK-LABEL: func @simple_ifinst(%arg0: index) {
|
||||
func @simple_ifinst(%N: index) {
|
||||
%c = constant 200 : index // CHECK %c200 = constant 200
|
||||
for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 {
|
||||
affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 {
|
||||
affine.if #set0(%i)[%N, %c] { // CHECK affine.if #set0(%i0)[%arg0, %c200] {
|
||||
%x = constant 1 : i32
|
||||
// CHECK: %c1_i32 = constant 1 : i32
|
||||
@ -549,18 +549,18 @@ func @funcattrwithblock() -> ()
|
||||
#map_non_simple2 = ()[s0, s1] -> (s0 + s1)
|
||||
#map_non_simple3 = ()[s0] -> (s0 + 3)
|
||||
func @funcsimplemap(%arg0: index, %arg1: index) -> () {
|
||||
for %i0 = 0 to #map_simple0()[] {
|
||||
// CHECK: for %i0 = 0 to 10 {
|
||||
for %i1 = 0 to #map_simple1()[%arg1] {
|
||||
// CHECK: for %i1 = 0 to %arg1 {
|
||||
for %i2 = 0 to #map_non_simple0(%i0)[] {
|
||||
// CHECK: for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) {
|
||||
for %i3 = 0 to #map_non_simple1(%i0)[%arg1] {
|
||||
// CHECK: for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] {
|
||||
for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] {
|
||||
// CHECK: for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] {
|
||||
for %i5 = 0 to #map_non_simple3()[%arg0] {
|
||||
// CHECK: for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] {
|
||||
affine.for %i0 = 0 to #map_simple0()[] {
|
||||
// CHECK: affine.for %i0 = 0 to 10 {
|
||||
affine.for %i1 = 0 to #map_simple1()[%arg1] {
|
||||
// CHECK: affine.for %i1 = 0 to %arg1 {
|
||||
affine.for %i2 = 0 to #map_non_simple0(%i0)[] {
|
||||
// CHECK: affine.for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) {
|
||||
affine.for %i3 = 0 to #map_non_simple1(%i0)[%arg1] {
|
||||
// CHECK: affine.for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] {
|
||||
affine.for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] {
|
||||
// CHECK: affine.for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] {
|
||||
affine.for %i5 = 0 to #map_non_simple3()[%arg0] {
|
||||
// CHECK: affine.for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] {
|
||||
%c42_i32 = constant 42 : i32
|
||||
}
|
||||
}
|
||||
@ -745,9 +745,9 @@ func @sparsevectorattr() -> () {
|
||||
// CHECK-LABEL: func @loops_with_blockids() {
|
||||
func @loops_with_blockids() {
|
||||
^block0:
|
||||
for %i = 1 to 100 step 2 {
|
||||
affine.for %i = 1 to 100 step 2 {
|
||||
^block1:
|
||||
for %j = 1 to 200 {
|
||||
affine.for %j = 1 to 200 {
|
||||
^block2:
|
||||
}
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
|
||||
%3 = constant 4 : index loc(callsite("foo" at callsite("mysource1.cc":10:8 at callsite("mysource2.cc":13:8 at "mysource3.cc":100:10))))
|
||||
|
||||
// CHECK: } ["foo", mysource.cc:10:8]
|
||||
for %i0 = 0 to 8 {
|
||||
affine.for %i0 = 0 to 8 {
|
||||
} loc(fused["foo", "mysource.cc":10:8])
|
||||
|
||||
// CHECK: } <"myPass">["foo", "foo2"]
|
||||
|
@ -6,8 +6,8 @@
|
||||
// CHECK-LABEL: func @materialize_read_1d() {
|
||||
func @materialize_read_1d() {
|
||||
%A = alloc () : memref<7x42xf32>
|
||||
for %i0 = 0 to 7 step 4 {
|
||||
for %i1 = 0 to 42 step 4 {
|
||||
affine.for %i0 = 0 to 7 step 4 {
|
||||
affine.for %i1 = 0 to 42 step 4 {
|
||||
%f1 = vector_transfer_read %A, %i0, %i1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32>
|
||||
%ip1 = affine.apply (d0) -> (d0 + 1) (%i1)
|
||||
%f2 = vector_transfer_read %A, %i0, %ip1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32>
|
||||
@ -29,11 +29,11 @@ func @materialize_read_1d() {
|
||||
// CHECK-LABEL: func @materialize_read_1d_partially_specialized
|
||||
func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) {
|
||||
%A = alloc (%dyn1, %dyn2, %dyn4) : memref<7x?x?x42x?xf32>
|
||||
for %i0 = 0 to 7 {
|
||||
for %i1 = 0 to %dyn1 {
|
||||
for %i2 = 0 to %dyn2 {
|
||||
for %i3 = 0 to 42 step 2 {
|
||||
for %i4 = 0 to %dyn4 {
|
||||
affine.for %i0 = 0 to 7 {
|
||||
affine.for %i1 = 0 to %dyn1 {
|
||||
affine.for %i2 = 0 to %dyn2 {
|
||||
affine.for %i3 = 0 to 42 step 2 {
|
||||
affine.for %i4 = 0 to %dyn4 {
|
||||
%f1 = vector_transfer_read %A, %i0, %i1, %i2, %i3, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32>
|
||||
%i3p1 = affine.apply (d0) -> (d0 + 1) (%i3)
|
||||
%f2 = vector_transfer_read %A, %i0, %i1, %i2, %i3p1, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32>
|
||||
@ -54,19 +54,19 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d
|
||||
// CHECK-LABEL: func @materialize_read(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
|
||||
func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
|
||||
// CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 {
|
||||
// CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 {
|
||||
// CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 {
|
||||
// CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 {
|
||||
// CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 {
|
||||
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 {
|
||||
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 {
|
||||
// CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 {
|
||||
// CHECK: %[[D0:.*]] = dim %0, 0 : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: %[[D1:.*]] = dim %0, 1 : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: %[[D2:.*]] = dim %0, 2 : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: %[[D3:.*]] = dim %0, 3 : memref<?x?x?x?xf32>
|
||||
// CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
|
||||
// CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast %[[ALLOC]] : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
|
||||
// CHECK-NEXT: for %[[I4:.*]] = 0 to 3 {
|
||||
// CHECK-NEXT: for %[[I5:.*]] = 0 to 4 {
|
||||
// CHECK-NEXT: for %[[I6:.*]] = 0 to 5 {
|
||||
// CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 {
|
||||
// CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 {
|
||||
// CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 {
|
||||
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
|
||||
// CHECK-NEXT: %[[C1:.*]] = constant 1 : index
|
||||
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
|
||||
@ -117,10 +117,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
|
||||
// Check that I3 + I6 (of size 5) read from last index load(..., L3) and write into first index store(I6, ...)
|
||||
// Other dimensions are just accessed with I1, I2 resp.
|
||||
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
|
||||
for %i0 = 0 to %M step 3 {
|
||||
for %i1 = 0 to %N {
|
||||
for %i2 = 0 to %O {
|
||||
for %i3 = 0 to %P step 5 {
|
||||
affine.for %i0 = 0 to %M step 3 {
|
||||
affine.for %i1 = 0 to %N {
|
||||
affine.for %i2 = 0 to %O {
|
||||
affine.for %i3 = 0 to %P step 5 {
|
||||
%f = vector_transfer_read %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, 0, d0)} : (memref<?x?x?x?xf32, 0>, index, index, index, index) -> vector<5x4x3xf32>
|
||||
}
|
||||
}
|
||||
@ -133,10 +133,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
|
||||
func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
|
||||
// CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: %cst = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32>
|
||||
// CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 {
|
||||
// CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 step 4 {
|
||||
// CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 {
|
||||
// CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 {
|
||||
// CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 {
|
||||
// CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 step 4 {
|
||||
// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 {
|
||||
// CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 {
|
||||
// CHECK: %[[D0:.*]] = dim %0, 0 : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: %[[D1:.*]] = dim %0, 1 : memref<?x?x?x?xf32>
|
||||
// CHECK-NEXT: %[[D2:.*]] = dim %0, 2 : memref<?x?x?x?xf32>
|
||||
@ -144,9 +144,9 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
|
||||
// CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
|
||||
// CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast {{.*}} : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
|
||||
// CHECK: store %cst, {{.*}} : memref<1xvector<5x4x3xf32>>
|
||||
// CHECK-NEXT: for %[[I4:.*]] = 0 to 3 {
|
||||
// CHECK-NEXT: for %[[I5:.*]] = 0 to 4 {
|
||||
// CHECK-NEXT: for %[[I6:.*]] = 0 to 5 {
|
||||
// CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 {
|
||||
// CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 {
|
||||
// CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 {
|
||||
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
|
||||
// CHECK-NEXT: %[[C1:.*]] = constant 1 : index
|
||||
// CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
|
||||
@ -201,10 +201,10 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
|
||||
// Other dimension is just accessed with I2.
|
||||
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
|
||||
%f1 = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32>
|
||||
for %i0 = 0 to %M step 3 {
|
||||
for %i1 = 0 to %N step 4 {
|
||||
for %i2 = 0 to %O {
|
||||
for %i3 = 0 to %P step 5 {
|
||||
affine.for %i0 = 0 to %M step 3 {
|
||||
affine.for %i1 = 0 to %N step 4 {
|
||||
affine.for %i2 = 0 to %O {
|
||||
affine.for %i3 = 0 to %P step 5 {
|
||||
vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : vector<5x4x3xf32>, memref<?x?x?x?xf32, 0>, index, index, index, index
|
||||
}
|
||||
}
|
||||
|
@ -10,10 +10,10 @@
|
||||
func @materialize(%M : index, %N : index, %O : index, %P : index) {
|
||||
%A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
|
||||
%f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
|
||||
// CHECK: for %i0 = 0 to %arg0 step 4 {
|
||||
// CHECK-NEXT: for %i1 = 0 to %arg1 step 4 {
|
||||
// CHECK-NEXT: for %i2 = 0 to %arg2 {
|
||||
// CHECK-NEXT: for %i3 = 0 to %arg3 step 4 {
|
||||
// CHECK: affine.for %i0 = 0 to %arg0 step 4 {
|
||||
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 4 {
|
||||
// CHECK-NEXT: affine.for %i2 = 0 to %arg2 {
|
||||
// CHECK-NEXT: affine.for %i3 = 0 to %arg3 step 4 {
|
||||
// CHECK-NEXT: %[[a:[0-9]+]] = {{.*}}[[ID1]](%i0)
|
||||
// CHECK-NEXT: %[[b:[0-9]+]] = {{.*}}[[ID1]](%i1)
|
||||
// CHECK-NEXT: %[[c:[0-9]+]] = {{.*}}[[ID1]](%i2)
|
||||
@ -25,10 +25,10 @@ func @materialize(%M : index, %N : index, %O : index, %P : index) {
|
||||
// CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b2]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
|
||||
// CHECK: %[[b3:[0-9]+]] = {{.*}}[[D0P3]](%i1)
|
||||
// CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b3]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
|
||||
for %i0 = 0 to %M step 4 {
|
||||
for %i1 = 0 to %N step 4 {
|
||||
for %i2 = 0 to %O {
|
||||
for %i3 = 0 to %P step 4 {
|
||||
affine.for %i0 = 0 to %M step 4 {
|
||||
affine.for %i1 = 0 to %N step 4 {
|
||||
affine.for %i2 = 0 to %O {
|
||||
affine.for %i3 = 0 to %P step 4 {
|
||||
"vector_transfer_write"(%f1, %A, %i0, %i1, %i2, %i3) {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : (vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>, index, index, index, index) -> ()
|
||||
}
|
||||
}
|
||||
|
@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
||||
%f1 = constant 1.0 : f32
|
||||
%f2 = constant 2.0 : f32
|
||||
// 4x unroll (jammed by construction).
|
||||
// CHECK: for %i0 = 0 to %arg0 {
|
||||
// CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
|
||||
// CHECK: affine.for %i0 = 0 to %arg0 {
|
||||
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 {
|
||||
// CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
|
||||
@ -34,15 +34,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
||||
// CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}}
|
||||
// CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
//
|
||||
for %i0 = 0 to %M {
|
||||
for %i1 = 0 to %N {
|
||||
affine.for %i0 = 0 to %M {
|
||||
affine.for %i1 = 0 to %N {
|
||||
// non-scoped %f1
|
||||
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
|
||||
}
|
||||
}
|
||||
// 4x unroll (jammed by construction).
|
||||
// CHECK: for %i2 = 0 to %arg0 {
|
||||
// CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
|
||||
// CHECK: affine.for %i2 = 0 to %arg0 {
|
||||
// CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 {
|
||||
// CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
// CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
|
||||
@ -60,15 +60,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
||||
// CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}}
|
||||
// CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
|
||||
//
|
||||
for %i2 = 0 to %M {
|
||||
for %i3 = 0 to %N {
|
||||
affine.for %i2 = 0 to %M {
|
||||
affine.for %i3 = 0 to %N {
|
||||
// non-scoped %f2
|
||||
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
|
||||
}
|
||||
}
|
||||
// 4x unroll (jammed by construction).
|
||||
// CHECK: for %i4 = 0 to %arg0 {
|
||||
// CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
|
||||
// CHECK: affine.for %i4 = 0 to %arg0 {
|
||||
// CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 {
|
||||
// CHECK-NEXT: {{.*}} = affine.apply
|
||||
// CHECK-NEXT: {{.*}} = affine.apply
|
||||
// CHECK-NEXT: {{.*}} = vector_transfer_read
|
||||
@ -110,8 +110,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
|
||||
// CHECK-NEXT: {{.*}} = affine.apply
|
||||
// CHECK-NEXT: vector_transfer_write
|
||||
//
|
||||
for %i4 = 0 to %M {
|
||||
for %i5 = 0 to %N {
|
||||
affine.for %i4 = 0 to %M {
|
||||
affine.for %i5 = 0 to %N {
|
||||
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
|
||||
%b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
|
||||
%s5 = addf %a5, %b5 : f32
|
||||
|
@@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
// (3x2)x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i1 = 0 to %arg1 step 16 {
// CHECK: affine.for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 16 {
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
@@ -41,26 +41,26 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL50:%.*]] = affine.apply [[D0P2]](%i0)
// CHECK-NEXT: [[VAL51:%.*]] = affine.apply [[D0P8]](%i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL50]], [[VAL51]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
}
}
// (3x2)x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i3 = 0 to %arg1 step 16 {
// CHECK: affine.for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 16 {
// .....
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// non-scoped %f2
// CHECK does (3x4)x unrolling.
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
}
}
// (3x2)x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i5 = 0 to %arg1 step 16 {
// CHECK: affine.for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 16 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
@@ -122,8 +122,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: vector_transfer_write
//
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
%b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
%s5 = addf %a5, %b5 : f32
@@ -13,8 +13,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
// 2x unroll (jammed by construction).
// CHECK: for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
// CHECK: affine.for %i0 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i0)
@@ -24,15 +24,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i1)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32>
//
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
}
}
// 2x unroll (jammed by construction).
// CHECK: for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
// CHECK: affine.for %i2 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
// CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i2)
@@ -42,15 +42,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i3)
// CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32>
//
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// non-scoped %f2
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
}
}
// 2x unroll (jammed by construction).
// CHECK: for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
// CHECK: affine.for %i4 = 0 to %arg0 step 3 {
// CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: {{.*}} = vector_transfer_read
@@ -72,8 +72,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-NEXT: {{.*}} = affine.apply
// CHECK-NEXT: vector_transfer_write
//
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
%a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
%b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
%s5 = addf %a5, %b5 : f32
@@ -9,19 +9,19 @@

// CHECK-LABEL: func @simple()
func @simple() {
for %i0 = 0 to 7 {
affine.for %i0 = 0 to 7 {
%0 = affine.apply (d0) -> (d0) (%i0)
%1 = affine.apply (d0) -> (d0) (%0)
%2 = affine.apply (d0, d1) -> (d0 + d1) (%0, %0)
%3 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0)
}
// CHECK-NEXT: for %i0 = 0 to 7
// CHECK-NEXT: affine.for %i0 = 0 to 7
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i0)
// CHECK-NEXT: {{.*}} affine.apply #[[D0TIMES2]](%i0)
// CHECK-NEXT: {{.*}} affine.apply #[[ZERO]]()

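// Example (a minimal reading of the folding matched above, not an added
// check): composing the identity apply %0 into the two-operand maps gives
//   %2 : (d0, d1) -> (d0 + d1) applied to (%0, %0) == #[[D0TIMES2]](%i0)
//   %3 : (d0, d1) -> (d0 - d1) applied to (%0, %0) == #[[ZERO]]()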
for %i1 = 0 to 7 {
for %i2 = 0 to 42 {
affine.for %i1 = 0 to 7 {
affine.for %i2 = 0 to 42 {
%20 = affine.apply (d0, d1) -> (d1) (%i1, %i2)
%21 = affine.apply (d0, d1) -> (d0) (%i1, %i2)
%22 = affine.apply (d0, d1) -> (d0 + d1) (%20, %21)
@@ -29,15 +29,15 @@ func @simple() {
%24 = affine.apply (d0, d1) -> (-d0 + d1) (%20, %21)
}
}
// CHECK: for %i1 = 0 to 7
// CHECK-NEXT: for %i2 = 0 to 42
// CHECK: affine.for %i1 = 0 to 7
// CHECK-NEXT: affine.for %i2 = 0 to 42
// CHECK-NEXT: {{.*}} affine.apply #[[D0PLUSD1]](%i1, %i2)
// CHECK-NEXT: {{.*}} affine.apply #[[MINSD0PLUSD1]](%i1, %i2)
// CHECK-NEXT: {{.*}} affine.apply #[[D0MINUSD1]](%i1, %i2)

for %i3 = 0 to 16 {
for %i4 = 0 to 47 step 2 {
for %i5 = 0 to 78 step 16 {
affine.for %i3 = 0 to 16 {
affine.for %i4 = 0 to 47 step 2 {
affine.for %i5 = 0 to 78 step 16 {
%50 = affine.apply (d0) -> (d0) (%i3)
%51 = affine.apply (d0) -> (d0) (%i4)
%52 = affine.apply (d0) -> (d0) (%i5)
@@ -47,9 +47,9 @@ func @simple() {
}
}
}
// CHECK: for %i3 = 0 to 16
// CHECK-NEXT: for %i4 = 0 to 47 step 2
// CHECK-NEXT: for %i5 = 0 to 78 step 16
// CHECK: affine.for %i3 = 0 to 16
// CHECK-NEXT: affine.for %i4 = 0 to 47 step 2
// CHECK-NEXT: affine.for %i5 = 0 to 78 step 16
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i3)
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i4)
// CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i5)
@@ -23,17 +23,17 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
//
// CHECK: for {{.*}} step 128
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
for %i0 = 0 to %M { // vectorized due to scalar -> vector
affine.for %i0 = 0 to %M { // vectorized due to scalar -> vector
%a0 = load %A[%cst0, %cst0] : memref<?x?xf32>
}
//
// CHECK:for {{.*}} [[ARG_M]] {
for %i1 = 0 to %M { // not vectorized
affine.for %i1 = 0 to %M { // not vectorized
%a1 = load %A[%i1, %i1] : memref<?x?xf32>
}
//
// CHECK: for %i{{[0-9]*}} = 0 to [[ARG_M]] {
for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
// CHECK: affine.for %i{{[0-9]*}} = 0 to [[ARG_M]] {
affine.for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
%r2 = affine.apply (d0) -> (d0) (%i2)
%a2 = load %A[%r2#0, %cst0] : memref<?x?xf32>
}
@@ -41,7 +41,7 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK:for [[IV3:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
// CHECK-NEXT: [[APP3:%[a-zA-Z0-9]+]] = affine.apply {{.*}}[[IV3]]
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[APP3]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
for %i3 = 0 to %M { // vectorized
affine.for %i3 = 0 to %M { // vectorized
%r3 = affine.apply (d0) -> (d0) (%i3)
%a3 = load %A[%cst0, %r3#0] : memref<?x?xf32>
}
@@ -51,8 +51,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK-NEXT: [[APP50:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]])
// CHECK-NEXT: [[APP51:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]])
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP50]], [[APP51]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
for %i4 = 0 to %M { // vectorized
for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1
affine.for %i4 = 0 to %M { // vectorized
affine.for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1
%r50 = affine.apply (d0, d1) -> (d1) (%i4, %i5)
%r51 = affine.apply (d0, d1) -> (d0) (%i4, %i5)
%a5 = load %A[%r50, %r51] : memref<?x?xf32>
@@ -61,8 +61,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
//
// CHECK: for [[IV6:%[i0-9]*]] = 0 to [[ARG_M]] {
// CHECK-NEXT: for [[IV7:%[i0-9]*]] = 0 to [[ARG_N]] {
for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
for %i7 = 0 to %N { // not vectorized, can never vectorize
affine.for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
affine.for %i7 = 0 to %N { // not vectorized, can never vectorize
%r70 = affine.apply (d0, d1) -> (d1 + d0) (%i6, %i7)
%r71 = affine.apply (d0, d1) -> (d0) (%i6, %i7)
%a7 = load %A[%r70, %r71] : memref<?x?xf32>
@@ -74,8 +74,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK-NEXT: [[APP9_0:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]])
// CHECK-NEXT: [[APP9_1:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]])
// CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP9_0]], [[APP9_1]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
for %i8 = 0 to %M { // vectorized
for %i9 = 0 to %N {
affine.for %i8 = 0 to %M { // vectorized
affine.for %i9 = 0 to %N {
%r90 = affine.apply (d0, d1) -> (d1) (%i8, %i9)
%r91 = affine.apply (d0, d1) -> (d0 + d1) (%i8, %i9)
%a9 = load %A[%r90, %r91] : memref<?x?xf32>
@@ -84,8 +84,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
//
// CHECK: for [[IV10:%[i0-9]*]] = 0 to %{{[0-9]*}} {
// CHECK: for [[IV11:%[i0-9]*]] = 0 to %{{[0-9]*}} {
for %i10 = 0 to %M { // not vectorized, need per load transposes
for %i11 = 0 to %N { // not vectorized, need per load transposes
affine.for %i10 = 0 to %M { // not vectorized, need per load transposes
affine.for %i11 = 0 to %N { // not vectorized, need per load transposes
%r11_0 = affine.apply (d0, d1) -> (d0) (%i10, %i11)
%r11_1 = affine.apply (d0, d1) -> (d1) (%i10, %i11)
%a11 = load %A[%r11_0, %r11_1] : memref<?x?xf32>
@@ -98,9 +98,9 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK: for [[IV12:%[i0-9]*]] = 0 to %{{[0-9]*}} {
// CHECK: for [[IV13:%[i0-9]*]] = 0 to %{{[0-9]*}} {
// CHECK: for [[IV14:%[i0-9]+]] = 0 to [[ARG_P]] step 128
for %i12 = 0 to %M { // not vectorized, can never vectorize
for %i13 = 0 to %N { // not vectorized, can never vectorize
for %i14 = 0 to %P { // vectorized
affine.for %i12 = 0 to %M { // not vectorized, can never vectorize
affine.for %i13 = 0 to %N { // not vectorized, can never vectorize
affine.for %i14 = 0 to %P { // vectorized
%r14_0 = affine.apply (d0, d1, d2) -> (d1) (%i12, %i13, %i14)
%r14_1 = affine.apply (d0, d1, d2) -> (d0 + d1) (%i12, %i13, %i14)
%r14_2 = affine.apply (d0, d1, d2) -> (d0 + d2) (%i12, %i13, %i14)
@@ -109,24 +109,24 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
}
}
//
// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
for %i15 = 0 to %M { // not vectorized due to condition below
// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
affine.for %i15 = 0 to %M { // not vectorized due to condition below
affine.if #set0(%i15) {
%a15 = load %A[%cst0, %cst0] : memref<?x?xf32>
}
}
//
// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load
// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
affine.for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load
%a16 = alloc(%M) : memref<?xvector<2xf32>>
%l16 = load %a16[%i16] : memref<?xvector<2xf32>>
}
//
// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
// CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
// CHECK: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : {{.*}} -> vector<128xf32>
for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17
for %i18 = 0 to %M { // vectorized due to scalar -> vector
affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17
affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector
%a18 = load %A[%cst0, %cst0] : memref<?x?xf32>
}
}
@@ -139,24 +139,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%C = alloc (%M, %N) : memref<?x?xf32, 0>
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// CHECK: [[C1:%.*]] = constant splat<vector<128xf32>, 1.000000e+00> : vector<128xf32>
// CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref<?x?xf32>, index, index
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
}
}
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// CHECK: [[C3:%.*]] = constant splat<vector<128xf32>, 2.000000e+00> : vector<128xf32>
// CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref<?x?xf32>, index, index
// non-scoped %f2
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
}
}
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
// CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
// CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
// CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<128xf32>
@@ -188,10 +188,10 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-LABEL: @vec_rejected
func @vec_rejected(%A : memref<?x?xf32>, %C : memref<?x?xf32>) {
%N = dim %A, 0 : memref<?x?xf32>
for %i = 0 to %N {
affine.for %i = 0 to %N {
// CHECK-NOT: vector
%a = load %A[%i, %i] : memref<?x?xf32> // not vectorized
for %j = 0 to %N {
affine.for %j = 0 to %N {
%b = load %A[%i, %j] : memref<?x?xf32> // may be vectorized
// CHECK-NOT: vector
%c = addf %a, %b : f32 // not vectorized because %a wasn't
@@ -11,13 +11,13 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %1 step 32
// CHECK: for {{.*}} = 0 to %2 step 256
// Example:
// for %i0 = 0 to %0 {
// for %i1 = 0 to %1 step 32 {
// for %i2 = 0 to %2 step 256 {
// affine.for %i0 = 0 to %0 {
// affine.for %i1 = 0 to %1 step 32 {
// affine.for %i2 = 0 to %2 step 256 {
// %3 = "vector_transfer_read"(%arg0, %i0, %i1, %i2) : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}
@@ -27,9 +27,9 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=1 --test-fastest-varying=0 no
// vectorization happens because of loop nesting order.
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
}
}
@@ -43,24 +43,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
%C = alloc (%M, %N) : memref<?x?xf32, 0>
%f1 = constant 1.0 : f32
%f2 = constant 2.0 : f32
for %i0 = 0 to %M {
for %i1 = 0 to %N {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
// CHECK: [[C1:%.*]] = constant splat<vector<32x256xf32>, 1.000000e+00> : vector<32x256xf32>
// CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref<?x?xf32>, index, index
// non-scoped %f1
store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
}
}
for %i2 = 0 to %M {
for %i3 = 0 to %N {
affine.for %i2 = 0 to %M {
affine.for %i3 = 0 to %N {
// CHECK: [[C3:%.*]] = constant splat<vector<32x256xf32>, 2.000000e+00> : vector<32x256xf32>
// CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref<?x?xf32>, index, index
// non-scoped %f2
store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
}
}
for %i4 = 0 to %M {
for %i5 = 0 to %N {
affine.for %i4 = 0 to %M {
affine.for %i5 = 0 to %N {
// CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref<?x?xf32>, index, index) -> vector<32x256xf32>
// CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref<?x?xf32>, index, index) -> vector<32x256xf32>
// CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32>
@@ -7,17 +7,17 @@ func @vec3d(%A : memref<?x?x?xf32>) {
%0 = dim %A, 0 : memref<?x?x?xf32>
%1 = dim %A, 1 : memref<?x?x?xf32>
%2 = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 {
// CHECK: for %i1 = 0 to %0 {
// CHECK: for %i2 = 0 to %0 step 32 {
// CHECK: for %i3 = 0 to %1 step 64 {
// CHECK: for %i4 = 0 to %2 step 256 {
// CHECK: affine.for %i0 = 0 to %0 {
// CHECK: affine.for %i1 = 0 to %0 {
// CHECK: affine.for %i2 = 0 to %0 step 32 {
// CHECK: affine.for %i3 = 0 to %1 step 64 {
// CHECK: affine.for %i4 = 0 to %2 step 256 {
// CHECK: %3 = vector_transfer_read %arg0, %i2, %i3, %i4 {permutation_map: #[[map_proj_d0d1d2_d0d1d2]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x64x256xf32>
for %t0 = 0 to %0 {
for %t1 = 0 to %0 {
for %i0 = 0 to %0 {
for %i1 = 0 to %1 {
for %i2 = 0 to %2 {
affine.for %t0 = 0 to %0 {
affine.for %t1 = 0 to %0 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 {
affine.for %i2 = 0 to %2 {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}
@@ -7,13 +7,13 @@ func @vec2d(%A : memref<?x?x?xf32>) {
%M = dim %A, 0 : memref<?x?x?xf32>
%N = dim %A, 1 : memref<?x?x?xf32>
%P = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 step 32
// CHECK: for %i1 = 0 to %1 {
// CHECK: for %i2 = 0 to %2 step 256
// CHECK: affine.for %i0 = 0 to %0 step 32
// CHECK: affine.for %i1 = 0 to %1 {
// CHECK: affine.for %i2 = 0 to %2 step 256
// CHECK: {{.*}} = vector_transfer_read %arg0, %i0, %i1, %i2 {permutation_map: #[[map_proj_d0d1d2_d0d2]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}
@@ -23,9 +23,9 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=2 --test-fastest-varying=0 no
// vectorization happens because of loop nesting order.
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
}
}
@@ -12,20 +12,20 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=0 --test-fastest-varying=2 no
// vectorization happens because of loop nesting order.
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}
}
// CHECK: for %i3 = 0 to %0 step 32
// CHECK: for %i4 = 0 to %1 step 256
// CHECK: for %i5 = 0 to %2 {
// CHECK: affine.for %i3 = 0 to %0 step 32
// CHECK: affine.for %i4 = 0 to %1 step 256
// CHECK: affine.for %i5 = 0 to %2 {
// CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
}
}
@@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
%0 = dim %A, 0 : memref<?x?x?xf32>
%1 = dim %A, 1 : memref<?x?x?xf32>
%2 = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 step 32 {
// CHECK: for %i1 = 0 to %1 {
// CHECK: for %i2 = 0 to %2 step 256 {
// CHECK: affine.for %i0 = 0 to %0 step 32 {
// CHECK: affine.for %i1 = 0 to %1 {
// CHECK: affine.for %i2 = 0 to %2 step 256 {
// CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i3 = 0 to %1 step 256 {
// CHECK: for %i4 = 0 to %2 {
// CHECK: affine.for %i3 = 0 to %1 step 256 {
// CHECK: affine.for %i4 = 0 to %2 {
// CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i5 = 0 to %2 {
// CHECK: affine.for %i5 = 0 to %2 {
// CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %0 {
for %i1 = 0 to %1 {
for %i2 = 0 to %2 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 {
affine.for %i2 = 0 to %2 {
%a2 = load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
}
}
for %i3 = 0 to %1 {
for %i4 = 0 to %2 {
affine.for %i3 = 0 to %1 {
affine.for %i4 = 0 to %2 {
%a4 = load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
}
for %i5 = 0 to %2 {
affine.for %i5 = 0 to %2 {
%a5 = load %A[%i3, %i5, %i0] : memref<?x?x?xf32>
}
}
@@ -12,20 +12,20 @@ func @vec2d(%A : memref<?x?x?xf32>) {
// CHECK: for {{.*}} = 0 to %2 {
// For the case: --test-fastest-varying=0 --test-fastest-varying=1 no
// vectorization happens because of loop nesting order.
for %i0 = 0 to %M {
for %i1 = 0 to %N {
for %i2 = 0 to %P {
affine.for %i0 = 0 to %M {
affine.for %i1 = 0 to %N {
affine.for %i2 = 0 to %P {
%a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
}
}
}
// CHECK: for %i3 = 0 to %0 step 32
// CHECK: for %i4 = 0 to %1 {
// CHECK: for %i5 = 0 to %2 step 256
// CHECK: affine.for %i3 = 0 to %0 step 32
// CHECK: affine.for %i4 = 0 to %1 {
// CHECK: affine.for %i5 = 0 to %2 step 256
// CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i3 = 0 to %M {
for %i4 = 0 to %N {
for %i5 = 0 to %P {
affine.for %i3 = 0 to %M {
affine.for %i4 = 0 to %N {
affine.for %i5 = 0 to %P {
%a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
}
}
@@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
%0 = dim %A, 0 : memref<?x?x?xf32>
%1 = dim %A, 1 : memref<?x?x?xf32>
%2 = dim %A, 2 : memref<?x?x?xf32>
// CHECK: for %i0 = 0 to %0 step 32 {
// CHECK: for %i1 = 0 to %1 step 256 {
// CHECK: for %i2 = 0 to %2 {
// CHECK: affine.for %i0 = 0 to %0 step 32 {
// CHECK: affine.for %i1 = 0 to %1 step 256 {
// CHECK: affine.for %i2 = 0 to %2 {
// CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i3 = 0 to %1 {
// CHECK: for %i4 = 0 to %2 step 256 {
// CHECK: affine.for %i3 = 0 to %1 {
// CHECK: affine.for %i4 = 0 to %2 step 256 {
// CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
// CHECK: for %i5 = 0 to %2 step 256 {
// CHECK: affine.for %i5 = 0 to %2 step 256 {
// CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
for %i0 = 0 to %0 {
for %i1 = 0 to %1 {
for %i2 = 0 to %2 {
affine.for %i0 = 0 to %0 {
affine.for %i1 = 0 to %1 {
affine.for %i2 = 0 to %2 {
%a2 = load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
}
}
for %i3 = 0 to %1 {
for %i4 = 0 to %2 {
affine.for %i3 = 0 to %1 {
affine.for %i4 = 0 to %2 {
%a4 = load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
}
for %i5 = 0 to %2 {
affine.for %i5 = 0 to %2 {
%a5 = load %A[%i3, %i5, %i0] : memref<?x?x?xf32>
}
}
@@ -205,10 +205,10 @@ func @dyn_shape_fold(%L : index, %M : index) -> (memref<? x ? x i32>, memref<? x
// CHECK-NEXT: %2 = alloc() : memref<512x1024xi32>
%c = alloc(%K, %N) : memref<? x ? x i32>

// CHECK: for %i0 =
for %i = 0 to %L {
// CHECK-NEXT: for %i1 =
for %j = 0 to 10 {
// CHECK: affine.for %i0 =
affine.for %i = 0 to %L {
// CHECK-NEXT: affine.for %i1 =
affine.for %j = 0 to 10 {
// CHECK-NEXT: %4 = load %0[%i0, %i1] : memref<?x1024xf32>
// CHECK-NEXT: store %4, %1[%c0, %c0, %i0, %i1, %c0] : memref<4x1024x8x512x?xf32>
%v = load %a[%i, %j] : memref<?x?xf32>
@@ -234,8 +234,8 @@ func @merge_constants() -> (index, index) {
// CHECK-LABEL: func @hoist_constant
func @hoist_constant(%arg0: memref<8xi32>) {
// CHECK-NEXT: %c42_i32 = constant 42 : i32
// CHECK-NEXT: for %i0 = 0 to 8 {
for %i0 = 0 to 8 {
// CHECK-NEXT: affine.for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
// CHECK-NEXT: store %c42_i32, %arg0[%i0]
%c42_i32 = constant 42 : i32
store %c42_i32, %arg0[%i0] : memref<8xi32>
@@ -2,8 +2,8 @@

// CHECK-LABEL: @test(%arg0: memref<f32>) {
func @test(%p : memref<f32>) {
for %i0 = 0 to 128 {
for %i1 = 0 to 8 { // CHECK: for %i1 = 0 to 8 {
affine.for %i0 = 0 to 128 {
affine.for %i1 = 0 to 8 { // CHECK: affine.for %i1 = 0 to 8 {
%0 = constant 4.5 : f32
%1 = constant 1.5 : f32

@@ -113,8 +113,8 @@ func @down_propagate_for() {
// CHECK: %c1_i32 = constant 1 : i32
%0 = constant 1 : i32

// CHECK-NEXT: for %i0 = 0 to 4 {
for %i = 0 to 4 {
// CHECK-NEXT: affine.for %i0 = 0 to 4 {
affine.for %i = 0 to 4 {
// CHECK-NEXT: "foo"(%c1_i32, %c1_i32) : (i32, i32) -> ()
%1 = constant 1 : i32
"foo"(%0, %1) : (i32, i32) -> ()
@@ -145,8 +145,8 @@ func @down_propagate() -> i32 {
/// Check that operation definitions are NOT propagated up the dominance tree.
// CHECK-LABEL: @up_propagate_for
func @up_propagate_for() -> i32 {
// CHECK: for %i0 = 0 to 4 {
for %i = 0 to 4 {
// CHECK: affine.for %i0 = 0 to 4 {
affine.for %i = 0 to 4 {
// CHECK-NEXT: %c1_i32 = constant 1 : i32
// CHECK-NEXT: "foo"(%c1_i32) : (i32) -> ()
%0 = constant 1 : i32
@@ -42,7 +42,7 @@ func @loop_nest_1d() {
// Second DMA transfer.
// CHECK: dma_start %1[%c256], %5[%c0], %c256_0, %6[%c0] : memref<512xf32>, memref<256xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %6[%c0], %c256_0 : memref<1xi32>
// CHECK: for %i0 = 0 to 256 {
// CHECK: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: %7 = load %3[%i0] : memref<256xf32, 2>
// CHECK: %8 = affine.apply [[MAP_PLUS_256]](%i0)
// CHECK: %9 = affine.apply [[MAP_MINUS_256]](%8)
@@ -55,7 +55,7 @@ func @loop_nest_1d() {
// CHECK-NEXT: dealloc %4 : memref<1xi32>
// CHECK-NEXT: dealloc %3 : memref<256xf32, 2>
// CHECK-NEXT: return
for %i = 0 to 256 {
affine.for %i = 0 to 256 {
load %A[%i] : memref<256 x f32>
%idx = affine.apply (d0) -> (d0 + 256)(%i)
load %B[%idx] : memref<512 x f32>
@@ -82,20 +82,20 @@ func @loop_nest_1d() {
// INCOMING DMA for C.
// CHECK-DAG: dma_start %arg2[%c0, %c0], [[BUFC]][%c0, %c0], %c16384_0, [[TAGC]][%c0] : memref<512x32xf32>, memref<512x32xf32, 2>, memref<1xi32>
// CHECK-DAG: dma_wait [[TAGC]][%c0], %c16384_0 : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 32 {
// CHECK-NEXT: for %i1 = 0 to 32 {
// CHECK-NEXT: for %i2 = 0 to 32 {
// CHECK-NEXT: for %i3 = 0 to 16 {
// CHECK-NEXT: affine.for %i0 = 0 to 32 {
// CHECK-NEXT: affine.for %i1 = 0 to 32 {
// CHECK-NEXT: affine.for %i2 = 0 to 32 {
// CHECK-NEXT: affine.for %i3 = 0 to 16 {
// CHECK-NEXT: %7 = affine.apply #map{{[0-9]+}}(%i1, %i3)
// CHECK-NEXT: %8 = load [[BUFB]][%7, %i0] : memref<512x32xf32, 2>
// CHECK-NEXT: "foo"(%8) : (f32) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: for %i4 = 0 to 16 {
// CHECK-NEXT: affine.for %i4 = 0 to 16 {
// CHECK-NEXT: %9 = affine.apply #map{{[0-9]+}}(%i2, %i4)
// CHECK-NEXT: %10 = load [[BUFA]][%9, %i1] : memref<512x32xf32, 2>
// CHECK-NEXT: "bar"(%10) : (f32) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: for %i5 = 0 to 16 {
// CHECK-NEXT: affine.for %i5 = 0 to 16 {
// CHECK-NEXT: %11 = "abc_compute"() : () -> f32
// CHECK-NEXT: %12 = affine.apply #map{{[0-9]+}}(%i2, %i5)
// CHECK-NEXT: %13 = load [[BUFC]][%12, %i0] : memref<512x32xf32, 2>
@@ -123,20 +123,20 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
// DMAs will be performed at this level (jT is the first loop without a stride).
// A and B are read, while C is both read and written. A total of three new buffers
// are allocated and existing loads/stores are replaced by accesses to those buffers.
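// Example (a hedged sketch with hypothetical %bufB/%tagB names, mirroring the
// dma_start/dma_wait CHECK lines earlier in this test): after the pass, the
// %B load in the k-intratile loop below reads a fast-space copy instead:
//   %bufB = alloc() : memref<512x32xf32, 2>
//   %tagB = alloc() : memref<1xi32>
//   dma_start %B[%c0, %c0], %bufB[%c0, %c0], %c16384, %tagB[%c0]
//     : memref<512x32xf32>, memref<512x32xf32, 2>, memref<1xi32>
//   dma_wait %tagB[%c0], %c16384 : memref<1xi32>
//   %v0 = load %bufB[%k, %jT] : memref<512x32xf32, 2>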
for %jT = 0 to 32 {
for %kT = 0 to 32 {
for %iT = 0 to 32 {
for %kk = 0 to 16 { // k intratile
affine.for %jT = 0 to 32 {
affine.for %kT = 0 to 32 {
affine.for %iT = 0 to 32 {
affine.for %kk = 0 to 16 { // k intratile
%k = affine.apply (d0, d1) -> (16*d0 + d1) (%kT, %kk)
%v0 = load %B[%k, %jT] : memref<512 x 32 x f32>
"foo"(%v0) : (f32) -> ()
}
for %ii = 0 to 16 { // i intratile.
affine.for %ii = 0 to 16 { // i intratile.
%i = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii)
%v1 = load %A[%i, %kT] : memref<512 x 32 x f32>
"bar"(%v1) : (f32) -> ()
}
for %ii_ = 0 to 16 { // i intratile.
affine.for %ii_ = 0 to 16 { // i intratile.
%v2 = "abc_compute"() : () -> f32
%i_ = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii_)
%v3 = load %C[%i_, %jT] : memref<512 x 32 x f32>
@@ -155,13 +155,13 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
//
// CHECK-LABEL: func @loop_nest_modulo() {
// CHECK: %0 = alloc() : memref<256x8xf32>
// CHECK-NEXT: for %i0 = 0 to 32 step 4 {
// CHECK-NEXT: affine.for %i0 = 0 to 32 step 4 {
// CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0)
// CHECK-NEXT: %2 = alloc() : memref<1x2xf32, 2>
// CHECK-NEXT: %3 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %0[%1, %c0], %2[%c0, %c0], %c2, %3[%c0] : memref<256x8xf32>, memref<1x2xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %3[%c0], %c2 : memref<1xi32>
// CHECK-NEXT: for %i1 = 0 to 8 {
// CHECK-NEXT: affine.for %i1 = 0 to 8 {
// ...
// ...
// CHECK: }
@@ -171,9 +171,9 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
// CHECK-NEXT: return
func @loop_nest_modulo() {
%A = alloc() : memref<256 x 8 x f32>
for %i = 0 to 32 step 4 {
affine.for %i = 0 to 32 step 4 {
// DMAs will be performed at this level (%j is the first unit stride loop)
for %j = 0 to 8 {
affine.for %j = 0 to 8 {
%idx = affine.apply (d0) -> (d0 mod 2) (%j)
// A buffer of size 32 x 2 will be allocated (original buffer was 256 x 8).
%v = load %A[%i, %idx] : memref<256 x 8 x f32>
@@ -187,17 +187,17 @@ func @loop_nest_modulo() {
// CHECK-LABEL: func @loop_nest_tiled() -> memref<256x1024xf32> {
func @loop_nest_tiled() -> memref<256x1024xf32> {
%0 = alloc() : memref<256x1024xf32>
for %i0 = 0 to 256 step 32 {
for %i1 = 0 to 1024 step 32 {
affine.for %i0 = 0 to 256 step 32 {
affine.for %i1 = 0 to 1024 step 32 {
// CHECK: %3 = alloc() : memref<32x32xf32, 2>
// CHECK-NEXT: %4 = alloc() : memref<1xi32>
// Strided DMA here: 32 x 32 tile in a 256 x 1024 memref.
// CHECK-NEXT: dma_start %0[%1, %2], %3[%c0, %c0], %c1024, %4[%c0], %c1024_0, %c32 : memref<256x1024xf32>, memref<32x32xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait
// CHECK-NEXT: for %i2 = #map
// CHECK-NEXT: for %i3 = #map
for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
// CHECK-NEXT: affine.for %i2 = #map
// CHECK-NEXT: affine.for %i3 = #map
affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
// CHECK-NEXT: %5 = affine.apply [[MAP_INDEX_DIFF_EVEN]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %6 = affine.apply [[MAP_INDEX_DIFF_ODD]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %7 = load %3[%5, %6] : memref<32x32xf32, 2>
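// Reading the strided dma_start matched above (a sketch of the operand
// roles, nothing added to the test): source %0[%1, %2], destination buffer
// %3[%c0, %c0], %c1024 total elements (the 32 x 32 tile), tag %4[%c0], then
// the trailing %c1024_0 and %c32 give the stride in the source memref and
// the number of elements per stride -- 32 contiguous floats copied every
// 1024 elements of the 256 x 1024 source.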
@@ -218,8 +218,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) {
// No strided DMA needed here.
// CHECK: dma_start %arg0[%c1, %c0], %0[%c0, %c0], %c100, %1[%c0] : memref<100x100xf32>, memref<1x100xf32, 2>,
// CHECK-NEXT: dma_wait %1[%c0], %c100 : memref<1xi32>
for %i = 0 to 100 {
for %j = 0 to ()[s0] -> (s0) ()[%N] {
affine.for %i = 0 to 100 {
affine.for %j = 0 to ()[s0] -> (s0) ()[%N] {
// CHECK: %2 = affine.apply [[MAP_D0_MINUS_ONE]](%c1_0, %i1)
// CHECK: %3 = affine.apply [[MAP_D1]](%c1_0, %i1)
// CHECK-NEXT: %4 = load %0[%2, %3] : memref<1x100xf32, 2>
@@ -232,8 +232,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) {
// CHECK-LABEL: func @dma_with_symbolic_accesses
func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) {
%N = constant 9 : index
for %i = 0 to 100 {
for %j = 0 to 100 {
affine.for %i = 0 to 100 {
affine.for %j = 0 to 100 {
%idy = affine.apply (d0, d1) [s0, s1] -> (d1 + s0 + s1)(%i, %j)[%M, %N]
load %A[%i, %idy] : memref<100 x 100 x f32>
}
@@ -243,8 +243,8 @@ func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) {
// CHECK-NEXT: %2 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %arg0[%c0, %0], %1[%c0, %c0], %c10000, %2[%c0]
// CHECK-NEXT: dma_wait %2[%c0], %c10000
// CHECK-NEXT: for %i0 = 0 to 100 {
// CHECK-NEXT: for %i1 = 0 to 100 {
// CHECK-NEXT: affine.for %i0 = 0 to 100 {
// CHECK-NEXT: affine.for %i1 = 0 to 100 {
// CHECK-NEXT: %3 = affine.apply [[MAP_SYM_SHIFT]](%i0, %i1)[%arg1, %c9]
// CHECK-NEXT: %4 = affine.apply [[MAP_3D_D1]](%arg1, %i0, %3)
// CHECK-NEXT: %5 = affine.apply [[MAP_SUB_OFFSET]](%arg1, %i0, %3)
@@ -263,8 +263,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in
// CHECK-NEXT: %1 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %arg0[%c0, %c0], %0[%c0, %c0], %c10000, %1[%c0] : memref<100x100xf32>, memref<100x100xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %1[%c0], %c10000 : memref<1xi32>
for %i = 0 to 100 {
for %j = %M to %N {
affine.for %i = 0 to 100 {
affine.for %j = %M to %N {
%idy = affine.apply (d1) [s0] -> (d1 + s0)(%j)[%K]
load %A[%i, %idy] : memref<100 x 100 x f32>
}
@@ -278,8 +278,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in
func @dma_unknown_size(%arg0: memref<?x?xf32>) {
%M = dim %arg0, 0 : memref<? x ? x f32>
%N = dim %arg0, 0 : memref<? x ? x f32>
for %i = 0 to %M {
for %j = 0 to %N {
affine.for %i = 0 to %M {
affine.for %j = 0 to %N {
// If this loop nest isn't tiled, the access requires a non-constant DMA
// size -- not yet implemented.
// CHECK: %2 = load %arg0[%i0, %i1] : memref<?x?xf32>
@@ -294,9 +294,9 @@ func @dma_unknown_size(%arg0: memref<?x?xf32>) {

// CHECK-LABEL: func @dma_memref_3d
func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) {
for %i = 0 to 1024 {
for %j = 0 to 1024 {
for %k = 0 to 1024 {
affine.for %i = 0 to 1024 {
affine.for %j = 0 to 1024 {
affine.for %k = 0 to 1024 {
%idx = affine.apply (d0) -> (d0 mod 128)(%i)
%idy = affine.apply (d0) -> (d0 mod 128)(%j)
%idz = affine.apply (d0) -> (d0 mod 128)(%k)
@@ -330,8 +330,8 @@ func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) {
// CHECK-LABEL: func @multi_load_store_union() {
func @multi_load_store_union() {
%A = alloc() : memref<512 x 512 x f32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx = affine.apply (d0) -> (d0 + 64)(%i)
%idy = affine.apply (d0) -> (d0 + 128)(%j)
%ishift = affine.apply (d0) -> (d0 + 2)(%i)
@@ -355,8 +355,8 @@ func @multi_load_store_union() {
// CHECK-NEXT: dma_start %0[%c2_1, %c2_2], %1[%c0, %c0], %c170372_3, %2[%c0], %c512_4, %c446_5 : memref<512x512xf32>, memref<382x446xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %2[%c0], %c170372_3 : memref<1xi32>
// CHECK-NEXT: %3 = alloc() : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 256 {
// CHECK-NEXT: for %i1 = 0 to 256 {
// CHECK-NEXT: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: affine.for %i1 = 0 to 256 {
// CHECK-NEXT: %4 = affine.apply [[MAP_PLUS_64]](%i0)
// CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_128]](%i1)
// CHECK-NEXT: %6 = affine.apply [[MAP_PLUS_2]](%i0)
@@ -395,7 +395,7 @@ func @dma_loop_straightline_interspersed() {
%c255 = constant 255 : index
%A = alloc() : memref<256 x f32>
%v = load %A[%c0] : memref<256 x f32>
for %i = 1 to 255 {
affine.for %i = 1 to 255 {
load %A[%i] : memref<256 x f32>
}
%l = load %A[%c255] : memref<256 x f32>
@@ -416,7 +416,7 @@ func @dma_loop_straightline_interspersed() {
// CHECK-NEXT: %5 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %0[%c1_0], %4[%c0], %c254, %5[%c0] : memref<256xf32>, memref<254xf32, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait %5[%c0], %c254 : memref<1xi32>
// CHECK-NEXT: for %i0 = 1 to 255 {
// CHECK-NEXT: affine.for %i0 = 1 to 255 {
// CHECK-NEXT: %6 = affine.apply [[MAP_MINUS_ONE]](%i0)
// CHECK-NEXT: %7 = load %4[%6] : memref<254xf32, 2>
// CHECK-NEXT: }
@@ -442,10 +442,10 @@ func @dma_loop_straightline_interspersed() {
func @dma_mixed_loop_blocks() {
%c0 = constant 0 : index
%A = alloc() : memref<256 x 256 x vector<8 x f32>>
for %i = 0 to 256 {
affine.for %i = 0 to 256 {
%v = load %A[%c0, %c0] : memref<256 x 256 x vector<8 x f32>>
"foo"(%v) : (vector<8 x f32>) -> ()
for %j = 0 to 256 {
affine.for %j = 0 to 256 {
%w = load %A[%i, %j] : memref<256 x 256 x vector<8 x f32>>
"bar"(%w) : (vector<8 x f32>) -> ()
}
@@ -457,17 +457,17 @@ func @dma_mixed_loop_blocks() {
// CHECK-DAG: [[TAG:%[0-9]+]] = alloc() : memref<1xi32>
// CHECK: dma_start [[MEM]][%c0, %c0], [[BUF]][%c0, %c0], %c65536, [[TAG]][%c0] : memref<256x256xvector<8xf32>>, memref<256x256xvector<8xf32>, 2>, memref<1xi32>
// CHECK-NEXT: dma_wait [[TAG]][%c0], %c65536 : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 256 {
// CHECK-NEXT: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: %3 = load [[BUF]][%c0_0, %c0_0] : memref<256x256xvector<8xf32>, 2>
// CHECK: for %i1 = 0 to 256 {
// CHECK: affine.for %i1 = 0 to 256 {
// CHECK-NEXT: %4 = load [[BUF]][%i0, %i1] : memref<256x256xvector<8xf32>, 2>

// -----

// CHECK-LABEL: func @relative_loop_bounds
func @relative_loop_bounds(%arg0: memref<1027xf32>) {
for %i0 = 0 to 1024 {
for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) {
affine.for %i0 = 0 to 1024 {
affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) {
%0 = constant 0.0 : f32
store %0, %arg0[%i2] : memref<1027xf32>
}
@@ -476,8 +476,8 @@ func @relative_loop_bounds(%arg0: memref<1027xf32>) {
}
// CHECK: [[BUF:%[0-9]+]] = alloc() : memref<1027xf32, 2>
// CHECK-NEXT: [[MEM:%[0-9]+]] = alloc() : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 1024 {
// CHECK-NEXT: for %i1 = {{#map[0-9]+}}(%i0) to {{#map[0-9]+}}(%i0) {
// CHECK-NEXT: affine.for %i0 = 0 to 1024 {
// CHECK-NEXT: affine.for %i1 = {{#map[0-9]+}}(%i0) to {{#map[0-9]+}}(%i0) {
// CHECK-NEXT: %cst = constant 0.000000e+00 : f32
// CHECK-NEXT: store %cst, [[BUF]][%i1] : memref<1027xf32, 2>
// CHECK-NEXT: }
@@ -487,7 +487,7 @@ func @relative_loop_bounds(%arg0: memref<1027xf32>) {

// ----

// This should create a buffer of size 2 for %arg2.
// This should create a buffer of size 2 affine.for %arg2.

#map_lb = (d0) -> (d0)
#map_ub = (d0) -> (d0 + 3)
@@ -498,9 +498,9 @@ func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>,
%0 = alloc() : memref<64x1xf32>
%1 = alloc() : memref<144x4xf32>
%2 = constant 0.0 : f32
for %i8 = 0 to 9 step 3 {
for %i9 = #map_lb(%i8) to #map_ub(%i8) {
for %i17 = 0 to 64 {
affine.for %i8 = 0 to 9 step 3 {
affine.for %i9 = #map_lb(%i8) to #map_ub(%i8) {
affine.for %i17 = 0 to 64 {
%23 = affine.apply #map_acc(%i9)
%25 = load %arg2[%23] : memref<2xf32>
%26 = affine.apply #map_lb(%i17)
@@ -511,11 +511,11 @@ func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>,
}
return %arg1, %arg2 : memref<144x9xf32>, memref<2xf32>
}
// CHECK: for %i0 = 0 to 9 step 3 {
// CHECK: affine.for %i0 = 0 to 9 step 3 {
// CHECK: [[BUF:%[0-9]+]] = alloc() : memref<2xf32, 2>
// CHECK: dma_start %arg2[%4], [[BUF]]
// CHECK: dma_wait %6[%c0], %c2_0 : memref<1xi32>
// CHECK: for %i1 =
// CHECK: affine.for %i1 =

// -----

@@ -524,17 +524,17 @@ func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>,

// FAST-MEM-16KB-LABEL: func @load_store_same_memref
func @load_store_same_memref(%arg0: memref<256x1024xf32>) {
// FAST-MEM-16KB: for %i0 = 0 to 256 step 4
for %i0 = 0 to 256 step 4 {
// FAST-MEM-16KB: affine.for %i0 = 0 to 256 step 4
affine.for %i0 = 0 to 256 step 4 {
// FAST-MEM-16KB: [[BUF:%[0-9]+]] = alloc() : memref<4x1024xf32, 2>
// FAST-MEM-16KB: dma_start %arg0
// FAST-MEM-16KB-NEXT: dma_wait
// FAST-MEM-16KB: for %i1
for %i1 = 0 to 1024 step 4 {
// FAST-MEM-16KB: for %i2
for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) {
// FAST-MEM-16KB: for %i3
for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 4)(%i1) {
// FAST-MEM-16KB: affine.for %i1
affine.for %i1 = 0 to 1024 step 4 {
// FAST-MEM-16KB: affine.for %i2
affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) {
// FAST-MEM-16KB: affine.for %i3
affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 4)(%i1) {
%3 = load %arg0[%i2, %i3] : memref<256x1024xf32>
%4 = mulf %3, %3 : f32
store %4, %arg0[%i2, %i3] : memref<256x1024xf32>
@@ -560,12 +560,12 @@ func @load_store_same_memref(%arg0: memref<256x1024xf32>) {
#map1 = (d0) -> (d0 + 4)
// FAST-MEM-16KB-LABEL: func @simple_matmul
func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> {
for %i = 0 to 8 step 4 {
for %j = 0 to 8 step 4 {
for %k = 0 to 8 step 4 {
for %ii = #map0(%i) to #map1(%i) {
for %jj = #map0(%j) to #map1(%j) {
for %kk = #map0(%k) to #map1(%k) {
affine.for %i = 0 to 8 step 4 {
affine.for %j = 0 to 8 step 4 {
affine.for %k = 0 to 8 step 4 {
affine.for %ii = #map0(%i) to #map1(%i) {
affine.for %jj = #map0(%j) to #map1(%j) {
affine.for %kk = #map0(%k) to #map1(%k) {
%5 = load %arg0[%ii, %kk] : memref<8x8xvector<64xf32>>
%6 = load %arg1[%kk, %jj] : memref<8x8xvector<64xf32>>
%7 = load %arg2[%ii, %jj] : memref<8x8xvector<64xf32>>
@@ -580,18 +580,18 @@ func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<
}
return %arg2 : memref<8x8xvector<64xf32>>
}
// FAST-MEM-16KB: for %i0 = 0 to 8 step 4 {
// FAST-MEM-16KB: for %i1 = 0 to 8 step 4 {
// FAST-MEM-16KB: affine.for %i0 = 0 to 8 step 4 {
// FAST-MEM-16KB: affine.for %i1 = 0 to 8 step 4 {
// FAST-MEM-16KB: dma_start %arg2
// FAST-MEM-16KB: dma_wait
// FAST-MEM-16KB: for %i2 = 0 to 8 step 4 {
// FAST-MEM-16KB: affine.for %i2 = 0 to 8 step 4 {
// FAST-MEM-16KB: dma_start %arg0
// FAST-MEM-16KB: dma_wait
// FAST-MEM-16KB: dma_start %arg1
// FAST-MEM-16KB: dma_wait
// FAST-MEM-16KB: for %i3 = #map{{[0-9]+}}(%i0) to #map{{[0-9]+}}(%i0) {
// FAST-MEM-16KB-NEXT: for %i4 = #map{{[0-9]+}}(%i1) to #map{{[0-9]+}}(%i1) {
// FAST-MEM-16KB-NEXT: for %i5 = #map{{[0-9]+}}(%i2) to #map{{[0-9]+}}(%i2) {
// FAST-MEM-16KB: affine.for %i3 = #map{{[0-9]+}}(%i0) to #map{{[0-9]+}}(%i0) {
// FAST-MEM-16KB-NEXT: affine.for %i4 = #map{{[0-9]+}}(%i1) to #map{{[0-9]+}}(%i1) {
// FAST-MEM-16KB-NEXT: affine.for %i5 = #map{{[0-9]+}}(%i2) to #map{{[0-9]+}}(%i2) {
// FAST-MEM-16KB: }
// FAST-MEM-16KB: }
// FAST-MEM-16KB: }
File diff suppressed because it is too large
@@ -9,12 +9,12 @@
// CHECK-DAG: [[UB_INTRA_TILE:#map[0-9]+]] = (d0, d1, d2) -> (d2 + 32, s0, 4096 floordiv s1)

// CHECK-LABEL: func @loop_tiling()
// CHECK-NEXT: for %i0 = 0 to 256 step 32 {
// CHECK-NEXT: for %i1 = 0 to 512 step 32 {
// CHECK-NEXT: for %i2 = 0 to 1024 step 32 {
// CHECK-NEXT: for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
// CHECK-NEXT: for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
// CHECK-NEXT: for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
// CHECK-NEXT: affine.for %i0 = 0 to 256 step 32 {
// CHECK-NEXT: affine.for %i1 = 0 to 512 step 32 {
// CHECK-NEXT: affine.for %i2 = 0 to 1024 step 32 {
// CHECK-NEXT: affine.for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
// CHECK-NEXT: affine.for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
// CHECK-NEXT: affine.for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
// CHECK-NEXT: "foo"(%i3, %i4, %i5) : (index, index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -22,32 +22,32 @@
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i6 = 0 to 50 step 32 {
// CHECK-NEXT: for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
// CHECK-NEXT: affine.for %i6 = 0 to 50 step 32 {
// CHECK-NEXT: affine.for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
// CHECK-NEXT: "bar"(%i7, %i7) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i8 = 0 to 21 step 32 {
// CHECK-NEXT: for %i9 = [[IDENTITY]](%i8) to 21 {
// CHECK-NEXT: affine.for %i8 = 0 to 21 step 32 {
// CHECK-NEXT: affine.for %i9 = [[IDENTITY]](%i8) to 21 {
// CHECK-NEXT: "foobar"(%i9) : (index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: return
func @loop_tiling() {
for %i = 0 to 256 {
for %j = 0 to 512 {
for %k = 0 to 1024 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 512 {
affine.for %k = 0 to 1024 {
"foo"(%i, %j, %k) : (index, index, index) -> ()
}
}
}

for %x = 0 to 50 {
affine.for %x = 0 to 50 {
"bar"(%x, %x) : (index, index) -> ()
}

// Intra-tile loop won't need a min expression.
for %y = 0 to 21 {
affine.for %y = 0 to 21 {
"foobar"(%y) : (index) -> ()
}

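// A short reading of the tiled bounds matched above (a sketch, not an added
// check): for the 0..50 range the tile starting at 32 is truncated, so the
// intra-tile upper bound stays "min [[MAP1]](%i6)"; for 0..21 the single
// tile starting at 0 already covers the range, so min(%i8 + 32, 21) folds to
// the constant 21 and no min is emitted.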
@@ -59,12 +59,12 @@ func @loop_tiling() {
// CHECK-LABEL: func @loop_max_min_bound(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
func @loop_max_min_bound(%A : memref<? x i32>, %L : index, %U : index) {
%M = dim %A, 0 : memref<? x i32>
for %iTT = max #lb()[%L] to min #ub()[%M, %U] {
affine.for %iTT = max #lb()[%L] to min #ub()[%M, %U] {
%out = affine.apply (d0) -> (d0) (%iTT)
}
return
// CHECK: for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
// CHECK-NEXT: for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
// CHECK: affine.for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
// CHECK-NEXT: affine.for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
// CHECK-NEXT: %1 = affine.apply [[IDENTITY]](%i1)
// CHECK-NEXT: }
// CHECK-NEXT: }
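// The same idea with symbolic bounds (a reading of [[UB_INTRA_TILE]] above,
// not an added check): the intra-tile loop's upper bound takes the min of
// the tile start plus the tile size 32 and every result of the original min
// upper bound, with the outer induction variable %i0 passed in as the last
// operand.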
@@ -78,9 +78,9 @@ func @loop_max_min_bound(%A : memref<? x i32>, %L : index, %U : index) {

// MODEL-LABEL: func @simple_matmul
func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> {
for %i = 0 to 256 {
for %j = 0 to 256 {
for %k = 0 to 250 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
affine.for %k = 0 to 250 {
%l = load %arg0[%i, %k] : memref<8x8xvector<64xf32>>
%r = load %arg1[%k, %j] : memref<8x8xvector<64xf32>>
%o = load %arg2[%i, %j] : memref<8x8xvector<64xf32>>
@@ -92,6 +92,6 @@ func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<
}
return %arg2 : memref<8x8xvector<64xf32>>
}
// MODEL: for %i0 = 0 to 256 step 4 {
// MODEL-NEXT: for %i1 = 0 to 256 step 4 {
// MODEL-NEXT: for %i2 = 0 to 250 step 5 {
// MODEL: affine.for %i0 = 0 to 256 step 4 {
// MODEL-NEXT: affine.for %i1 = 0 to 256 step 4 {
// MODEL-NEXT: affine.for %i2 = 0 to 250 step 5 {
@@ -24,7 +24,7 @@ func @body(index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @simple_loop() {
for %i = 1 to 42 {
affine.for %i = 1 to 42 {
call @body(%i) : (index) -> ()
}
return
@@ -65,9 +65,9 @@ func @post(index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @imperfectly_nested_loops() {
for %i = 0 to 42 {
affine.for %i = 0 to 42 {
call @pre(%i) : (index) -> ()
for %j = 7 to 56 step 2 {
affine.for %j = 7 to 56 step 2 {
call @body2(%i, %j) : (index, index) -> ()
}
call @post(%i) : (index) -> ()
@@ -122,13 +122,13 @@ func @body3(index, index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @more_imperfectly_nested_loops() {
for %i = 0 to 42 {
affine.for %i = 0 to 42 {
call @pre(%i) : (index) -> ()
for %j = 7 to 56 step 2 {
affine.for %j = 7 to 56 step 2 {
call @body2(%i, %j) : (index, index) -> ()
}
call @mid(%i) : (index) -> ()
for %k = 18 to 37 step 3 {
affine.for %k = 18 to 37 step 3 {
call @body3(%i, %k) : (index, index) -> ()
}
call @post(%i) : (index) -> ()
@@ -161,8 +161,8 @@ func @more_imperfectly_nested_loops() {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @affine_apply_loops_shorthand(%N : index) {
for %i = 0 to %N {
for %j = (d0)[]->(d0)(%i)[] to 42 {
affine.for %i = 0 to %N {
affine.for %j = (d0)[]->(d0)(%i)[] to 42 {
call @body2(%i, %j) : (index, index) -> ()
}
}
@@ -360,7 +360,7 @@ func @if_for() {
// CHECK-NEXT: [[outerEndBB]]:
// CHECK-NEXT: br [[outerLoopInit:\^bb[0-9]+]]
affine.if #set1(%i) {
for %j = 0 to 42 {
affine.for %j = 0 to 42 {
affine.if #set2(%j) {
call @body2(%i, %j) : (index, index) -> ()
}
@@ -397,9 +397,9 @@ func @if_for() {
// CHECK-NEXT: %c1_9 = constant 1 : index
// CHECK-NEXT: %16 = addi %9, %c1_9 : index
// CHECK-NEXT: br [[outerLoopCond]](%16 : index)
for %k = 0 to 42 {
affine.for %k = 0 to 42 {
affine.if #set2(%k) {
for %l = 0 to 42 {
affine.for %l = 0 to 42 {
call @body3(%k, %l) : (index, index) -> ()
}
}
@@ -446,8 +446,8 @@ func @if_for() {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @loop_min_max(%N : index) {
for %i = 0 to 42 {
for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] {
affine.for %i = 0 to 42 {
affine.for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] {
call @body2(%i, %j) : (index, index) -> ()
}
}
@@ -486,7 +486,7 @@ func @loop_min_max(%N : index) {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @min_reduction_tree(%v : index) {
for %i = 0 to min #map_7_values(%v)[] {
affine.for %i = 0 to min #map_7_values(%v)[] {
call @body(%i) : (index) -> ()
}
return
@ -11,8 +11,8 @@ func @test() {
%A = alloc() : memref<9 x 9 x i32>
%B = alloc() : memref<111 x i32>

for %i = -1 to 10 {
for %j = -1 to 10 {
affine.for %i = -1 to 10 {
affine.for %j = -1 to 10 {
%idx0 = affine.apply (d0, d1) -> (d0)(%i, %j)
%idx1 = affine.apply (d0, d1) -> (d1)(%i, %j)
// Out of bound access.
@ -27,7 +27,7 @@ func @test() {
}
}

for %k = 0 to 10 {
affine.for %k = 0 to 10 {
// In bound.
%u = load %B[%zero] : memref<111 x i32>
// Out of bounds.
@ -43,8 +43,8 @@ func @test_mod_floordiv_ceildiv() {
%zero = constant 0 : index
%A = alloc() : memref<128 x 64 x 64 x i32>

for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j)
%idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j)
%idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j)
@ -69,8 +69,8 @@ func @test_no_out_of_bounds() {
%C = alloc() : memref<257 x i32>
%B = alloc() : memref<1 x i32>

for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
// All of these accesses are in bound; check that no errors are emitted.
// CHECK: %3 = affine.apply {{#map.*}}(%i0, %i1)
// CHECK-NEXT: %4 = load %0[%3, %c0] : memref<257x256xi32>
@ -93,8 +93,8 @@ func @mod_div() {
%zero = constant 0 : index
%A = alloc() : memref<128 x 64 x 64 x i32>

for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j)
%idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j)
%idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j)
@ -115,8 +115,8 @@ func @mod_div() {
// CHECK-LABEL: func @mod_floordiv_nested() {
func @mod_floordiv_nested() {
%A = alloc() : memref<256 x 256 x i32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1) -> ((d0 mod 1024) floordiv 4)(%i, %j)
%idx1 = affine.apply (d0, d1) -> ((((d1 mod 128) mod 32) ceildiv 4) * 32)(%i, %j)
load %A[%idx0, %idx1] : memref<256 x 256 x i32> // expected-error {{'std.load' op memref out of upper bound access along dimension #2}}
@ -128,7 +128,7 @@ func @mod_floordiv_nested() {
// CHECK-LABEL: func @test_semi_affine_bailout
func @test_semi_affine_bailout(%N : index) {
%B = alloc() : memref<10 x i32>
for %i = 0 to 10 {
affine.for %i = 0 to 10 {
%idx = affine.apply (d0)[s0] -> (d0 * s0)(%i)[%N]
%y = load %B[%idx] : memref<10 x i32>
// expected-error@-1 {{getMemRefRegion: compose affine map failed}}
@ -139,7 +139,7 @@ func @test_semi_affine_bailout(%N : index) {
// CHECK-LABEL: func @multi_mod_floordiv
func @multi_mod_floordiv() {
%A = alloc() : memref<2x2xi32>
for %ii = 0 to 64 {
affine.for %ii = 0 to 64 {
%idx0 = affine.apply (d0) -> ((d0 mod 147456) floordiv 1152) (%ii)
%idx1 = affine.apply (d0) -> (((d0 mod 147456) mod 1152) floordiv 384) (%ii)
%v = load %A[%idx0, %idx1] : memref<2x2xi32>
@ -154,8 +154,8 @@ func @delinearize_mod_floordiv() {
%out = alloc() : memref<64x9xi32>

// Reshape '%in' into '%out'.
for %ii = 0 to 64 {
for %jj = 0 to 9 {
affine.for %ii = 0 to 64 {
affine.for %jj = 0 to 9 {
%a0 = affine.apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj)
%a10 = affine.apply (d0) ->
(d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0)
@ -190,7 +190,7 @@ func @out_of_bounds() {
%in = alloc() : memref<1xi32>
%c9 = constant 9 : i32

for %i0 = 10 to 11 {
affine.for %i0 = 10 to 11 {
%idy = affine.apply (d0) -> (100 * d0 floordiv 1000) (%i0)
store %c9, %in[%idy] : memref<1xi32> // expected-error {{'std.store' op memref out of upper bound access along dimension #1}}
}
@ -210,8 +210,8 @@ func @out_of_bounds() {
func @test_complex_mod_floordiv(%arg0: memref<4x4x16x1xf32>) {
%c0 = constant 0 : index
%0 = alloc() : memref<1x2x3x3x16x1xf32>
for %i0 = 0 to 64 {
for %i1 = 0 to 9 {
affine.for %i0 = 0 to 64 {
affine.for %i1 = 0 to 9 {
%2 = affine.apply #map3(%i0, %i1)
%3 = affine.apply #map4(%i0, %i1)
%4 = affine.apply #map5(%i0, %i1)
@ -231,8 +231,8 @@ func @test_complex_mod_floordiv(%arg0: memref<4x4x16x1xf32>) {
func @test_mod_bound() {
%0 = alloc() : memref<7 x f32>
%1 = alloc() : memref<6 x f32>
for %i0 = 0 to 4096 {
for %i1 = #map0(%i0) to #map1(%i0) {
affine.for %i0 = 0 to 4096 {
affine.for %i1 = #map0(%i0) to #map1(%i0) {
load %0[%i1] : memref<7 x f32>
load %1[%i1] : memref<6 x f32>
// expected-error@-1 {{'std.load' op memref out of upper bound access along dimension #1}}
@ -253,13 +253,13 @@ func @test_floordiv_bound() {
%1 = alloc() : memref<1026 x f32>
%2 = alloc() : memref<4096 x f32>
%N = constant 2048 : index
for %i0 = 0 to 4096 {
for %i1 = #map0(%i0) to #map1(%i0) {
affine.for %i0 = 0 to 4096 {
affine.for %i1 = #map0(%i0) to #map1(%i0) {
load %0[%i1] : memref<1027 x f32>
load %1[%i1] : memref<1026 x f32>
// expected-error@-1 {{'std.load' op memref out of upper bound access along dimension #1}}
}
for %i2 = 0 to #map2(%N) {
affine.for %i2 = 0 to #map2(%N) {
// Within bounds.
%v = load %2[%i2] : memref<4096 x f32>
}
@ -277,9 +277,9 @@ func @test_floordiv_bound() {

// CHECK-LABEL: func @non_composed_bound_operand
func @non_composed_bound_operand(%arg0: memref<1024xf32>) {
for %i0 = 4 to 1028 step 4 {
affine.for %i0 = 4 to 1028 step 4 {
%i1 = affine.apply (d0) -> (d0 - 4) (%i0)
for %i2 = #map_lb(%i1) to #map_ub(%i1) {
affine.for %i2 = #map_lb(%i1) to #map_ub(%i1) {
%0 = load %arg0[%i2] : memref<1024xf32>
}
}
|
@ -10,14 +10,14 @@
func @simple_store_load() {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
}
return
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: }
// CHECK-NEXT: return
@ -30,7 +30,7 @@ func @multi_store_load() {
%cf8 = constant 8.0 : f32
%cf9 = constant 9.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -45,7 +45,7 @@ func @multi_store_load() {
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %cst_1 = constant 9.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: %1 = mulf %cst_1, %cst_1 : f32
// CHECK-NEXT: }
@ -59,8 +59,8 @@ func @multi_store_load() {
func @store_load_affine_apply() -> memref<10x10xf32> {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10x10xf32>
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%t0 = affine.apply (d0, d1) -> (d1 + 1)(%i0, %i1)
%t1 = affine.apply (d0, d1) -> (d0)(%i0, %i1)
%idx0 = affine.apply (d0, d1) -> (d1) (%t0, %t1)
@ -75,8 +75,8 @@ func @store_load_affine_apply() -> memref<10x10xf32> {
return %m : memref<10x10xf32>
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %0 = alloc() : memref<10x10xf32>
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: for %i1 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i1 = 0 to 10 {
// CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1)
// CHECK-NEXT: %2 = affine.apply [[MAP1]](%i0, %i1)
// CHECK-NEXT: %3 = affine.apply [[MAP2]](%1, %2)
@ -92,17 +92,17 @@ func @store_load_affine_apply() -> memref<10x10xf32> {
func @store_load_nested(%N : index) {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
}
}
return
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: for %i1 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg0 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: }
// CHECK-NEXT: }
@ -117,12 +117,12 @@ func @multi_store_load_nested_no_fwd(%N : index) {
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
store %cf8, %m[%i1] : memref<10xf32>
}
for %i2 = 0 to %N {
affine.for %i2 = 0 to %N {
// CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -138,9 +138,9 @@ func @store_load_store_nested_no_fwd(%N : index) {
%cf7 = constant 7.0 : f32
%cf9 = constant 9.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
// CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -159,16 +159,16 @@ func @multi_store_load_nested_fwd(%N : index) {
%cf9 = constant 9.0 : f32
%cf10 = constant 10.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
store %cf8, %m[%i1] : memref<10xf32>
}
for %i2 = 0 to %N {
affine.for %i2 = 0 to %N {
store %cf9, %m[%i2] : memref<10xf32>
}
store %cf10, %m[%i0] : memref<10xf32>
for %i3 = 0 to %N {
affine.for %i3 = 0 to %N {
// CHECK-NOT: %{{[0-9]+}} = load
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -182,10 +182,10 @@ func @multi_store_load_nested_fwd(%N : index) {
func @store_load_no_fwd() {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to 10 {
for %i2 = 0 to 10 {
affine.for %i1 = 0 to 10 {
affine.for %i2 = 0 to 10 {
// CHECK: load %{{[0-9]+}}
%v0 = load %m[%i2] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -202,9 +202,9 @@ func @store_load_fwd() {
%c0 = constant 0 : index
%m = alloc() : memref<10xf32>
store %cf7, %m[%c0] : memref<10xf32>
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
for %i2 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
affine.for %i2 = 0 to 10 {
// CHECK-NOT: load %{{[0-9]}}+
%v0 = load %m[%c0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@ -223,9 +223,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 {
%c0 = constant 0 : index
%c1 = constant 1 : index
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
%idx = affine.apply (d0) -> (d0 + 1) (%i0)
@ -236,9 +236,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 {
%v3 = load %m[%c1] : memref<10xf32>
return %v3 : f32
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32>
// CHECK-NEXT: for %i1 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg0 {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: %2 = affine.apply [[MAP4]](%i0)
// CHECK-NEXT: store %cst_0, %0[%2] : memref<10xf32>
|
@ -13,14 +13,14 @@ func @store_may_execute_before_load() {
// ancestor IfOp of the store, dominates the ancestor ForSmt of the load,
// and thus the store "may" conditionally execute before the load.
affine.if #set0(%c0) {
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
// expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}}
}
}
for %i1 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%v0 = load %m[%i1] : memref<10xf32>
// expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}}
// expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}}
@ -37,13 +37,13 @@ func @dependent_loops() {
%cst = constant 7.000000e+00 : f32
// There is a dependence from 0 to 1 at depth 1 (common surrounding loops 0)
// because the first loop with the store dominates the second loop.
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cst, %0[%i0] : memref<10xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
// expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}}
}
for %i1 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%1 = load %0[%i1] : memref<10xf32>
// expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}}
// expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}}
@ -231,7 +231,7 @@ func @store_range_load_after_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -254,7 +254,7 @@ func @store_load_func_symbol(%arg0: index, %arg1: index) {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to %arg1 {
affine.for %i0 = 0 to %arg1 {
%a0 = affine.apply (d0) -> (d0) (%arg0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = [1, +inf]}}
@ -277,7 +277,7 @@ func @store_range_load_last_in_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0) (%i0)
// For dependence from 0 to 1, we do not have a loop carried dependence
// because only the final write in the loop accesses the same element as the
@ -305,7 +305,7 @@ func @store_range_load_before_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c0 = constant 0 : index
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -328,7 +328,7 @@ func @store_range_load_first_in_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c0 = constant 0 : index
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
// Dependence from 0 to 1 at depth 1 is a range because all loads at
// constant index zero are reads after first store at index zero during
@ -353,7 +353,7 @@ func @store_range_load_first_in_range() {
func @store_plus_3() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0 + 3) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -375,7 +375,7 @@ func @store_plus_3() {
func @load_minus_2() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 2 to 11 {
affine.for %i0 = 2 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -397,8 +397,8 @@ func @load_minus_2() {
func @perfectly_nested_loops_loop_independent() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 11 {
for %i1 = 0 to 11 {
affine.for %i0 = 0 to 11 {
affine.for %i1 = 0 to 11 {
// Dependence from access 0 to 1 is loop independent at depth = 3.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@ -428,8 +428,8 @@ func @perfectly_nested_loops_loop_independent() {
func @perfectly_nested_loops_loop_carried_at_depth1() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 9 {
for %i1 = 0 to 9 {
affine.for %i0 = 0 to 9 {
affine.for %i1 = 0 to 9 {
// Dependence from access 0 to 1 is loop carried at depth 1.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@ -459,8 +459,8 @@ func @perfectly_nested_loops_loop_carried_at_depth2() {
func @perfectly_nested_loops_loop_carried_at_depth2() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
// Dependence from access 0 to 1 is loop carried at depth 2.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@ -491,8 +491,8 @@ func @one_common_loop() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
// There is a loop-independent dependence from access 0 to 1 at depth 2.
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
store %c7, %m[%a00, %a01] : memref<10x10xf32>
@ -502,7 +502,7 @@ func @one_common_loop() {
// expected-note@-4 {{dependence from 0 to 1 at depth 1 = false}}
// expected-note@-5 {{dependence from 0 to 1 at depth 2 = true}}
}
for %i2 = 0 to 9 {
affine.for %i2 = 0 to 9 {
%a10 = affine.apply (d0, d1) -> (d0) (%i0, %i2)
%a11 = affine.apply (d0, d1) -> (d1) (%i0, %i2)
%v0 = load %m[%a10, %a11] : memref<10x10xf32>
@ -525,7 +525,7 @@ func @dependence_cycle() {
// Dependences:
// *) loop-independent dependence from access 1 to 2 at depth 2.
// *) loop-carried dependence from access 3 to 0 at depth 1.
for %i0 = 0 to 9 {
affine.for %i0 = 0 to 9 {
%a0 = affine.apply (d0) -> (d0) (%i0)
%v0 = load %m.a[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -575,8 +575,8 @@ func @dependence_cycle() {
func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to %arg0 {
for %i1 = 0 to %arg1 {
affine.for %i0 = 0 to %arg0 {
affine.for %i1 = 0 to %arg1 {
%a00 = affine.apply (d0, d1) -> (d0 - 1) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1 + 1) (%i0, %i1)
%v0 = load %m[%a00, %a01] : memref<10x10xf32>
@ -605,8 +605,8 @@ func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) {
func @war_raw_waw_deps() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0 + 1) (%i1)
%v0 = load %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@ -633,7 +633,7 @@ func @war_raw_waw_deps() {
func @mod_deps() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0 mod 2) (%i0)
// Results are conservative here since we currently don't have a way to
// represent strided sets in FlatAffineConstraints.
@ -658,8 +658,8 @@ func @loop_nest_depth() {
%0 = alloc() : memref<100x100xf32>
%c7 = constant 7.0 : f32

for %i0 = 0 to 128 {
for %i1 = 0 to 8 {
affine.for %i0 = 0 to 128 {
affine.for %i1 = 0 to 8 {
store %c7, %0[%i0, %i1] : memref<100x100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
@ -667,10 +667,10 @@ func @loop_nest_depth() {
// expected-note@-4 {{dependence from 0 to 1 at depth 1 = true}}
}
}
for %i2 = 0 to 8 {
for %i3 = 0 to 8 {
for %i4 = 0 to 8 {
for %i5 = 0 to 16 {
affine.for %i2 = 0 to 8 {
affine.for %i3 = 0 to 8 {
affine.for %i4 = 0 to 8 {
affine.for %i5 = 0 to 16 {
%8 = affine.apply (d0, d1) -> (d0 * 16 + d1)(%i4, %i5)
%9 = load %0[%8, %i3] : memref<100x100xf32>
// expected-note@-1 {{dependence from 1 to 0 at depth 1 = false}}
@ -693,9 +693,9 @@ func @loop_nest_depth() {
func @mod_div_3d() {
%M = alloc() : memref<2x2x2xi32>
%c0 = constant 0 : i32
for %i0 = 0 to 8 {
for %i1 = 0 to 8 {
for %i2 = 0 to 8 {
affine.for %i0 = 0 to 8 {
affine.for %i1 = 0 to 8 {
affine.for %i2 = 0 to 8 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 floordiv 4) (%i0, %i1, %i2)
%idx1 = affine.apply (d0, d1, d2) -> (d1 mod 2) (%i0, %i1, %i2)
%idx2 = affine.apply (d0, d1, d2) -> (d2 floordiv 4) (%i0, %i1, %i2)
@ -719,12 +719,12 @@ func @delinearize_mod_floordiv() {
%in = alloc() : memref<2x2x3x3x16x1xi32>
%out = alloc() : memref<64x9xi32>

for %i0 = 0 to 2 {
for %i1 = 0 to 2 {
for %i2 = 0 to 3 {
for %i3 = 0 to 3 {
for %i4 = 0 to 16 {
for %i5 = 0 to 1 {
affine.for %i0 = 0 to 2 {
affine.for %i1 = 0 to 2 {
affine.for %i2 = 0 to 3 {
affine.for %i3 = 0 to 3 {
affine.for %i4 = 0 to 16 {
affine.for %i5 = 0 to 1 {
store %val, %in[%i0, %i1, %i2, %i3, %i4, %i5] : memref<2x2x3x3x16x1xi32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
@ -742,8 +742,8 @@ func @delinearize_mod_floordiv() {
}
}

for %ii = 0 to 64 {
for %jj = 0 to 9 {
affine.for %ii = 0 to 64 {
affine.for %jj = 0 to 9 {
%a0 = affine.apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj)
%a10 = affine.apply (d0) ->
(d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0)
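For the conservative result flagged in @mod_deps above: the map (d0) -> (d0 mod 2) sends even iterations to element 0 and odd iterations to element 1, so the store recurs with stride 2 within each parity class. A hedged sketch of the access pattern, using only names that appear in the test:

affine.for %i0 = 0 to 10 {
  %a0 = affine.apply (d0) -> (d0 mod 2) (%i0)
  // %a0 is 0 for %i0 in {0, 2, 4, 6, 8} and 1 for %i0 in {1, 3, 5, 7, 9}:
  // a strided set that FlatAffineConstraints cannot represent exactly,
  // hence the conservative dependence answers reported by the checker.
}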
|
@ -5,11 +5,11 @@ func @loop_nest_3d_outer_two_parallel(%N : index) {
%0 = alloc() : memref<1024 x 1024 x vector<64xf32>>
%1 = alloc() : memref<1024 x 1024 x vector<64xf32>>
%2 = alloc() : memref<1024 x 1024 x vector<64xf32>>
for %i = 0 to %N {
affine.for %i = 0 to %N {
// expected-note@-1 {{parallel loop}}
for %j = 0 to %N {
affine.for %j = 0 to %N {
// expected-note@-1 {{parallel loop}}
for %k = 0 to %N {
affine.for %k = 0 to %N {
%5 = load %0[%i, %k] : memref<1024x1024xvector<64xf32>>
%6 = load %1[%k, %j] : memref<1024x1024xvector<64xf32>>
%7 = load %2[%i, %j] : memref<1024x1024xvector<64xf32>>
|
@ -16,13 +16,13 @@ func @loop_nest_dma() {
%zero = constant 0 : index
%num_elts = constant 128 : index

for %i = 0 to 8 {
affine.for %i = 0 to 8 {
dma_start %A[%i], %Ah[%i], %num_elts, %tag[%zero] : memref<256 x f32>, memref<32 x f32, 1>, memref<1 x f32>
dma_wait %tag[%zero], %num_elts : memref<1 x f32>
%v = load %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1>
%r = "compute"(%v) : (f32) -> (f32)
store %r, %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1>
for %j = 0 to 128 {
affine.for %j = 0 to 128 {
"do_more_compute"(%i, %j) : (index, index) -> ()
}
}
@ -34,7 +34,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %3 = affine.apply [[MOD_2]](%c0)
// CHECK-NEXT: %4 = affine.apply [[MOD_2]](%c0)
// CHECK-NEXT: dma_start %0[%c0], %1[%3, %c0], %c128, %2[%4, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
// CHECK-NEXT: for %i0 = 1 to 8 {
// CHECK-NEXT: affine.for %i0 = 1 to 8 {
// CHECK-NEXT: %5 = affine.apply [[MOD_2]](%i0)
// CHECK-NEXT: %6 = affine.apply [[MOD_2]](%i0)
// CHECK-NEXT: dma_start %0[%i0], %1[%5, %i0], %c128, %2[%6, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
@ -45,7 +45,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %10 = load %1[%9, %7] : memref<2x32xf32, 1>
// CHECK-NEXT: %11 = "compute"(%10) : (f32) -> f32
// CHECK-NEXT: store %11, %1[%9, %7] : memref<2x32xf32, 1>
// CHECK-NEXT: for %i1 = 0 to 128 {
// CHECK-NEXT: affine.for %i1 = 0 to 128 {
// CHECK-NEXT: "do_more_compute"(%7, %i1) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
@ -56,7 +56,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %15 = load %1[%14, %12] : memref<2x32xf32, 1>
// CHECK-NEXT: %16 = "compute"(%15) : (f32) -> f32
// CHECK-NEXT: store %16, %1[%14, %12] : memref<2x32xf32, 1>
// CHECK-NEXT: for %i2 = 0 to 128 {
// CHECK-NEXT: affine.for %i2 = 0 to 128 {
// CHECK-NEXT: "do_more_compute"(%12, %i2) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: dealloc %2 : memref<2x1xf32>
@ -70,7 +70,7 @@ func @loop_step(%arg0: memref<512xf32>,
%arg1: memref<512xf32>) {
%c0 = constant 0 : index
%c4 = constant 4 : index
for %i0 = 0 to 512 step 4 {
affine.for %i0 = 0 to 512 step 4 {
%1 = alloc() : memref<4xf32, 1>
%2 = alloc() : memref<1xi32>
dma_start %arg0[%i0], %1[%c0], %c4, %2[%c0]
@ -84,7 +84,7 @@ func @loop_step(%arg0: memref<512xf32>,
// CHECK: %2 = affine.apply [[FLOOR_MOD_2]](%c0)
// CHECK: %3 = affine.apply [[FLOOR_MOD_2]](%c0)
// CHECK-NEXT: dma_start %arg0[%c0], %0[%2, %c0_0], %c4, [[TAG]][%3, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32>
// CHECK-NEXT: for %i0 = 4 to 512 step 4 {
// CHECK-NEXT: affine.for %i0 = 4 to 512 step 4 {
// CHECK-NEXT: %4 = affine.apply [[FLOOR_MOD_2]](%i0)
// CHECK-NEXT: %5 = affine.apply [[FLOOR_MOD_2]](%i0)
// CHECK-NEXT: dma_start %arg0[%i0], %0[%4, %c0_0], %c4, [[TAG]][%5, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32>
@ -117,8 +117,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK-DAG: [[BUF_ARG2:%[0-9]+]] = alloc() : memref<2x64x4xvector<8xf32>, 2>
// CHECK-DAG: [[TAG_ARG2:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg2[
// CHECK: for %i0 = 1 to 8 {
for %i0 = 0 to 8 {
// CHECK: affine.for %i0 = 1 to 8 {
affine.for %i0 = 0 to 8 {
%6 = affine.apply #map2(%i0)
dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32>
dma_wait %5[%c0], %num_elts : memref<2xi32>
@ -132,8 +132,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: [[TAG_ARG1:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK-NEXT for %i1 = 1 to 8 {
for %i1 = 0 to 8 {
// CHECK-NEXT affine.for %i1 = 1 to 8 {
affine.for %i1 = 0 to 8 {
%7 = affine.apply #map1(%i0, %i1)
%8 = affine.apply #map2(%i1)
dma_start %arg0[%7, %c0], %0[%c0, %c0], %num_elts, %3[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32>
@ -145,8 +145,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: dma_start %arg1[
// CHECK: dma_wait [[TAG_ARG0]]
// CHECK: dma_wait [[TAG_ARG1]]
// CHECK-NEXT: for %i2 = 0 to 4 {
for %i2 = 0 to 4 {
// CHECK-NEXT: affine.for %i2 = 0 to 4 {
affine.for %i2 = 0 to 4 {
"foo"() : () -> ()
}
}
@ -166,16 +166,16 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: [[TAG_ARG1_NESTED:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK: for %i4 = 1 to 8 {
// CHECK: affine.for %i4 = 1 to 8 {
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK: dma_wait [[TAG_ARG0_NESTED]]
// CHECK: dma_wait [[TAG_ARG1_NESTED]]
// CHECK: for %i5 = 0 to 4 {
// CHECK: affine.for %i5 = 0 to 4 {
// CHECK: "foo"() : () -> ()
// CHECK: dma_wait [[TAG_ARG0_NESTED]]
// CHECK: dma_wait [[TAG_ARG1_NESTED]]
// CHECK: for %i6 = 0 to 4 {
// CHECK: affine.for %i6 = 0 to 4 {
}
return
// CHECK: }
@ -202,8 +202,8 @@ func @loop_dma_dependent(%arg2: memref<512x32xvector<8xf32>>) {
// The two DMAs below are dependent (incoming and outgoing on the same
// memref) in the same iteration; so no pipelining here.
// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 8 {
for %i0 = 0 to 8 {
// CHECK: affine.for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
%6 = affine.apply #map2(%i0)
dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>>, memref<64x4xvector<8xf32>, 2>, memref<2xi32>
dma_wait %5[%c0], %num_elts : memref<2xi32>
@ -223,8 +223,8 @@ func @escaping_use(%arg0: memref<512 x 32 x f32>) {
%tag = alloc() : memref<1 x i32>

// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 16 {
for %kTT = 0 to 16 {
// CHECK: affine.for %i0 = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<32 x 32 x f32, 2>, memref<1 x i32>
@ -247,14 +247,14 @@ func @live_out_use(%arg0: memref<512 x 32 x f32>) -> f32 {
%tag = alloc() : memref<1 x i32>

// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 16 {
for %kTT = 0 to 16 {
// CHECK: affine.for %i0 = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<32 x 32 x f32, 2>, memref<1 x i32>
dma_wait %tag[%zero], %num_elt : memref<1 x i32>
}
// Use live out of 'for' inst; no DMA pipelining will be done.
// Use live out of 'affine.for' inst; no DMA pipelining will be done.
%v = load %Av[%zero, %zero] : memref<32 x 32 x f32, 2>
return %v : f32
// CHECK: %{{[0-9]+}} = load %{{[0-9]+}}[%c0, %c0] : memref<32x32xf32, 2>
@ -278,14 +278,14 @@ func @dynamic_shape_dma_buffer(%arg0: memref<512 x 32 x f32>) {
// CHECK: %5 = affine.apply [[MOD_2]](%c0)
// CHECK: %6 = affine.apply [[MOD_2]](%c0)
// CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%5, %c0_0, %c0_0], %c512, %4[%6, %c0_0]
for %kTT = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<? x ? x f32, 2>, memref<1 x i32>
dma_wait %tag[%zero], %num_elt : memref<1 x i32>
}
return
// CHECK-NEXT: for %i0 = 1 to 16 {
// CHECK-NEXT: affine.for %i0 = 1 to 16 {
// CHECK: %7 = affine.apply [[MOD_2]](%i0)
// CHECK: %8 = affine.apply [[MOD_2]](%i0)
// CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%7, %c0_0, %c0_0], %c512, %4[%8, %c0_0]
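The CHECK lines in this file encode the double-buffering rewrite performed by DMA pipelining: each buffer and tag memref gains a leading dimension of 2 (memref<2x32xf32, 1>, memref<2x1xf32>), and every access is steered through an affine.apply of [[MOD_2]], so one slot is computed on while the transfer for the next iteration fills the other. A sketch of the indexing shape, assuming [[MOD_2]] is (d0) -> (d0 mod 2) as its captured uses suggest:

affine.for %i0 = 1 to 8 {
  %slot = affine.apply (d0) -> (d0 mod 2) (%i0)
  // compute reads and writes buffer slot %i0 mod 2, while the overlapped
  // dma_start targets the complementary slot (%i0 + 1) mod 2
}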
|
@ -73,8 +73,8 @@

// CHECK-LABEL: func @test_gaussian_elimination_empty_set0() {
func @test_gaussian_elimination_empty_set0() {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: [[SET_EMPTY_2D]](%i0, %i1)
affine.if (d0, d1) : (2 == 0)(%i0, %i1) {
}
@ -85,8 +85,8 @@ func @test_gaussian_elimination_empty_set0() {

// CHECK-LABEL: func @test_gaussian_elimination_empty_set1() {
func @test_gaussian_elimination_empty_set1() {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: [[SET_EMPTY_2D]](%i0, %i1)
affine.if (d0, d1) : (1 >= 0, -1 >= 0) (%i0, %i1) {
}
@ -97,8 +97,8 @@ func @test_gaussian_elimination_empty_set1() {

// CHECK-LABEL: func @test_gaussian_elimination_non_empty_set2() {
func @test_gaussian_elimination_non_empty_set2() {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set1(%i0, %i1)
affine.if #set2(%i0, %i1) {
}
@ -111,8 +111,8 @@ func @test_gaussian_elimination_non_empty_set2() {
func @test_gaussian_elimination_empty_set3() {
%c7 = constant 7 : index
%c11 = constant 11 : index
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set2(%i0, %i1)[%c7, %c11]
affine.if #set3(%i0, %i1)[%c7, %c11] {
}
@ -125,8 +125,8 @@ func @test_gaussian_elimination_empty_set3() {
func @test_gaussian_elimination_non_empty_set4() {
%c7 = constant 7 : index
%c11 = constant 11 : index
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set3(%i0, %i1)[%c7, %c11]
affine.if #set4(%i0, %i1)[%c7, %c11] {
}
@ -139,8 +139,8 @@ func @test_gaussian_elimination_non_empty_set4() {
func @test_gaussian_elimination_empty_set5() {
%c7 = constant 7 : index
%c11 = constant 11 : index
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
// CHECK: #set2(%i0, %i1)[%c7, %c11]
affine.if #set5(%i0, %i1)[%c7, %c11] {
}
@ -151,8 +151,8 @@ func @test_gaussian_elimination_empty_set5() {

// CHECK-LABEL: func @test_fuzz_explosion
func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) {
for %i0 = 1 to 10 {
for %i1 = 1 to 100 {
affine.for %i0 = 1 to 10 {
affine.for %i1 = 1 to 100 {
affine.if #set_fuzz_virus(%i0, %i1, %arg0, %arg1, %arg2, %arg3) {
}
}
@ -163,8 +163,8 @@ func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : i

// CHECK-LABEL: func @test_empty_set(%arg0: index) {
func @test_empty_set(%N : index) {
for %i = 0 to 10 {
for %j = 0 to 10 {
affine.for %i = 0 to 10 {
affine.for %j = 0 to 10 {
// CHECK: affine.if [[SET_EMPTY_2D]](%i0, %i1)
affine.if (d0, d1) : (d0 - d1 >= 0, d1 - d0 - 1 >= 0)(%i, %j) {
"foo"() : () -> ()
@ -198,8 +198,8 @@ func @test_empty_set(%N : index) {
}
}
// The tests below test GCDTightenInequalities().
for %k = 0 to 10 {
for %l = 0 to 10 {
affine.for %k = 0 to 10 {
affine.for %l = 0 to 10 {
// Empty because no multiple of 8 lies between 4 and 7.
// CHECK: affine.if [[SET_EMPTY_1D]](%i2)
affine.if (d0) : (8*d0 - 4 >= 0, -8*d0 + 7 >= 0)(%k) {
@ -226,7 +226,7 @@ func @test_empty_set(%N : index) {
}
}

for %m = 0 to 10 {
affine.for %m = 0 to 10 {
// CHECK: affine.if [[SET_EMPTY_1D]](%i{{[0-9]+}})
affine.if (d0) : (d0 mod 2 - 3 == 0) (%m) {
"foo"() : () -> ()
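The GCDTightenInequalities cases above reduce to simple integer reasoning. For the first one, the constraints 8*d0 - 4 >= 0 and -8*d0 + 7 >= 0 force 4 <= 8*d0 <= 7, i.e. 1/2 <= d0 <= 7/8, which no integer d0 satisfies; the constraint system is therefore empty and, as the CHECK lines expect, the affine.if condition folds to the canonical empty set [[SET_EMPTY_1D]].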
|
@ -10,7 +10,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
%1 = "foo"() : () -> i32 loc("foo")

// CHECK: } loc(unknown)
for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
} loc(fused["foo", "mysource.cc":10:8])

// CHECK: } loc(unknown)
|
@ -8,13 +8,13 @@
// CHECK-LABEL: func @unroll_jam_imperfect_nest() {
func @unroll_jam_imperfect_nest() {
// CHECK: %c100 = constant 100 : index
// CHECK-NEXT: for %i0 = 0 to 100 step 2 {
for %i = 0 to 101 {
// CHECK-NEXT: affine.for %i0 = 0 to 100 step 2 {
affine.for %i = 0 to 101 {
// CHECK: %0 = "addi32"(%i0, %i0) : (index, index) -> i32
// CHECK-NEXT: %1 = affine.apply [[MAP_PLUS_1]](%i0)
// CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
%x = "addi32"(%i, %i) : (index, index) -> i32
for %j = 0 to 17 {
affine.for %j = 0 to 17 {
// CHECK: %3 = "addi32"(%i0, %i0) : (index, index) -> i32
// CHECK-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
// CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_1]](%i0)
@ -30,7 +30,7 @@ func @unroll_jam_imperfect_nest() {
} // CHECK }
// cleanup loop (single iteration)
// CHECK: %11 = "addi32"(%c100, %c100) : (index, index) -> i32
// CHECK-NEXT: for %i2 = 0 to 17 {
// CHECK-NEXT: affine.for %i2 = 0 to 17 {
// CHECK-NEXT: %12 = "addi32"(%c100, %c100) : (index, index) -> i32
// CHECK-NEXT: %13 = "addi32"(%12, %12) : (i32, i32) -> i32
// CHECK-NEXT: }
@ -40,20 +40,20 @@ func @unroll_jam_imperfect_nest() {

// CHECK-LABEL: func @loop_nest_unknown_count_1(%arg0: index) {
func @loop_nest_unknown_count_1(%N : index) {
// CHECK-NEXT: for %i0 = 1 to [[MAP_DIV_OFFSET]]()[%arg0] step 2 {
// CHECK-NEXT: for %i1 = 1 to 100 {
// CHECK-NEXT: affine.for %i0 = 1 to [[MAP_DIV_OFFSET]]()[%arg0] step 2 {
// CHECK-NEXT: affine.for %i1 = 1 to 100 {
// CHECK-NEXT: %0 = "foo"() : () -> i32
// CHECK-NEXT: %1 = "foo"() : () -> i32
// CHECK-NEXT: }
// CHECK-NEXT: }
// A cleanup loop should be generated here.
// CHECK-NEXT: for %i2 = [[MAP_DIV_OFFSET]]()[%arg0] to %arg0 {
// CHECK-NEXT: for %i3 = 1 to 100 {
// CHECK-NEXT: affine.for %i2 = [[MAP_DIV_OFFSET]]()[%arg0] to %arg0 {
// CHECK-NEXT: affine.for %i3 = 1 to 100 {
// CHECK-NEXT: %2 = "foo"() : () -> i32
// CHECK_NEXT: }
// CHECK_NEXT: }
for %i = 1 to %N {
for %j = 1 to 100 {
affine.for %i = 1 to %N {
affine.for %j = 1 to 100 {
%x = "foo"() : () -> i32
}
}
@ -62,8 +62,8 @@ func @loop_nest_unknown_count_1(%N : index) {

// CHECK-LABEL: func @loop_nest_unknown_count_2(%arg0: index) {
func @loop_nest_unknown_count_2(%arg : index) {
// CHECK-NEXT: for %i0 = %arg0 to [[M1]]()[%arg0] step 2 {
// CHECK-NEXT: for %i1 = 1 to 100 {
// CHECK-NEXT: affine.for %i0 = %arg0 to [[M1]]()[%arg0] step 2 {
// CHECK-NEXT: affine.for %i1 = 1 to 100 {
// CHECK-NEXT: %0 = "foo"(%i0) : (index) -> i32
// CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0)
// CHECK-NEXT: %2 = "foo"(%1) : (index) -> i32
@ -71,11 +71,11 @@ func @loop_nest_unknown_count_2(%arg : index) {
// CHECK-NEXT: }
// The cleanup loop is a single iteration one and is promoted.
// CHECK-NEXT: %3 = affine.apply [[M1]]()[%arg0]
// CHECK-NEXT: for %i2 = 1 to 100 {
// CHECK-NEXT: affine.for %i2 = 1 to 100 {
// CHECK-NEXT: %4 = "foo"(%3) : (index) -> i32
// CHECK_NEXT: }
for %i = %arg to ()[s0] -> (s0+9) ()[%arg] {
for %j = 1 to 100 {
affine.for %i = %arg to ()[s0] -> (s0+9) ()[%arg] {
affine.for %j = 1 to 100 {
%x = "foo"(%i) : (index) -> i32
}
}
@ -84,22 +84,22 @@ func @loop_nest_unknown_count_2(%arg : index) {

// CHECK-LABEL: func @loop_nest_symbolic_and_min_upper_bound
func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
for %i = 0 to min ()[s0, s1] -> (s0, s1, 1024)()[%M, %N] {
for %j = 0 to %K {
affine.for %i = 0 to min ()[s0, s1] -> (s0, s1, 1024)()[%M, %N] {
affine.for %j = 0 to %K {
"foo"(%i, %j) : (index, index) -> ()
}
}
return
}
// CHECK-NEXT: for %i0 = 0 to min [[MAP_MULTI_RES]]()[%arg0, %arg1] step 2 {
// CHECK-NEXT: for %i1 = 0 to %arg2 {
// CHECK-NEXT: affine.for %i0 = 0 to min [[MAP_MULTI_RES]]()[%arg0, %arg1] step 2 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg2 {
// CHECK-NEXT: "foo"(%i0, %i1) : (index, index) -> ()
// CHECK-NEXT: %0 = affine.apply #map2(%i0)
// CHECK-NEXT: "foo"(%0, %i1) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i2 = max [[MAP_MULTI_RES]]()[%arg0, %arg1] to min #map9()[%arg0, %arg1] {
// CHECK-NEXT: for %i3 = 0 to %arg2 {
// CHECK-NEXT: affine.for %i2 = max [[MAP_MULTI_RES]]()[%arg0, %arg1] to min #map9()[%arg0, %arg1] {
// CHECK-NEXT: affine.for %i3 = 0 to %arg2 {
// CHECK-NEXT: "foo"(%i2, %i3) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
|
@ -25,13 +25,13 @@
|
||||
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_simplest() {
|
||||
func @loop_nest_simplest() {
|
||||
// UNROLL-FULL: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: affine.for %i0 = 0 to 100 step 2 {
|
||||
affine.for %i = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: %c1_i32 = constant 1 : i32
|
||||
// UNROLL-FULL-NEXT: %c1_i32_0 = constant 1 : i32
|
||||
// UNROLL-FULL-NEXT: %c1_i32_1 = constant 1 : i32
|
||||
// UNROLL-FULL-NEXT: %c1_i32_2 = constant 1 : i32
|
||||
for %j = 0 to 4 {
|
||||
affine.for %j = 0 to 4 {
|
||||
%x = constant 1 : i32
|
||||
}
|
||||
} // UNROLL-FULL: }
|
||||
@ -41,8 +41,8 @@ func @loop_nest_simplest() {
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_simple_iv_use() {
|
||||
func @loop_nest_simple_iv_use() {
|
||||
// UNROLL-FULL: %c0 = constant 0 : index
|
||||
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 step 2 {
|
||||
affine.for %i = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
|
||||
// UNROLL-FULL: %1 = affine.apply [[MAP0]](%c0)
|
||||
// UNROLL-FULL-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
|
||||
@ -50,7 +50,7 @@ func @loop_nest_simple_iv_use() {
|
||||
// UNROLL-FULL-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> i32
|
||||
// UNROLL-FULL: %5 = affine.apply [[MAP2]](%c0)
|
||||
// UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
|
||||
for %j = 0 to 4 {
|
||||
affine.for %j = 0 to 4 {
|
||||
%x = "addi32"(%j, %j) : (index, index) -> i32
|
||||
}
|
||||
} // UNROLL-FULL: }
|
||||
@ -61,8 +61,8 @@ func @loop_nest_simple_iv_use() {
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_body_def_use() {
|
||||
func @loop_nest_body_def_use() {
|
||||
// UNROLL-FULL: %c0 = constant 0 : index
|
||||
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 step 2 {
|
||||
affine.for %i = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: %c0_0 = constant 0 : index
|
||||
%c0 = constant 0 : index
|
||||
// UNROLL-FULL: %0 = affine.apply [[MAP0]](%c0)
|
||||
@ -76,7 +76,7 @@ func @loop_nest_body_def_use() {
|
||||
// UNROLL-FULL-NEXT: %8 = affine.apply [[MAP2]](%c0)
|
||||
// UNROLL-FULL-NEXT: %9 = affine.apply [[MAP0]](%8)
|
||||
// UNROLL-FULL-NEXT: %10 = "addi32"(%9, %c0_0) : (index, index) -> index
|
||||
for %j = 0 to 4 {
|
||||
affine.for %j = 0 to 4 {
|
||||
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "addi32"(%x, %c0) : (index, index) -> index
|
||||
@ -89,14 +89,14 @@ func @loop_nest_body_def_use() {
|
||||
func @loop_nest_strided() {
|
||||
// UNROLL-FULL: %c2 = constant 2 : index
|
||||
// UNROLL-FULL-NEXT: %c2_0 = constant 2 : index
|
||||
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 {
|
||||
affine.for %i = 0 to 100 {
|
||||
// UNROLL-FULL: %0 = affine.apply [[MAP0]](%c2_0)
|
||||
// UNROLL-FULL-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %2 = affine.apply [[MAP1]](%c2_0)
|
||||
// UNROLL-FULL-NEXT: %3 = affine.apply [[MAP0]](%2)
|
||||
// UNROLL-FULL-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> index
|
||||
for %j = 2 to 6 step 2 {
|
||||
affine.for %j = 2 to 6 step 2 {
|
||||
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "addi32"(%x, %x) : (index, index) -> index
|
||||
@ -109,7 +109,7 @@ func @loop_nest_strided() {
|
||||
// UNROLL-FULL-NEXT: %10 = affine.apply [[MAP3]](%c2)
|
||||
// UNROLL-FULL-NEXT: %11 = affine.apply [[MAP0]](%10)
|
||||
// UNROLL-FULL-NEXT: %12 = "addi32"(%11, %11) : (index, index) -> index
|
||||
for %k = 2 to 7 step 2 {
|
||||
affine.for %k = 2 to 7 step 2 {
|
||||
%z = "affine.apply" (%k) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%w = "addi32"(%z, %z) : (index, index) -> index
|
||||
@ -121,8 +121,8 @@ func @loop_nest_strided() {
|
||||
// UNROLL-FULL-LABEL: func @loop_nest_multiple_results() {
|
||||
func @loop_nest_multiple_results() {
|
||||
// UNROLL-FULL: %c0 = constant 0 : index
|
||||
// UNROLL-FULL-NEXT: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 {
|
||||
affine.for %i = 0 to 100 {
|
||||
// UNROLL-FULL: %0 = affine.apply [[MAP4]](%i0, %c0)
|
||||
// UNROLL-FULL-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %2 = affine.apply #map{{.*}}(%i0, %c0)
|
||||
@ -132,7 +132,7 @@ func @loop_nest_multiple_results() {
|
||||
// UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %7 = affine.apply #map{{.*}}(%i0, %4)
|
||||
// UNROLL-FULL-NEXT: %8 = "fma"(%7, %5, %5) : (index, index, index) -> (index, index)
|
||||
for %j = 0 to 2 step 1 {
|
||||
affine.for %j = 0 to 2 step 1 {
|
||||
%x = affine.apply (d0, d1) -> (d0 + 1) (%i, %j)
|
||||
%y = "addi32"(%x, %x) : (index, index) -> index
|
||||
%z = affine.apply (d0, d1) -> (d0 + 3) (%i, %j)
|
||||
@ -149,8 +149,8 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
|
||||
// UNROLL-FULL: %c0 = constant 0 : index
|
||||
// UNROLL-FULL-NEXT: %c128 = constant 128 : index
|
||||
%c128 = constant 128 : index
|
||||
// UNROLL-FULL: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-FULL: affine.for %i0 = 0 to 100 {
|
||||
affine.for %i = 0 to 100 {
|
||||
// UNROLL-FULL: %0 = "vld"(%i0) : (index) -> i32
|
||||
%ld = "vld"(%i) : (index) -> i32
|
||||
// UNROLL-FULL: %1 = affine.apply [[MAP0]](%c0)
|
||||
@ -168,7 +168,7 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
|
||||
// UNROLL-FULL-NEXT: %13 = affine.apply [[MAP0]](%12)
|
||||
// UNROLL-FULL-NEXT: %14 = "vmulf"(%12, %13) : (index, index) -> index
|
||||
// UNROLL-FULL-NEXT: %15 = "vaddf"(%14, %14) : (index, index) -> index
|
||||
for %j = 0 to 4 {
|
||||
affine.for %j = 0 to 4 {
|
||||
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "vmulf"(%j, %x) : (index, index) -> index
|
||||
@ -197,7 +197,7 @@ func @loop_nest_seq_multiple() {
|
||||
// UNROLL-FULL-NEXT: %5 = affine.apply [[MAP2]](%c0_0)
|
||||
// UNROLL-FULL-NEXT: %6 = affine.apply [[MAP0]](%5)
|
||||
// UNROLL-FULL-NEXT: "mul"(%6, %6) : (index, index) -> ()
|
||||
for %j = 0 to 4 {
|
||||
affine.for %j = 0 to 4 {
|
||||
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
"mul"(%x, %x) : (index, index) -> ()
|
||||
@ -205,8 +205,8 @@ func @loop_nest_seq_multiple() {
|
||||
|
||||
// UNROLL-FULL: %c99 = constant 99 : index
|
||||
%k = constant 99 : index
|
||||
// UNROLL-FULL: for %i0 = 0 to 100 step 2 {
|
||||
for %m = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: affine.for %i0 = 0 to 100 step 2 {
|
||||
affine.for %m = 0 to 100 step 2 {
|
||||
// UNROLL-FULL: %7 = affine.apply [[MAP0]](%c0)
|
||||
// UNROLL-FULL-NEXT: %8 = affine.apply [[MAP6]](%c0)[%c99]
|
||||
// UNROLL-FULL-NEXT: %9 = affine.apply [[MAP0]](%c0)
|
||||
@ -218,7 +218,7 @@ func @loop_nest_seq_multiple() {
|
||||
// UNROLL-FULL-NEXT: %15 = affine.apply [[MAP2]](%c0)
|
||||
// UNROLL-FULL-NEXT: %16 = affine.apply [[MAP0]](%15)
|
||||
// UNROLL-FULL-NEXT: %17 = affine.apply [[MAP6]](%15)[%c99]
|
||||
for %n = 0 to 4 {
|
||||
affine.for %n = 0 to 4 {
|
||||
%y = "affine.apply" (%n) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%z = "affine.apply" (%n, %k) { map: (d0) [s0] -> (d0 + s0 + 1) } :
|
||||
@ -233,7 +233,7 @@ func @loop_nest_unroll_full() {
|
||||
// UNROLL-FULL-NEXT: %0 = "foo"() : () -> i32
|
||||
// UNROLL-FULL-NEXT: %1 = "bar"() : () -> i32
|
||||
// UNROLL-FULL-NEXT: return
|
||||
for %i = 0 to 1 {
|
||||
affine.for %i = 0 to 1 {
|
||||
%x = "foo"() : () -> i32
|
||||
%y = "bar"() : () -> i32
|
||||
}
|
||||
@ -242,16 +242,16 @@ func @loop_nest_unroll_full() {
|
||||
|
||||
// SHORT-LABEL: func @loop_nest_outer_unroll() {
|
||||
func @loop_nest_outer_unroll() {
|
||||
// SHORT: for %i0 = 0 to 4 {
|
||||
// SHORT: affine.for %i0 = 0 to 4 {
|
||||
// SHORT-NEXT: %0 = affine.apply [[MAP0]](%i0)
|
||||
// SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
|
||||
// SHORT-NEXT: }
|
||||
// SHORT-NEXT: for %i1 = 0 to 4 {
|
||||
// SHORT-NEXT: affine.for %i1 = 0 to 4 {
|
||||
// SHORT-NEXT: %2 = affine.apply [[MAP0]](%i1)
|
||||
// SHORT-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> index
|
||||
// SHORT-NEXT: }
|
||||
for %i = 0 to 2 {
|
||||
for %j = 0 to 4 {
|
||||
affine.for %i = 0 to 2 {
|
||||
affine.for %j = 0 to 4 {
|
||||
%x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "addi32"(%x, %x) : (index, index) -> index
|
||||
@ -275,33 +275,33 @@ func @loop_nest_seq_long() -> i32 {
|
||||
|
||||
%zero_idx = constant 0 : index
|
||||
|
||||
// CHECK: for %i0 = 0 to 512
|
||||
for %n0 = 0 to 512 {
|
||||
// CHECK: for %i1 = 0 to 8
|
||||
for %n1 = 0 to 8 {
|
||||
// CHECK: affine.for %i0 = 0 to 512
|
||||
affine.for %n0 = 0 to 512 {
|
||||
// CHECK: affine.for %i1 = 0 to 8
|
||||
affine.for %n1 = 0 to 8 {
|
||||
store %one, %A[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
store %two, %B[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
store %zero, %C[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
}
|
||||
}
|
||||
|
||||
for %x = 0 to 2 {
|
||||
for %y = 0 to 2 {
|
||||
// CHECK: for %i2
|
||||
for %i2 = 0 to 8 {
|
||||
// CHECK-NOT: for %i3
|
||||
affine.for %x = 0 to 2 {
|
||||
affine.for %y = 0 to 2 {
|
||||
// CHECK: affine.for %i2
|
||||
affine.for %i2 = 0 to 8 {
|
||||
// CHECK-NOT: affine.for %i3
|
||||
// CHECK: %{{[0-9]+}} = affine.apply
|
||||
%b2 = "affine.apply" (%y, %i2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%z = load %B[%x, %b2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
"op1"(%z) : (i32) -> ()
|
||||
}
|
||||
for %j1 = 0 to 8 {
|
||||
for %j2 = 0 to 8 {
|
||||
affine.for %j1 = 0 to 8 {
|
||||
affine.for %j2 = 0 to 8 {
|
||||
%a2 = "affine.apply" (%y, %j2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%v203 = load %A[%j1, %a2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
"op2"(%v203) : (i32) -> ()
|
||||
}
|
||||
for %k2 = 0 to 8 {
|
||||
affine.for %k2 = 0 to 8 {
|
||||
%s0 = "op3"() : () -> i32
|
||||
%c2 = "affine.apply" (%x, %k2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%s1 = load %C[%j1, %c2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
@ -318,8 +318,8 @@ func @loop_nest_seq_long() -> i32 {
|
||||
|
||||
// UNROLL-BY-4-LABEL: func @unroll_unit_stride_no_cleanup() {
|
||||
func @unroll_unit_stride_no_cleanup() {
|
||||
// UNROLL-BY-4: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
|
||||
affine.for %i = 0 to 100 {
|
||||
// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 {
|
||||
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
|
||||
@ -333,13 +333,13 @@ func @unroll_unit_stride_no_cleanup() {
|
||||
// UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: }
|
||||
for %j = 0 to 8 {
|
||||
affine.for %j = 0 to 8 {
|
||||
%x = "addi32"(%j, %j) : (index, index) -> i32
|
||||
%y = "addi32"(%x, %x) : (i32, i32) -> i32
|
||||
}
|
||||
// empty loop
|
||||
// UNROLL-BY-4: for %i2 = 0 to 8 {
|
||||
for %k = 0 to 8 {
|
||||
// UNROLL-BY-4: affine.for %i2 = 0 to 8 {
|
||||
affine.for %k = 0 to 8 {
|
||||
}
|
||||
}
|
||||
return
|
||||
@@ -347,8 +347,8 @@ func @unroll_unit_stride_no_cleanup() {
 
 // UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
 func @unroll_unit_stride_cleanup() {
-  // UNROLL-BY-4: for %i0 = 0 to 100 {
-  for %i = 0 to 100 {
+  // UNROLL-BY-4: affine.for %i0 = 0 to 100 {
+  affine.for %i = 0 to 100 {
     // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 {
     // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
     // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
@@ -366,7 +366,7 @@ func @unroll_unit_stride_cleanup() {
     // UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
     // UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
     // UNROLL-BY-4-NEXT: }
-    for %j = 0 to 10 {
+    affine.for %j = 0 to 10 {
       %x = "addi32"(%j, %j) : (index, index) -> i32
       %y = "addi32"(%x, %x) : (i32, i32) -> i32
     }
@@ -376,8 +376,8 @@ func @unroll_unit_stride_cleanup() {
 
 // UNROLL-BY-4-LABEL: func @unroll_non_unit_stride_cleanup() {
 func @unroll_non_unit_stride_cleanup() {
-  // UNROLL-BY-4: for %i0 = 0 to 100 {
-  for %i = 0 to 100 {
+  // UNROLL-BY-4: affine.for %i0 = 0 to 100 {
+  affine.for %i = 0 to 100 {
     // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 2 to 42 step 20 {
     // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
     // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
@@ -395,7 +395,7 @@ func @unroll_non_unit_stride_cleanup() {
     // UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
     // UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
     // UNROLL-BY-4-NEXT: }
-    for %j = 2 to 48 step 5 {
+    affine.for %j = 2 to 48 step 5 {
      %x = "addi32"(%j, %j) : (index, index) -> i32
      %y = "addi32"(%x, %x) : (i32, i32) -> i32
    }
@@ -408,8 +408,8 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
 func @loop_nest_single_iteration_after_unroll(%N: index) {
   // UNROLL-BY-4: %c0 = constant 0 : index
   // UNROLL-BY-4: %c4 = constant 4 : index
-  // UNROLL-BY-4: for %i0 = 0 to %arg0 {
-  for %i = 0 to %N {
+  // UNROLL-BY-4: affine.for %i0 = 0 to %arg0 {
+  affine.for %i = 0 to %N {
     // UNROLL-BY-4: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
     // UNROLL-BY-4-NEXT: %1 = affine.apply [[MAP0]](%c0)
     // UNROLL-BY-4-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
@@ -419,7 +419,7 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
     // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
     // UNROLL-BY-4-NEXT: %7 = "addi32"(%c4, %c4) : (index, index) -> i32
     // UNROLL-BY-4-NOT: for
-    for %j = 0 to 5 {
+    affine.for %j = 0 to 5 {
       %x = "addi32"(%j, %j) : (index, index) -> i32
     } // UNROLL-BY-4-NOT: }
   } // UNROLL-BY-4: }
@@ -431,8 +431,8 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
 // No cleanup will be generated here.
 // UNROLL-BY-4-LABEL: func @loop_nest_operand1() {
 func @loop_nest_operand1() {
-  // UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
-  // UNROLL-BY-4-NEXT: for %i1 = 0 to #map{{[0-9]+}}(%i0) step 4
+  // UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
+  // UNROLL-BY-4-NEXT: affine.for %i1 = 0 to #map{{[0-9]+}}(%i0) step 4
   // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
   // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
   // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
@@ -440,8 +440,8 @@ func @loop_nest_operand1() {
   // UNROLL-BY-4-NEXT: }
   // UNROLL-BY-4-NEXT: }
   // UNROLL-BY-4-NEXT: return
-  for %i = 0 to 100 step 2 {
-    for %j = 0 to (d0) -> (d0 - d0 mod 4) (%i) {
+  affine.for %i = 0 to 100 step 2 {
+    affine.for %j = 0 to (d0) -> (d0 - d0 mod 4) (%i) {
      %x = "foo"() : () -> i32
    }
  }
@@ -451,8 +451,8 @@ func @loop_nest_operand1() {
 // No cleanup will be generated here.
 // UNROLL-BY-4-LABEL: func @loop_nest_operand2() {
 func @loop_nest_operand2() {
-  // UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
-  // UNROLL-BY-4-NEXT: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
+  // UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
+  // UNROLL-BY-4-NEXT: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
   // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
   // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
   // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
@@ -460,8 +460,8 @@ func @loop_nest_operand2() {
   // UNROLL-BY-4-NEXT: }
   // UNROLL-BY-4-NEXT: }
   // UNROLL-BY-4-NEXT: return
-  for %i = 0 to 100 step 2 {
-    for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) {
+  affine.for %i = 0 to 100 step 2 {
+    affine.for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) {
      %x = "foo"() : () -> i32
    }
  }
@@ -472,16 +472,16 @@ func @loop_nest_operand2() {
 // factor. The cleanup loop happens to be a single iteration one and is promoted.
 // UNROLL-BY-4-LABEL: func @loop_nest_operand3() {
 func @loop_nest_operand3() {
-  // UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
-  for %i = 0 to 100 step 2 {
-    // UNROLL-BY-4: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
+  // UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
+  affine.for %i = 0 to 100 step 2 {
+    // UNROLL-BY-4: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
     // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: }
     // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
-    for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) {
+    affine.for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) {
      %x = "foo"() : () -> i32
    }
  } // UNROLL-BY-4: }
@@ -490,19 +490,19 @@ func @loop_nest_operand3() {
 
 // UNROLL-BY-4-LABEL: func @loop_nest_symbolic_bound(%arg0: index) {
 func @loop_nest_symbolic_bound(%N : index) {
-  // UNROLL-BY-4: for %i0 = 0 to 100 {
-  for %i = 0 to 100 {
-    // UNROLL-BY-4: for %i1 = 0 to #map{{[0-9]+}}()[%arg0] step 4 {
+  // UNROLL-BY-4: affine.for %i0 = 0 to 100 {
+  affine.for %i = 0 to 100 {
+    // UNROLL-BY-4: affine.for %i1 = 0 to #map{{[0-9]+}}()[%arg0] step 4 {
     // UNROLL-BY-4: %0 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: }
     // A cleanup loop will be generated here.
-    // UNROLL-BY-4-NEXT: for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
+    // UNROLL-BY-4-NEXT: affine.for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
     // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
     // UNROLL-BY-4-NEXT: }
-    for %j = 0 to %N {
+    affine.for %j = 0 to %N {
      %x = "foo"() : () -> i32
    }
  }
@@ -511,18 +511,18 @@ func @loop_nest_symbolic_bound(%N : index) {
 
 // UNROLL-BY-4-LABEL: func @loop_nest_symbolic_and_min_upper_bound
 func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
-  for %i = %M to min ()[s0, s1] -> (s0, s1, 1024)()[%N, %K] {
+  affine.for %i = %M to min ()[s0, s1] -> (s0, s1, 1024)()[%N, %K] {
     "foo"() : () -> ()
   }
   return
 }
-// CHECK-NEXT: for %i0 = %arg0 to min [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] step 4 {
+// CHECK-NEXT: affine.for %i0 = %arg0 to min [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] step 4 {
 // CHECK-NEXT: "foo"() : () -> ()
 // CHECK-NEXT: "foo"() : () -> ()
 // CHECK-NEXT: "foo"() : () -> ()
 // CHECK-NEXT: "foo"() : () -> ()
 // CHECK-NEXT: }
-// CHECK-NEXT: for %i1 = max [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] to min #map28()[%arg1, %arg2] {
+// CHECK-NEXT: affine.for %i1 = max [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] to min #map28()[%arg1, %arg2] {
 // CHECK-NEXT: "foo"() : () -> ()
 // CHECK-NEXT: }
 // CHECK-NEXT: return
@@ -533,22 +533,22 @@ func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index)
 func @loop_nest_non_trivial_multiple_unroll_factor(%M : index, %N : index) {
   %T = affine.apply (d0) -> (4*d0 + 1)(%M)
   %K = affine.apply (d0) -> (d0 - 1) (%T)
-  for %i = 0 to min (d0, d1) -> (4 * d0, d1, 1024)(%N, %K) {
+  affine.for %i = 0 to min (d0, d1) -> (4 * d0, d1, 1024)(%N, %K) {
     "foo"() : () -> ()
   }
   return
 }
-// UNROLL-BY-4: for %i0 = 0 to min
+// UNROLL-BY-4: affine.for %i0 = 0 to min
 // UNROLL-BY-4-NOT: for
 // UNROLL-BY-4: return
 
 // UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_unroll_factor_2
 func @loop_nest_non_trivial_multiple_unroll_factor_2(%M : index, %N : index) {
   %K = affine.apply (d0) -> (4*d0) (%M)
-  for %i = 0 to min ()[s0, s1] -> (4 * s0, s1, 1024)()[%N, %K] {
+  affine.for %i = 0 to min ()[s0, s1] -> (4 * s0, s1, 1024)()[%N, %K] {
     "foo"() : () -> ()
   }
-  // UNROLL-BY-4: for %i0 = 0 to min
+  // UNROLL-BY-4: affine.for %i0 = 0 to min
   // UNROLL-BY-4-NEXT: "foo"
   // UNROLL-BY-4-NEXT: "foo"
   // UNROLL-BY-4-NEXT: "foo"
@@ -560,7 +560,7 @@ func @loop_nest_non_trivial_multiple_unroll_factor_2(%M : index, %N : index) {
 
 // UNROLL-BY-1-LABEL: func @unroll_by_one_should_promote_single_iteration_loop()
 func @unroll_by_one_should_promote_single_iteration_loop() {
-  for %i = 0 to 1 {
+  affine.for %i = 0 to 1 {
     %x = "foo"(%i) : (index) -> i32
   }
   return
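
Every hunk above applies the same mechanical rewrite: the bare 'for' spelling becomes the dialect-qualified 'affine.for', while loop bodies, bounds, maps, and the surrounding FileCheck directives are left intact. A minimal before/after sketch of the rename on a hypothetical function (not part of this commit):

// Before: the unqualified spelling.
func @example() {
  for %i = 0 to 100 step 2 {
    %x = "foo"() : () -> i32
  }
  return
}

// After: the dialect-qualified spelling.
func @example() {
  affine.for %i = 0 to 100 step 2 {
    %x = "foo"() : () -> i32
  }
  return
}

Note that FileCheck matches substrings, so directives such as '// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 {' in @unroll_unit_stride_no_cleanup still match the new 'affine.for' output and therefore appear above as unchanged context lines.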
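The UNROLL-BY-4 expectations throughout this file follow one piece of arithmetic: unrolling by a factor of 4 rewrites a loop so the main loop steps at 4x the original step over the largest trip-count prefix divisible by 4, and a cleanup loop covers the remaining 0-3 iterations. When the remainder is a single iteration it is promoted (as in @loop_nest_operand3), and when the trip count is already a multiple of 4 no cleanup is generated at all (as in @loop_nest_operand1). A sketch of the shape checked by @loop_nest_symbolic_bound, with the bound maps written inline for readability (the real output uses printer-assigned #map ids, so the maps shown here are illustrative, not the pass's verbatim output):

// Original inner loop with symbolic upper bound %N:
affine.for %j = 0 to %N {
  %x = "foo"() : () -> i32
}

// After unroll-by-4: a main loop over the multiple-of-4 prefix...
affine.for %j = 0 to ()[s0] -> (s0 - s0 mod 4)()[%N] step 4 {
  // ...body repeated four times, for iterations %j, %j+1, %j+2, %j+3...
}
// ...followed by a cleanup loop over the remainder.
affine.for %j = ()[s0] -> (s0 - s0 mod 4)()[%N] to %N {
  // ...body once per leftover iteration...
}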