From 832567b3799f763ec3ba9480e1628c5a3de7fa6e Mon Sep 17 00:00:00 2001
From: River Riddle
Date: Mon, 25 Mar 2019 10:14:34 -0700
Subject: [PATCH] NFC: Rename the 'for' operation in the AffineOps dialect to 'affine.for' and set the namespace of the AffineOps dialect to 'affine'.

PiperOrigin-RevId: 240165792
---
 mlir/bindings/python/test/test_py2and3.py | 18 +-
 mlir/g3doc/Dialects/Affine.md | 26 +-
 mlir/g3doc/Dialects/SuperVector.md | 18 +-
 mlir/g3doc/LangRef.md | 35 +-
 mlir/g3doc/Passes.md | 34 +-
 mlir/g3doc/Rationale.md | 42 +-
 .../RationaleSimplifiedPolyhedralForm.md | 14 +-
 mlir/include/mlir/AffineOps/AffineOps.h | 24 +-
 mlir/include/mlir/Analysis/AffineStructures.h | 13 +-
 mlir/include/mlir/Analysis/Utils.h | 10 +-
 mlir/include/mlir/Analysis/VectorAnalysis.h | 16 +-
 mlir/include/mlir/EDSC/Types.h | 5 +-
 mlir/include/mlir/Transforms/LoopUtils.h | 7 +-
 mlir/include/mlir/Transforms/Utils.h | 4 +-
 mlir/lib/AffineOps/AffineOps.cpp | 6 +-
 mlir/lib/Analysis/AffineAnalysis.cpp | 8 +-
 .../lib/Analysis/TestParallelismDetection.cpp | 5 +-
 mlir/lib/Analysis/Utils.cpp | 10 +-
 mlir/lib/EDSC/MLIREmitter.cpp | 3 +-
 mlir/lib/EDSC/Types.cpp | 6 +-
 mlir/lib/Transforms/DmaGeneration.cpp | 11 +-
 mlir/lib/Transforms/LoopFusion.cpp | 3 +-
 mlir/lib/Transforms/LoopTiling.cpp | 3 +-
 mlir/lib/Transforms/LoopUnroll.cpp | 4 +-
 mlir/lib/Transforms/LoopUnrollAndJam.cpp | 2 +-
 mlir/lib/Transforms/LowerAffine.cpp | 7 +-
 mlir/lib/Transforms/LowerVectorTransfers.cpp | 10 +-
 mlir/lib/Transforms/MaterializeVectors.cpp | 24 +-
 mlir/lib/Transforms/MemRefDataFlowOpt.cpp | 8 +-
 mlir/lib/Transforms/PipelineDataTransfer.cpp | 18 +-
 mlir/lib/Transforms/Utils/LoopUtils.cpp | 18 +-
 mlir/lib/Transforms/Utils/Utils.cpp | 4 +-
 mlir/lib/Transforms/Vectorize.cpp | 50 +-
 mlir/test/AffineOps/canonicalize.mlir | 40 +-
 mlir/test/AffineOps/invalid.mlir | 26 +-
 mlir/test/AffineOps/ops.mlir | 4 +-
 mlir/test/EDSC/api-test.cpp | 36 +-
 mlir/test/EDSC/builder-api-test.cpp | 20 +-
 mlir/test/IR/invalid.mlir | 62 +-
 mlir/test/IR/locations.mlir | 2 +-
 mlir/test/IR/parser.mlir | 76 +-
 mlir/test/IR/pretty-locations.mlir | 2 +-
 .../Vectorize/lower_vector_transfers.mlir | 58 +-
 .../Transforms/Vectorize/materialize.mlir | 16 +-
 .../materialize_vectors_1d_to_1d.mlir | 24 +-
 .../materialize_vectors_2d_to_1d.mlir | 24 +-
 .../materialize_vectors_2d_to_2d.mlir | 24 +-
 .../Transforms/Vectorize/normalize_maps.mlir | 24 +-
 .../Transforms/Vectorize/vectorize_1d.mlir | 62 +-
 .../Transforms/Vectorize/vectorize_2d.mlir | 30 +-
 .../Transforms/Vectorize/vectorize_3d.mlir | 20 +-
 .../Vectorize/vectorize_outer_loop_2d.mlir | 18 +-
 .../vectorize_outer_loop_transpose_2d.mlir | 42 +-
 .../Vectorize/vectorize_transpose_2d.mlir | 42 +-
 mlir/test/Transforms/canonicalize.mlir | 12 +-
 mlir/test/Transforms/constant-fold.mlir | 4 +-
 mlir/test/Transforms/cse.mlir | 8 +-
 mlir/test/Transforms/dma-generate.mlir | 154 ++--
 mlir/test/Transforms/loop-fusion.mlir | 732 +++++++++---------
 mlir/test/Transforms/loop-tiling.mlir | 48 +-
 mlir/test/Transforms/lower-affine.mlir | 28 +-
 mlir/test/Transforms/memref-bound-check.mlir | 50 +-
 mlir/test/Transforms/memref-dataflow-opt.mlir | 62 +-
 .../Transforms/memref-dependence-check.mlir | 86 +-
 .../Transforms/parallelism-detection.mlir | 6 +-
 .../Transforms/pipeline-data-transfer.mlir | 50 +-
 .../simplify-affine-structures.mlir | 38 +-
 mlir/test/Transforms/strip-debuginfo.mlir | 2 +-
 mlir/test/Transforms/unroll-jam.mlir | 42 +-
 mlir/test/Transforms/unroll.mlir | 158 ++--
 70 files changed, 1304 insertions(+), 1294 deletions(-)

diff --git
a/mlir/bindings/python/test/test_py2and3.py b/mlir/bindings/python/test/test_py2and3.py index e2cae843b6aa..e1f0d96b6350 100644 --- a/mlir/bindings/python/test/test_py2and3.py +++ b/mlir/bindings/python/test/test_py2and3.py @@ -56,11 +56,11 @@ class EdscTest(unittest.TestCase): code = str(fun) # TODO(zinenko,ntv): use FileCheck for these tests self.assertIn( - ' "for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)} : () -> () {\n', + ' "affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)} : () -> () {\n', code) self.assertIn(" ^bb1(%i0: index):", code) self.assertIn( - ' "for"(%c42, %2) {lower_bound: (d0) -> (d0), step: 2 : index, upper_bound: (d0) -> (d0)} : (index, index) -> () {\n', + ' "affine.for"(%c42, %2) {lower_bound: (d0) -> (d0), step: 2 : index, upper_bound: (d0) -> (d0)} : (index, index) -> () {\n', code) self.assertIn(" ^bb2(%i1: index):", code) self.assertIn( @@ -76,19 +76,19 @@ class EdscTest(unittest.TestCase): code = str(fun) self.assertIn( - ' "for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (5)} : () -> () {\n', + ' "affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (5)} : () -> () {\n', code) self.assertIn(" ^bb1(%i0: index):", code) self.assertIn( - ' "for"() {lower_bound: () -> (1), step: 3 : index, upper_bound: () -> (15)} : () -> () {\n', + ' "affine.for"() {lower_bound: () -> (1), step: 3 : index, upper_bound: () -> (15)} : () -> () {\n', code) self.assertIn(" ^bb2(%i1: index):", code) self.assertIn( - ' "for"() {lower_bound: () -> (2), step: 5 : index, upper_bound: () -> (25)} : () -> () {\n', + ' "affine.for"() {lower_bound: () -> (2), step: 5 : index, upper_bound: () -> (25)} : () -> () {\n', code) self.assertIn(" ^bb3(%i2: index):", code) self.assertIn( - ' "for"() {lower_bound: () -> (3), step: 7 : index, upper_bound: () -> (35)} : () -> () {\n', + ' "affine.for"() {lower_bound: () -> (3), step: 7 : index, upper_bound: () -> (35)} : () -> () {\n', code) self.assertIn(" ^bb4(%i3: index):", code) self.assertIn( @@ -342,10 +342,10 @@ class EdscTest(unittest.TestCase): code = str(fun) self.assertIn( - '"for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (10)}', + '"affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (10)}', code) self.assertIn( - '"for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)}', + '"affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (42)}', code) self.assertIn("%0 = load %arg0[%i0, %i1] : memref<10x42xf32>", code) self.assertIn("%1 = addf %0, %cst : f32", code) @@ -367,7 +367,7 @@ class EdscTest(unittest.TestCase): code = str(fun) self.assertIn( - '"for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (32)} : () -> ()', + '"affine.for"() {lower_bound: () -> (0), step: 1 : index, upper_bound: () -> (32)} : () -> ()', code) self.assertIn("%0 = load %arg0[%i0, %i2] : memref<32x32xf32>", code) self.assertIn("%1 = load %arg1[%i2, %i1] : memref<32x32xf32>", code) diff --git a/mlir/g3doc/Dialects/Affine.md b/mlir/g3doc/Dialects/Affine.md index 55d26f0d9568..0c69c60cbe91 100644 --- a/mlir/g3doc/Dialects/Affine.md +++ b/mlir/g3doc/Dialects/Affine.md @@ -15,7 +15,7 @@ loops and if instructions), the result of a [`affine.apply` operation](#'affine.apply'-operation) that recursively takes as arguments any symbolic identifiers. 
Dimensions may be bound not only to anything that a symbol is bound to, but also to induction variables of enclosing -[`for` operations](#'for'-operation), and the result of an +[`affine.for` operations](#'affine.for'-operation), and the result of an [`affine.apply` operation](#'affine.apply'-operation) (which recursively may use other dimensions and symbols). @@ -47,12 +47,12 @@ Example: %2 = affine.apply (i)[s0] -> (i+s0) (%42)[%n] ``` -#### 'for' operation {#'for'-operation} +#### 'affine.for' operation {#'affine.for'-operation} Syntax: ``` {.ebnf} -operation ::= `for` ssa-id `=` lower-bound `to` upper-bound +operation ::= `affine.for` ssa-id `=` lower-bound `to` upper-bound (`step` integer-literal)? `{` inst* `}` lower-bound ::= `max`? affine-map dim-and-symbol-use-list | shorthand-bound @@ -60,17 +60,17 @@ upper-bound ::= `min`? affine-map dim-and-symbol-use-list | shorthand-bound shorthand-bound ::= ssa-id | `-`? integer-literal ``` -The `for` operation represents an affine loop nest, defining an SSA value for -its induction variable. This SSA value always has type +The `affine.for` operation represents an affine loop nest, defining an SSA value +for its induction variable. This SSA value always has type [`index`](LangRef.md#index-type), which is the size of the machine word. -The `for` operation executes its body a number of times iterating from a lower -bound to an upper bound by a stride. The stride, represented by `step`, is a -positive constant integer which defaults to "1" if not present. The lower and +The `affine.for` operation executes its body a number of times iterating from a +lower bound to an upper bound by a stride. The stride, represented by `step`, is +a positive constant integer which defaults to "1" if not present. The lower and upper bounds specify a half-open range: the range includes the lower bound but does not include theĀ upper bound. -The lower and upper bounds of a `for` operation are represented as an +The lower and upper bounds of a `affine.for` operation are represented as an application of an affine mapping to a list of SSA values passed to the map. The [same restrictions](#restrictions-on-dimensions-and-symbols) hold for these SSA values as for all bindings of SSA values to dimensions and symbols. 
@@ -94,8 +94,8 @@ Example showing reverse iteration of the inner loop: func @simple_example(%A: memref, %B: memref) { %N = dim %A, 0 : memref - for %i = 0 to %N step 1 { - for %j = 0 to %N { // implicitly steps by 1 + affine.for %i = 0 to %N step 1 { + affine.for %j = 0 to %N { // implicitly steps by 1 %0 = affine.apply #map57(%j)[%N] %tmp = call @F1(%A, %i, %0) : (memref, index, index)->(f32) call @F2(%tmp, %B, %i, %0) : (f32, memref, index, index)->() @@ -130,8 +130,8 @@ Example: #set = (d0, d1)[s0]: (d0 - 10 >= 0, s0 - d0 - 9 >= 0, d1 - 10 >= 0, s0 - d1 - 9 >= 0) func @reduced_domain_example(%A, %X, %N) : (memref<10xi32>, i32, i32) { - for %i = 0 to %N { - for %j = 0 to %N { + affine.for %i = 0 to %N { + affine.for %j = 0 to %N { %0 = affine.apply #map42(%j) %tmp = call @S1(%X, %i, %0) affine.if #set(%i, %j)[%N] { diff --git a/mlir/g3doc/Dialects/SuperVector.md b/mlir/g3doc/Dialects/SuperVector.md index 09beb950e375..640325306c12 100644 --- a/mlir/g3doc/Dialects/SuperVector.md +++ b/mlir/g3doc/Dialects/SuperVector.md @@ -23,8 +23,8 @@ Examples: // pad with %f0 to handle the boundary case: %f0 = constant 0.0f : f32 for %i0 = 0 to %0 { - for %i1 = 0 to %1 step 256 { - for %i2 = 0 to %2 step 32 { + affine.for %i1 = 0 to %1 step 256 { + affine.for %i2 = 0 to %2 step 32 { %v = vector_transfer_read %A, %i0, %i1, %i2, %f0 {permutation_map: (d0, d1, d2) -> (d2, d1)} : (memref, index, index, f32) -> vector<32x256xf32> @@ -34,7 +34,7 @@ for %i0 = 0 to %0 { // vector<128xf32>. The underlying implementation will require a 1-D vector // broadcast: for %i0 = 0 to %0 { - for %i1 = 0 to %1 { + affine.for %i1 = 0 to %1 { %3 = vector_transfer_read %A, %i0, %i1 {permutation_map: (d0, d1) -> (0)} : (memref, index, index) -> vector<128xf32> @@ -81,8 +81,8 @@ A notional lowering of vector_transfer_read could generate code resembling: %tmp = alloc() : vector<3x4x5xf32> %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>> for %i = 0 to 3 { - for %j = 0 to 4 { - for %k = 0 to 5 { + affine.for %j = 0 to 4 { + affine.for %k = 0 to 5 { %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref store %tmp[%i, %j, %k] : vector<3x4x5xf32> }}} @@ -102,7 +102,7 @@ lowered code would resemble: %tmp = alloc() : vector<3x4x5xf32> %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>> for %i = 0 to 3 { - for %k = 0 to 5 { + affine.for %k = 0 to 5 { %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref store %tmp[%i, 0, %k] : vector<3x4x5xf32> }} @@ -130,9 +130,9 @@ Examples: ```mlir {.mlir} // write vector<16x32x64xf32> into the slice `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`: for %i0 = 0 to %0 { - for %i1 = 0 to %1 step 32 { - for %i2 = 0 to %2 step 64 { - for %i3 = 0 to %3 step 16 { + affine.for %i1 = 0 to %1 step 32 { + affine.for %i2 = 0 to %2 step 64 { + affine.for %i3 = 0 to %3 step 16 { %val = `ssa-value` : vector<16x32x64xf32> vector_transfer_write %val, %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} : diff --git a/mlir/g3doc/LangRef.md b/mlir/g3doc/LangRef.md index 9c248a924b95..13ab016a6ec8 100644 --- a/mlir/g3doc/LangRef.md +++ b/mlir/g3doc/LangRef.md @@ -40,10 +40,10 @@ which means that values are defined before use and have scope defined by their dominance relations. Operations may produce zero or more results, and each is a distinct SSA value with its own type defined by the [type system](#type-system). 
-MLIR incorporates polyhedral compiler concepts, including `for` and `affine.if` -operations defined by the [affine dialect](Dialects/Affine.md), which model -affine loops and affine conditionals. It also includes affine maps integrated -into the type system - they are key to the representation of data and +MLIR incorporates polyhedral compiler concepts, including `affine.for` and +`affine.if` operations defined by the [affine dialect](Dialects/Affine.md), +which model affine loops and affine conditionals. It also includes affine maps +integrated into the type system - they are key to the representation of data and [MemRefs](#memref-type), which are the representation for tensors in addressable memory. MLIR also supports a first-class Tensor type allowing it to concisely represent operations on N-dimensional arrays. @@ -99,10 +99,10 @@ func @multiply(%A: memref<100x?xf32>, %B: memref) %C = alloc() : memref<100x50xf32> // Multiplication loop nest. - for %i = 0 to 100 { - for %j = 0 to 50 { + affine.for %i = 0 to 100 { + affine.for %j = 0 to 50 { store 0 to %C[%i, %j] : memref<100x50xf32> - for %k = 0 to %n { + affine.for %k = 0 to %n { %a_v = load %A[%i, %k] : memref<100x?xf32> %b_v = load %B[%k, %j] : memref %prod = mulf %a_v, %b_v : f32 @@ -1697,8 +1697,8 @@ The arity of indices is the rank of the memref (i.e., if the memref loaded from is of rank 3, then 3 indices are required for the load following the memref identifier). -In an `affine.if` or `for` body, the indices of a load are restricted to SSA -values bound to surrounding loop induction variables, +In an `affine.if` or `affine.for` body, the indices of a load are restricted to +SSA values bound to surrounding loop induction variables, [symbols](#dimensions-and-symbols), results of a [`constant` operation](#'constant'-operation), or the result of an `affine.apply` operation that can in turn take as arguments all of the @@ -1719,10 +1719,10 @@ Example: **Context:** The `load` and `store` instructions are specifically crafted to fully resolve a reference to an element of a memref, and (in affine `affine.if` -and `for` instructions) the compiler can follow use-def chains (e.g. through -[`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) operations) to -precisely analyze references at compile-time using polyhedral techniques. This -is possible because of the +and `affine.for` instructions) the compiler can follow use-def chains (e.g. +through [`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) +operations) to precisely analyze references at compile-time using polyhedral +techniques. This is possible because of the [restrictions on dimensions and symbols](Dialects/Affine.md#restrictions-on-dimensions-and-symbols) in these contexts. @@ -1755,10 +1755,11 @@ store %100, %A[%1, 1023] : memref<4x?xf32, #layout, hbm> **Context:** The `load` and `store` instructions are specifically crafted to fully resolve a reference to an element of a memref, and (in polyhedral -`affine.if` and `for` instructions) the compiler can follow use-def chains (e.g. -through [`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) -operations) to precisely analyze references at compile-time using polyhedral -techniques. This is possible because of the +`affine.if` and `affine.for` instructions) the compiler can follow use-def +chains (e.g. through +[`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) operations) to +precisely analyze references at compile-time using polyhedral techniques. 
This +is possible because of the [restrictions on dimensions and symbols](Dialect/Affine.md#restrictions-on-dimensions-and-symbols) in these contexts. diff --git a/mlir/g3doc/Passes.md b/mlir/g3doc/Passes.md index 525918aa4296..8e5926aff3de 100644 --- a/mlir/g3doc/Passes.md +++ b/mlir/g3doc/Passes.md @@ -39,9 +39,9 @@ These restrictions may be lifted in the future. ### Output IR -Functions with `for` and `affine.if` instructions eliminated. These functions -may contain operations from the Standard dialect in addition to those already -present before the pass. +Functions with `affine.for` and `affine.if` instructions eliminated. These +functions may contain operations from the Standard dialect in addition to those +already present before the pass. ### Invariants @@ -95,10 +95,10 @@ Input ```mlir func @loop_nest_tiled() -> memref<256x1024xf32> { %0 = alloc() : memref<256x1024xf32> - for %i0 = 0 to 256 step 32 { - for %i1 = 0 to 1024 step 32 { - for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) { - for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) { + affine.for %i0 = 0 to 256 step 32 { + affine.for %i1 = 0 to 1024 step 32 { + affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) { + affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) { %1 = load %0[%i2, %i3] : memref<256x1024xf32> } } @@ -119,16 +119,16 @@ func @loop_nest_tiled() -> memref<256x1024xf32> { %c32 = constant 32 : index %c0 = constant 0 : index %0 = alloc() : memref<256x1024xf32> - for %i0 = 0 to 256 step 32 { - for %i1 = 0 to 1024 step 32 { + affine.for %i0 = 0 to 256 step 32 { + affine.for %i1 = 0 to 1024 step 32 { %1 = affine.apply #map1(%i0) %2 = affine.apply #map1(%i1) %3 = alloc() : memref<32x32xf32, 1> %4 = alloc() : memref<1xi32> dma_start %0[%1, %2], %3[%c0, %c0], %c1024, %4[%c0], %c1024, %c32 : memref<256x1024xf32>, memref<32x32xf32, 1>, memref<1xi32> dma_wait %4[%c0], %c1024 : memref<1xi32> - for %i2 = #map1(%i0) to #map2(%i0) { - for %i3 = #map1(%i1) to #map2(%i1) { + affine.for %i2 = #map1(%i0) to #map2(%i0) { + affine.for %i3 = #map1(%i1) to #map2(%i1) { %5 = affine.apply #map3(%i0, %i2) %6 = affine.apply #map3(%i1, %i3) %7 = load %3[%5, %6] : memref<32x32xf32, 1> @@ -194,8 +194,8 @@ Input func @store_load_affine_apply() -> memref<10x10xf32> { %cf7 = constant 7.0 : f32 %m = alloc() : memref<10x10xf32> - for %i0 = 0 to 10 { - for %i1 = 0 to 10 { + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { %t0 = affine.apply (d0, d1) -> (d1 + 1)(%i0, %i1) %t1 = affine.apply (d0, d1) -> (d0)(%i0, %i1) %idx0 = affine.apply (d0, d1) -> (d1) (%t0, %t1) @@ -217,8 +217,8 @@ Output func @store_load_affine_apply() -> memref<10x10xf32> { %cst = constant 7.000000e+00 : f32 %0 = alloc() : memref<10x10xf32> - for %i0 = 0 to 10 { - for %i1 = 0 to 10 { + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { %3 = affine.apply #map1(%1, %2) %4 = affine.apply #map2(%1, %2) store %cst, %0[%3, %4] : memref<10x10xf32> @@ -258,7 +258,7 @@ Input %2 = alloc() : memref<1xf32> %c0 = constant 0 : index %c128 = constant 128 : index - for %i0 = 0 to 8 { + affine.for %i0 = 0 to 8 { dma_start %0[%i0], %1[%i0], %c128, %2[%c0] : memref<256xf32>, memref<32xf32, 1>, memref<1xf32> dma_wait %2[%c0], %c128 : memref<1xf32> %3 = load %1[%i0] : memref<32xf32, 1> @@ -282,7 +282,7 @@ Output %1 = alloc() : memref<2x32xf32, 1> %2 = alloc() : memref<2x1xf32> dma_start %0[%c0], %1[%c0, %c0], %c128, %2[%c0, %c0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32> - for %i0 = 1 to 8 { + affine.for %i0 = 1 to 8 { %3 = affine.apply 
#map2(%i0) %4 = affine.apply #map2(%i0) dma_start %0[%i0], %1[%3, %i0], %c128, %2[%4, %c0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32> diff --git a/mlir/g3doc/Rationale.md b/mlir/g3doc/Rationale.md index bc2b14e289b8..91b215fc3112 100644 --- a/mlir/g3doc/Rationale.md +++ b/mlir/g3doc/Rationale.md @@ -150,8 +150,8 @@ func bar(%A : memref<8x?xf32, #lmap>) { // dynamically using dim instruction. %N = dim %A, 1 : memref<8x?xf32, #lmap> - for %i = 0 to 8 { - for %j = 0 to %N { + affine.for %i = 0 to 8 { + affine.for %j = 0 to %N { // A[i,j] += 1 %s1 = load %A [%i, %j] : memref<8x?xf32, #lmap> %s2 = add %s1, 1 @@ -548,7 +548,7 @@ nested in an outer function that using affine loops. func @search(memref %S, i32 %key) { %ni = dim %A, 0 : memref // This loop can be parallelized - for %i = 0 to %ni { + affine.for %i = 0 to %ni { call @search_body (%A, %S, %i) : (memref, memref, i32) } return @@ -582,9 +582,9 @@ func @search_body(%A: memref, %S: memref, %key: i32) { As per the [MLIR spec](LangRef.md), the restrictions on dimensions and symbol identifiers to be used with the affine.apply instruction only apply to accesses -inside `for` and `affine.if` instructions. However, an analysis of accesses -inside the called function (`@search_body`) is necessary to determine if the -`%i` loop could be parallelized: such function access analysis is calling +inside `affine.for` and `affine.if` instructions. However, an analysis of +accesses inside the called function (`@search_body`) is necessary to determine +if the `%i` loop could be parallelized: such function access analysis is calling context sensitive. ### Non-affine loop bounds {#non-affine-loop-bounds} @@ -604,8 +604,8 @@ for (i=0; i i32 { - for %k = 0 to %m { - for %l = 0 to %n { + affine.for %k = 0 to %m { + affine.for %l = 0 to %n { ... } } @@ -663,13 +663,13 @@ in a dilated convolution. func @conv2d(memref<16x1024x1024x3xf32, #lm0, vmem> %input, memref<5x5x3x32xf32, #lm0, vmem> %kernel, memref<16x512x512x32xf32, #lm0, vmem> %output) { - for %b = 0 to %batch { - for %oh = 0 to %output_height { - for %ow = 0 to %output_width { - for %of = 0 to %output_feature { - for %kh = 0 to %kernel_height { - for %kw = 0 to %kernel_width { - for %if = 0 to %input_feature { + affine.for %b = 0 to %batch { + affine.for %oh = 0 to %output_height { + affine.for %ow = 0 to %output_width { + affine.for %of = 0 to %output_feature { + affine.for %kh = 0 to %kernel_height { + affine.for %kw = 0 to %kernel_width { + affine.for %if = 0 to %input_feature { // Calculate input indices. %1_0 = affine.apply #map1_0 (%0#1, %0#2, %0#4, %0#5) [%h_stride, %w_stride, %h_kernel_dilation, %w_kernel_dilation, @@ -913,10 +913,10 @@ func @dma_hbm_to_vmem(memref<1024 x f32, #layout_map0, hbm> %a, representation. 2(b) requires no change, but impacts how cost models look at index and layout maps. -### `affine.if` and `for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"} +### `affine.if` and `affine.for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"} We considered providing a representation for SSA values that are live out of -`if/else` conditional bodies and loop carried in `for` loops. We +`if/else` conditional bodies and loop carried in `affine.for` loops. We ultimately abandoned this approach due to its complexity. In the current design of MLIR, scalar variables cannot escape for loops or if instructions. 
In situations, where escaping is necessary, we use zero-dimensional tensors and @@ -948,7 +948,7 @@ Example: // Return sum of elements in 1-dimensional mref A func int32 @sum(%A : memref, %N : i32) -> (i32) { %init = 0 - %result = for %i = 0 to N with %tmp(%init) { + %result = affine.for %i = 0 to N with %tmp(%init) { %value = load %A[%i] %sum = %value + %tmp yield %sum @@ -978,7 +978,7 @@ Example: // Compute sum of half of the array func int32 @sum_half(%A, %N) { %s0 = 0 - %s1 = for %i = 1 ... N step 1 with %s2 (%s0) { + %s1 = affine.for %i = 1 ... N step 1 with %s2 (%s0) { %s3 = if (%i >= %N / 2) { %v0 = load %A[%i] %s4 = %s2 + %v0 diff --git a/mlir/g3doc/RationaleSimplifiedPolyhedralForm.md b/mlir/g3doc/RationaleSimplifiedPolyhedralForm.md index f51eff45633a..b40f6708d0df 100644 --- a/mlir/g3doc/RationaleSimplifiedPolyhedralForm.md +++ b/mlir/g3doc/RationaleSimplifiedPolyhedralForm.md @@ -184,8 +184,8 @@ Our simple example above would be represented as: ```mlir mlfunc @simple_example(... %N) { - for %i = 0 ... %N step 1 { - for %j = 0 ... %N step 1 { + affine.for %i = 0 ... %N step 1 { + affine.for %j = 0 ... %N step 1 { // identity noop in this case, but can exist in general. %0,%1 = affine.apply #57(%i, %j) @@ -203,8 +203,8 @@ The example with the reduced domain would be represented with an if instruction: ```mlir mlfunc @reduced_domain_example(... %N) { - for %i = 0 ... %N step 1 { - for %j = 0 ... %N step 1 { + affine.for %i = 0 ... %N step 1 { + affine.for %j = 0 ... %N step 1 { // identity noop in this case, but can exist in general. %0,%1 = affinecall #57(%i, %j) @@ -233,8 +233,8 @@ that transformations call into): ```mlir mlfunc @skewed_domain_example(... %N) { - for %t1 = 0 ... 2*N-2 step 1 { - for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 { + affine.for %t1 = 0 ... 2*N-2 step 1 { + affine.for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 { (%i, %j) = (%t1-%t2, %t2) ... } @@ -373,7 +373,7 @@ mlfunc's (if we support them) will also have to have domains. ### Lack of redundancy in IR The traditional form has multiple encodings for the same sorts of behavior: you -end up having bits on `for` loops to specify whether codegen should use +end up having bits on `affine.for` loops to specify whether codegen should use "atomic/separate" policies, unroll loops, etc. Instructions can be split or can generate multiple copies of their instruction because of overlapping domains, etc. diff --git a/mlir/include/mlir/AffineOps/AffineOps.h b/mlir/include/mlir/AffineOps/AffineOps.h index 2620db1407aa..d8e34dc7248b 100644 --- a/mlir/include/mlir/AffineOps/AffineOps.h +++ b/mlir/include/mlir/AffineOps/AffineOps.h @@ -88,15 +88,15 @@ public: MLIRContext *context); }; -/// The "for" instruction represents an affine loop nest, defining an SSA value -/// for its induction variable. The induction variable is represented as a +/// The "affine.for" instruction represents an affine loop nest, defining an SSA +/// value for its induction variable. The induction variable is represented as a /// BlockArgument to the entry block of the body. The body and induction -/// variable can be created automatically for new "for" ops with 'createBody'. -/// This SSA value always has type index, which is the size of the machine word. -/// The stride, represented by step, is a positive constant integer which -/// defaults to "1" if not present. The lower and upper bounds specify a -/// half-open range: the range includes the lower bound but does not include the -/// upper bound. 
+/// variable can be created automatically for new "affine.for" ops with +/// 'createBody'. This SSA value always has type index, which is the size of the +/// machine word. The stride, represented by step, is a positive constant +/// integer which defaults to "1" if not present. The lower and upper bounds +/// specify a half-open range: the range includes the lower bound but does not +/// include the upper bound. /// /// The lower and upper bounds of a for operation are represented as an /// application of an affine mapping to a list of SSA values passed to the map. @@ -108,7 +108,7 @@ public: /// /// Example: /// -/// for %i = 1 to 10 { +/// affine.for %i = 1 to 10 { /// ... /// } /// @@ -131,7 +131,7 @@ public: static void getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context); - static StringRef getOperationName() { return "for"; } + static StringRef getOperationName() { return "affine.for"; } static StringRef getStepAttrName() { return "step"; } static StringRef getLowerBoundAttrName() { return "lower_bound"; } static StringRef getUpperBoundAttrName() { return "upper_bound"; } @@ -268,10 +268,10 @@ public: operand_range getOperands() { return {operand_begin(), operand_end()}; } private: - // 'for' instruction that contains this bound. + // 'affine.for' instruction that contains this bound. AffineForOp inst; // Start and end positions of this affine bound operands in the list of - // the containing 'for' instruction operands. + // the containing 'affine.for' instruction operands. unsigned opStart, opEnd; // Affine map for this bound. AffineMap map; diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index 92c809326e38..f9ea873d0f77 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -376,14 +376,15 @@ public: AffineExpr toAffineExpr(unsigned idx, MLIRContext *context); - /// Adds constraints (lower and upper bounds) for the specified 'for' + /// Adds constraints (lower and upper bounds) for the specified 'affine.for' /// instruction's Value using IR information stored in its bound maps. The /// right identifier is first looked up using forOp's Value. Asserts if the - /// Value corresponding to the 'for' instruction isn't found in the constraint - /// system. Returns failure for the yet unimplemented/unsupported cases. Any - /// new identifiers that are found in the bound operands of the 'for' - /// instruction are added as trailing identifiers (either dimensional or - /// symbolic depending on whether the operand is a valid ML Function symbol). + /// Value corresponding to the 'affine.for' instruction isn't found in the + /// constraint system. Returns failure for the yet unimplemented/unsupported + /// cases. Any new identifiers that are found in the bound operands of the + /// 'affine.for' instruction are added as trailing identifiers (either + /// dimensional or symbolic depending on whether the operand is a valid ML + /// Function symbol). // TODO(bondhugula): add support for non-unit strides. LogicalResult addAffineForOpDomain(AffineForOp forOp); diff --git a/mlir/include/mlir/Analysis/Utils.h b/mlir/include/mlir/Analysis/Utils.h index 96e73166ca47..382ff8259956 100644 --- a/mlir/include/mlir/Analysis/Utils.h +++ b/mlir/include/mlir/Analysis/Utils.h @@ -45,7 +45,7 @@ class Instruction; class Value; /// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from -/// the outermost 'for' instruction to the innermost one. 
+/// the outermost 'affine.for' instruction to the innermost one. // TODO(bondhugula): handle 'affine.if' inst's. void getLoopIVs(Instruction &inst, SmallVectorImpl *loops); @@ -113,8 +113,8 @@ AffineForOp insertBackwardComputationSlice(Instruction *srcOpInst, /// surrounding such op's. // For example, the memref region for a load operation at loop depth = 1: // -// for %i = 0 to 32 { -// for %ii = %i to (d0) -> (d0 + 8) (%i) { +// affine.for %i = 0 to 32 { +// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) { // load %A[%ii] // } // } @@ -146,8 +146,8 @@ struct MemRefRegion { /// For example, the memref region for this operation at loopDepth = 1 will /// be: /// - /// for %i = 0 to 32 { - /// for %ii = %i to (d0) -> (d0 + 8) (%i) { + /// affine.for %i = 0 to 32 { + /// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) { /// load %A[%ii] /// } /// } diff --git a/mlir/include/mlir/Analysis/VectorAnalysis.h b/mlir/include/mlir/Analysis/VectorAnalysis.h index f8ed1dd28198..ffe4ea703327 100644 --- a/mlir/include/mlir/Analysis/VectorAnalysis.h +++ b/mlir/include/mlir/Analysis/VectorAnalysis.h @@ -76,9 +76,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType); /// The following MLIR snippet: /// /// ```mlir -/// for %i3 = 0 to %0 { -/// for %i4 = 0 to %1 { -/// for %i5 = 0 to %2 { +/// affine.for %i3 = 0 to %0 { +/// affine.for %i4 = 0 to %1 { +/// affine.for %i5 = 0 to %2 { /// %a5 = load %arg0[%i4, %i5, %i3] : memref /// }}} /// ``` @@ -86,9 +86,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType); /// may vectorize with {permutation_map: (d0, d1, d2) -> (d2, d1)} into: /// /// ```mlir -/// for %i3 = 0 to %0 step 32 { -/// for %i4 = 0 to %1 { -/// for %i5 = 0 to %2 step 256 { +/// affine.for %i3 = 0 to %0 step 32 { +/// affine.for %i4 = 0 to %1 { +/// affine.for %i5 = 0 to %2 step 256 { /// %4 = vector_transfer_read %arg0, %i4, %i5, %i3 /// {permutation_map: (d0, d1, d2) -> (d2, d1)} : /// (memref, index, index) -> vector<32x256xf32> @@ -103,7 +103,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType); /// /// ```mlir /// %cst0 = constant 0 : index -/// for %i0 = 0 to %0 { +/// affine.for %i0 = 0 to %0 { /// %a0 = load %arg0[%cst0, %cst0] : memref /// } /// ``` @@ -111,7 +111,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType); /// may vectorize with {permutation_map: (d0) -> (0)} into: /// /// ```mlir -/// for %i0 = 0 to %0 step 128 { +/// affine.for %i0 = 0 to %0 step 128 { /// %3 = vector_transfer_read %arg0, %c0_0, %c0_0 /// {permutation_map: (d0, d1) -> (0)} : /// (memref, index, index) -> vector<128xf32> diff --git a/mlir/include/mlir/EDSC/Types.h b/mlir/include/mlir/EDSC/Types.h index 35216684169a..f0ebbed1959b 100644 --- a/mlir/include/mlir/EDSC/Types.h +++ b/mlir/include/mlir/EDSC/Types.h @@ -341,7 +341,8 @@ protected: /// (e.g. vectorValue = load(vectorView, zero)). /// /// Only ExprKind::StmtBlockLikeExpr have `enclosedStmts`, these comprise: -/// 1. `For`-loops for which the `lhs` binds to the induction variable, `rhs` +/// 1. `affine.for`-loops for which the `lhs` binds to the induction variable, +/// `rhs` /// binds to an Expr of kind `ExprKind::For` with lower-bound, upper-bound and /// step respectively. // TODO(zinenko): this StmtBlockLikeExpr should be retired in favor of Expr @@ -647,7 +648,7 @@ Stmt For(llvm::ArrayRef indices, llvm::ArrayRef lbs, llvm::ArrayRef ubs, llvm::ArrayRef steps, llvm::ArrayRef enclosedStmts); -/// Define a 'for' loop from with multi-valued bounds. 
+/// Define a 'affine.for' loop from with multi-valued bounds. /// /// for max(lbs...) to min(ubs...) {} /// diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h index 0404ab74244e..1d5203e77d5a 100644 --- a/mlir/include/mlir/Transforms/LoopUtils.h +++ b/mlir/include/mlir/Transforms/LoopUtils.h @@ -73,9 +73,10 @@ void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor, SmallVectorImpl *operands, FuncBuilder *builder); -/// Skew the instructions in the body of a 'for' instruction with the specified -/// instruction-wise shifts. The shifts are with respect to the original -/// execution order, and are multiplied by the loop 'step' before being applied. +/// Skew the instructions in the body of a 'affine.for' instruction with the +/// specified instruction-wise shifts. The shifts are with respect to the +/// original execution order, and are multiplied by the loop 'step' before being +/// applied. LLVM_NODISCARD LogicalResult instBodySkew(AffineForOp forOp, ArrayRef shifts, bool unrollPrologueEpilogue = false); diff --git a/mlir/include/mlir/Transforms/Utils.h b/mlir/include/mlir/Transforms/Utils.h index ab5660be8711..b8976669f976 100644 --- a/mlir/include/mlir/Transforms/Utils.h +++ b/mlir/include/mlir/Transforms/Utils.h @@ -95,14 +95,14 @@ Instruction *createComposedAffineApplyOp(FuncBuilder *builder, Location loc, /// /// Before /// -/// for %i = 0 to #map(%N) +/// affine.for %i = 0 to #map(%N) /// %idx = affine.apply (d0) -> (d0 mod 2) (%i) /// send %A[%idx], ... /// %v = "compute"(%idx, ...) /// /// After /// -/// for %i = 0 to #map(%N) +/// affine.for %i = 0 to #map(%N) /// %idx = affine.apply (d0) -> (d0 mod 2) (%i) /// send %A[%idx], ... /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i) diff --git a/mlir/lib/AffineOps/AffineOps.cpp b/mlir/lib/AffineOps/AffineOps.cpp index 4badde9012b5..92035489e216 100644 --- a/mlir/lib/AffineOps/AffineOps.cpp +++ b/mlir/lib/AffineOps/AffineOps.cpp @@ -36,7 +36,7 @@ using llvm::dbgs; //===----------------------------------------------------------------------===// AffineOpsDialect::AffineOpsDialect(MLIRContext *context) - : Dialect(/*namePrefix=*/"", context) { + : Dialect(/*namePrefix=*/"affine", context) { addOperations(); } @@ -69,7 +69,7 @@ bool mlir::isValidDim(Value *value) { return isTopLevelSymbol(dimOp->getOperand()); return false; } - // This value is a block argument (which also includes 'for' loop IVs). + // This value is a block argument (which also includes 'affine.for' loop IVs). return true; } @@ -969,7 +969,7 @@ static void printBound(AffineBound bound, const char *prefix, OpAsmPrinter *p) { } void AffineForOp::print(OpAsmPrinter *p) { - *p << "for "; + *p << "affine.for "; p->printOperand(getBody()->getArgument(0)); *p << " = "; printBound(getLowerBound(), "max", p); diff --git a/mlir/lib/Analysis/AffineAnalysis.cpp b/mlir/lib/Analysis/AffineAnalysis.cpp index f786731e88a1..e2e9ef68b172 100644 --- a/mlir/lib/Analysis/AffineAnalysis.cpp +++ b/mlir/lib/Analysis/AffineAnalysis.cpp @@ -708,8 +708,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const { // For example, given the following MLIR code with with "source" and // "destination" accesses to the same memref labled, and symbols %M, %N, %K: // -// for %i0 = 0 to 100 { -// for %i1 = 0 to 50 { +// affine.for %i0 = 0 to 100 { +// affine.for %i1 = 0 to 50 { // %a0 = affine.apply // (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N] // // Source memref access. 
@@ -717,8 +717,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const { // } // } // -// for %i2 = 0 to 100 { -// for %i3 = 0 to 50 { +// affine.for %i2 = 0 to 100 { +// affine.for %i3 = 0 to 50 { // %a1 = affine.apply // (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M] // // Destination memref access. diff --git a/mlir/lib/Analysis/TestParallelismDetection.cpp b/mlir/lib/Analysis/TestParallelismDetection.cpp index b954f0e67d9a..7ed59b403cde 100644 --- a/mlir/lib/Analysis/TestParallelismDetection.cpp +++ b/mlir/lib/Analysis/TestParallelismDetection.cpp @@ -15,7 +15,7 @@ // limitations under the License. // ============================================================================= // -// This file implements a pass to detect parallel affine 'for' ops. +// This file implements a pass to detect parallel affine 'affine.for' ops. // //===----------------------------------------------------------------------===// @@ -40,7 +40,8 @@ FunctionPassBase *mlir::createParallelismDetectionTestPass() { return new TestParallelismDetection(); } -// Walks the function and emits a note for all 'for' ops detected as parallel. +// Walks the function and emits a note for all 'affine.for' ops detected as +// parallel. void TestParallelismDetection::runOnFunction() { Function *f = getFunction(); FuncBuilder b(f); diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp index 5a6e1f84b35f..6bc395c46bdd 100644 --- a/mlir/lib/Analysis/Utils.cpp +++ b/mlir/lib/Analysis/Utils.cpp @@ -38,11 +38,11 @@ using namespace mlir; using llvm::SmallDenseMap; /// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from -/// the outermost 'for' instruction to the innermost one. +/// the outermost 'affine.for' instruction to the innermost one. void mlir::getLoopIVs(Instruction &inst, SmallVectorImpl *loops) { auto *currInst = inst.getParentInst(); AffineForOp currAffineForOp; - // Traverse up the hierarchy collecing all 'for' instruction while + // Traverse up the hierarchy collecing all 'affine.for' instruction while // skipping over 'affine.if' instructions. while (currInst && ((currAffineForOp = currInst->dyn_cast()) || currInst->isa())) { @@ -162,8 +162,8 @@ LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) { // For example, the memref region for this load operation at loopDepth = 1 will // be as below: // -// for %i = 0 to 32 { -// for %ii = %i to (d0) -> (d0 + 8) (%i) { +// affine.for %i = 0 to 32 { +// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) { // load %A[%ii] // } // } @@ -683,7 +683,7 @@ static Optional getMemoryFootprintBytes(Block &block, int memorySpace) { SmallDenseMap, 4> regions; - // Walk this 'for' instruction to gather all memory regions. + // Walk this 'affine.for' instruction to gather all memory regions. 
bool error = false; block.walk(start, end, [&](Instruction *opInst) { if (!opInst->isa() && !opInst->isa()) { diff --git a/mlir/lib/EDSC/MLIREmitter.cpp b/mlir/lib/EDSC/MLIREmitter.cpp index 1196748a0af1..89c66b089412 100644 --- a/mlir/lib/EDSC/MLIREmitter.cpp +++ b/mlir/lib/EDSC/MLIREmitter.cpp @@ -146,7 +146,8 @@ Value *mlir::edsc::MLIREmitter::emitExpr(Expr e) { if (auto expr = e.dyn_cast()) { if (expr.getKind() == ExprKind::For) { auto exprGroups = expr.getAllArgumentGroups(); - assert(exprGroups.size() == 3 && "expected 3 expr groups in `for`"); + assert(exprGroups.size() == 3 && + "expected 3 expr groups in `affine.for`"); assert(!exprGroups[0].empty() && "expected at least one lower bound"); assert(!exprGroups[1].empty() && "expected at least one upper bound"); assert(exprGroups[2].size() == 1 && diff --git a/mlir/lib/EDSC/Types.cpp b/mlir/lib/EDSC/Types.cpp index 72c453718e7c..ac8b98e38c30 100644 --- a/mlir/lib/EDSC/Types.cpp +++ b/mlir/lib/EDSC/Types.cpp @@ -526,8 +526,8 @@ Stmt mlir::edsc::For(llvm::ArrayRef idxs, llvm::ArrayRef lbs, Stmt mlir::edsc::MaxMinFor(const Bindable &idx, ArrayRef lbs, ArrayRef ubs, Expr step, ArrayRef enclosedStmts) { - assert(!lbs.empty() && "'for' loop must have lower bounds"); - assert(!ubs.empty() && "'for' loop must have upper bounds"); + assert(!lbs.empty() && "'affine.for' loop must have lower bounds"); + assert(!ubs.empty() && "'affine.for' loop must have upper bounds"); // Use a null expression as a sentinel between lower and upper bound // expressions in the list of children. @@ -964,7 +964,7 @@ void mlir::edsc::Expr::print(raw_ostream &os) const { } else if (auto stmtLikeExpr = this->dyn_cast()) { switch (stmtLikeExpr.getKind()) { // We only print the lb, ub and step here, which are the StmtBlockLike - // part of the `for` StmtBlockLikeExpr. + // part of the `affine.for` StmtBlockLikeExpr. case ExprKind::For: { auto exprGroups = stmtLikeExpr.getAllArgumentGroups(); assert(exprGroups.size() == 3 && diff --git a/mlir/lib/Transforms/DmaGeneration.cpp b/mlir/lib/Transforms/DmaGeneration.cpp index 7f8c7e411e8f..4fa040d73eb8 100644 --- a/mlir/lib/Transforms/DmaGeneration.cpp +++ b/mlir/lib/Transforms/DmaGeneration.cpp @@ -343,7 +343,7 @@ bool DmaGeneration::generateDma(const MemRefRegion ®ion, Block *block, auto fastMemRefType = top.getMemRefType( fastBufferShape, memRefType.getElementType(), {}, fastMemorySpace); - // Create the fast memory space buffer just before the 'for' + // Create the fast memory space buffer just before the 'affine.for' // instruction. fastMemRef = prologue.create(loc, fastMemRefType)->getResult(); // Record it. @@ -472,7 +472,7 @@ bool DmaGeneration::runOnBlock(Block *block) { // approach is conservative in some cases at the moment, we do a check later // and report an error with location info. // TODO(bondhugula): An 'affine.if' instruction is being treated similar to an - // operation instruction. 'affine.if''s could have 'for's in them; + // operation instruction. 'affine.if''s could have 'affine.for's in them; // treat them separately. // Get to the first load, store, or for op. @@ -494,7 +494,7 @@ bool DmaGeneration::runOnBlock(Block *block) { fastMemCapacityBytes); }; - // If the memory footprint of the 'for' loop is higher than fast + // If the memory footprint of the 'affine.for' loop is higher than fast // memory capacity (when provided), we recurse to DMA at an inner level // until we find a depth at which footprint fits in fast mem capacity. If // the footprint can't be calculated, we assume for now it fits. 
Recurse @@ -507,7 +507,7 @@ bool DmaGeneration::runOnBlock(Block *block) { runOnBlock(/*begin=*/curBegin, /*end=*/it); // Recurse onto the body of this loop. runOnBlock(forOp->getBody()); - // The next region starts right after the 'for' instruction. + // The next region starts right after the 'affine.for' instruction. curBegin = std::next(it); } else { // We have enough capacity, i.e., DMAs will be computed for the portion @@ -698,7 +698,8 @@ uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) { [&](const SmallMapVector, 4> ®ions) { for (const auto ®ionEntry : regions) { - // For each region, hoist DMA transfer past all invariant 'for's. + // For each region, hoist DMA transfer past all invariant + // 'affine.for's. Block::iterator dmaPlacementReadStart, dmaPlacementWriteStart; Block *dmaPlacementBlock; findHighestBlockForPlacement( diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp index 8e1fc5053484..84644bf11a04 100644 --- a/mlir/lib/Transforms/LoopFusion.cpp +++ b/mlir/lib/Transforms/LoopFusion.cpp @@ -641,7 +641,8 @@ bool MemRefDependenceGraph::init(Function *f) { // all loads and store accesses it contains. LoopNestStateCollector collector; collector.collect(&inst); - // Return false if a non 'for' region was found (not currently supported). + // Return false if a non 'affine.for' region was found (not currently + // supported). if (collector.hasNonForRegion) return false; Node node(nextNodeId++, &inst); diff --git a/mlir/lib/Transforms/LoopTiling.cpp b/mlir/lib/Transforms/LoopTiling.cpp index 0b629531df00..314864d3f3cb 100644 --- a/mlir/lib/Transforms/LoopTiling.cpp +++ b/mlir/lib/Transforms/LoopTiling.cpp @@ -258,7 +258,8 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef band, // Returns all maximal outermost perfect loop nests to tile. static void getTileableBands(Function *f, std::vector> *bands) { - // Get maximal perfect nest of 'for' insts starting from root (inclusive). + // Get maximal perfect nest of 'affine.for' insts starting from root + // (inclusive). auto getMaximalPerfectLoopNest = [&](AffineForOp root) { SmallVector band; AffineForOp currInst = root; diff --git a/mlir/lib/Transforms/LoopUnroll.cpp b/mlir/lib/Transforms/LoopUnroll.cpp index a16237e6452a..173a171e589c 100644 --- a/mlir/lib/Transforms/LoopUnroll.cpp +++ b/mlir/lib/Transforms/LoopUnroll.cpp @@ -158,8 +158,8 @@ void LoopUnroll::runOnFunction() { } } -/// Unrolls a 'for' inst. Returns success if the loop was unrolled, failure -/// otherwise. The default unroll factor is 4. +/// Unrolls a 'affine.for' inst. Returns success if the loop was unrolled, +/// failure otherwise. The default unroll factor is 4. LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) { // Use the function callback if one was provided. if (getUnrollFactor) { diff --git a/mlir/lib/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Transforms/LoopUnrollAndJam.cpp index 03c06b4b4500..240f39604881 100644 --- a/mlir/lib/Transforms/LoopUnrollAndJam.cpp +++ b/mlir/lib/Transforms/LoopUnrollAndJam.cpp @@ -96,7 +96,7 @@ void LoopUnrollAndJam::runOnFunction() { runOnAffineForOp(forOp); } -/// Unroll and jam a 'for' inst. Default unroll jam factor is +/// Unroll and jam a 'affine.for' inst. Default unroll jam factor is /// kDefaultUnrollJamFactor. Return failure if nothing was done. LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) { // Unroll and jam by the factor that was passed if any. 
diff --git a/mlir/lib/Transforms/LowerAffine.cpp b/mlir/lib/Transforms/LowerAffine.cpp index 3061bcd254db..cb65720cee33 100644 --- a/mlir/lib/Transforms/LowerAffine.cpp +++ b/mlir/lib/Transforms/LowerAffine.cpp @@ -276,7 +276,7 @@ static Value *buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate, return value; } -// Convert a "for" loop to a flow of blocks. Return `false` on success. +// Convert a "affine.for" loop to a flow of blocks. Return `false` on success. // // Create an SESE region for the loop (including its body) and append it to the // end of the current region. The loop region consists of the initialization @@ -323,8 +323,9 @@ bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) { auto loc = forOp->getLoc(); auto *forInst = forOp->getInstruction(); - // Start by splitting the block containing the 'for' into two parts. The part - // before will get the init code, the part after will be the end point. + // Start by splitting the block containing the 'affine.for' into two parts. + // The part before will get the init code, the part after will be the end + // point. auto *initBlock = forInst->getBlock(); auto *endBlock = initBlock->splitBlock(forInst); diff --git a/mlir/lib/Transforms/LowerVectorTransfers.cpp b/mlir/lib/Transforms/LowerVectorTransfers.cpp index cde28c6517d7..7f6be3581897 100644 --- a/mlir/lib/Transforms/LowerVectorTransfers.cpp +++ b/mlir/lib/Transforms/LowerVectorTransfers.cpp @@ -56,9 +56,9 @@ /// // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into /// // vector<32x256xf32> and pad with %f0 to handle the boundary case: /// %f0 = constant 0.0f : f32 -/// for %i0 = 0 to %0 { -/// for %i1 = 0 to %1 step 256 { -/// for %i2 = 0 to %2 step 32 { +/// affine.for %i0 = 0 to %0 { +/// affine.for %i1 = 0 to %1 step 256 { +/// affine.for %i2 = 0 to %2 step 32 { /// %v = vector_transfer_read %A, %i0, %i1, %i2, %f0 /// {permutation_map: (d0, d1, d2) -> (d2, d1)} : /// (memref, index, index, f32) -> vector<32x256xf32> @@ -70,8 +70,8 @@ /// abstraction): /// /// ```mlir {.mlir} -/// for %d2 = 0 to 256 { -/// for %d1 = 0 to 32 { +/// affine.for %d2 = 0 to 256 { +/// affine.for %d1 = 0 to 32 { /// %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32 /// %tmp[%d2, %d1] = %s /// } diff --git a/mlir/lib/Transforms/MaterializeVectors.cpp b/mlir/lib/Transforms/MaterializeVectors.cpp index 0a7eaabbb09d..ebdb0c8e83eb 100644 --- a/mlir/lib/Transforms/MaterializeVectors.cpp +++ b/mlir/lib/Transforms/MaterializeVectors.cpp @@ -100,10 +100,10 @@ /// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) { /// %A = alloc (%M, %N, %O, %P) : memref /// %f1 = constant splat, 1.000000e+00> : -/// vector<4x4x4xf32> for %i0 = 0 to %M step 4 { -/// for %i1 = 0 to %N step 4 { -/// for %i2 = 0 to %O { -/// for %i3 = 0 to %P step 4 { +/// vector<4x4x4xf32> affine.for %i0 = 0 to %M step 4 { +/// affine.for %i1 = 0 to %N step 4 { +/// affine.for %i2 = 0 to %O { +/// affine.for %i3 = 0 to %P step 4 { /// vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3 /// {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : /// vector<4x4x4xf32>, memref, @@ -119,10 +119,10 @@ /// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) { /// %A = alloc (%M, %N, %O, %P) : memref /// %f1 = constant splat, 1.000000e+00> : vector<4x4x4xf32> -/// for %i0 = 0 to %arg0 step 4 { -/// for %i1 = 0 to %arg1 step 4 { -/// for %i2 = 0 to %arg2 { -/// for %i3 = 0 to %arg3 step 4 { +/// affine.for %i0 = 0 to %arg0 step 4 { +/// affine.for %i1 = 0 to %arg1 step 4 { +/// affine.for %i2 = 0 to %arg2 { +/// 
affine.for %i3 = 0 to %arg3 step 4 { /// %1 = affine.apply (d0, d1, d2, d3) -> (d0, d1, d2, d3) /// (%i0, %i1, %i2, %i3) /// vector_transfer_write f1, %0, %1#0, %1#1, %1#2, %1#3 @@ -286,10 +286,10 @@ static Value *substitute(Value *v, VectorType hwVectorType, /// super-vectorization has been applied: /// /// ```mlir -/// for %i0 = 0 to %M { -/// for %i1 = 0 to %N step 3 { -/// for %i2 = 0 to %O { -/// for %i3 = 0 to %P step 32 { +/// affine.for %i0 = 0 to %M { +/// affine.for %i1 = 0 to %N step 3 { +/// affine.for %i2 = 0 to %O { +/// affine.for %i3 = 0 to %P step 32 { /// %r = vector_transfer_read(%A, map(%i..)#0, map(%i..)#1, map(%i..)#2) /// -> vector<3x32xf32> /// ... diff --git a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp index a35a159443d5..a7045b3b541a 100644 --- a/mlir/lib/Transforms/MemRefDataFlowOpt.cpp +++ b/mlir/lib/Transforms/MemRefDataFlowOpt.cpp @@ -19,7 +19,7 @@ // potentially getting rid of intermediate memref's entirely. // TODO(mlir-team): In the future, similar techniques could be used to eliminate // dead memref store's and perform more complex forwarding when support for -// SSA scalars live out of 'for'/'affine.if' statements is available. +// SSA scalars live out of 'affine.for'/'affine.if' statements is available. //===----------------------------------------------------------------------===// #include "mlir/Analysis/AffineAnalysis.h" @@ -55,7 +55,7 @@ namespace { // // (* A dependence being satisfied at a block: a dependence that is satisfied by // virtue of the destination instruction appearing textually / lexically after -// the source instruction within the body of a 'for' instruction; thus, a +// the source instruction within the body of a 'affine.for' instruction; thus, a // dependence is always either satisfied by a loop or by a block). // // The above conditions are simple to check, sufficient, and powerful for most @@ -139,8 +139,8 @@ void MemRefDataFlowOpt::forwardStoreToLoad(LoadOp loadOp) { // Check if this store is a candidate for forwarding; we only forward if // the dependence from the store is carried by the *body* of innermost // common surrounding loop. As an example this filters out cases like: - // for %i0 - // for %i1 + // affine.for %i0 + // affine.for %i1 // %idx = affine.apply (d0) -> (d0 + 1) (%i0) // store %A[%idx] // load %A[%i0] diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp index 9809a1460721..b59071aa9fe0 100644 --- a/mlir/lib/Transforms/PipelineDataTransfer.cpp +++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp @@ -66,11 +66,11 @@ static unsigned getTagMemRefPos(Instruction &dmaInst) { return 0; } -/// Doubles the buffer of the supplied memref on the specified 'for' instruction -/// by adding a leading dimension of size two to the memref. Replaces all uses -/// of the old memref by the new one while indexing the newly added dimension by -/// the loop IV of the specified 'for' instruction modulo 2. Returns false if -/// such a replacement cannot be performed. +/// Doubles the buffer of the supplied memref on the specified 'affine.for' +/// instruction by adding a leading dimension of size two to the memref. +/// Replaces all uses of the old memref by the new one while indexing the newly +/// added dimension by the loop IV of the specified 'affine.for' instruction +/// modulo 2. Returns false if such a replacement cannot be performed. 
static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) { auto *forBody = forOp->getBody(); FuncBuilder bInner(forBody, forBody->begin()); @@ -104,7 +104,7 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) { dynamicDimCount++)); } - // Create and place the alloc right before the 'for' instruction. + // Create and place the alloc right before the 'affine.for' instruction. Value *newMemRef = bOuter.create(forInst->getLoc(), newMemRefType, allocOperands); @@ -139,9 +139,9 @@ static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) { /// Returns success if the IR is in a valid state. void PipelineDataTransfer::runOnFunction() { // Do a post order walk so that inner loop DMAs are processed first. This is - // necessary since 'for' instructions nested within would otherwise become - // invalid (erased) when the outer loop is pipelined (the pipelined one gets - // deleted and replaced by a prologue, a new steady-state loop and an + // necessary since 'affine.for' instructions nested within would otherwise + // become invalid (erased) when the outer loop is pipelined (the pipelined one + // gets deleted and replaced by a prologue, a new steady-state loop and an // epilogue). forOps.clear(); getFunction()->walkPostOrder( diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index e5f1fef990f0..bf0c3ced2e2a 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -71,7 +71,7 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor, auto lb = b->create(forOp->getLoc(), lbMap, lbOperands); // For each upper bound expr, get the range. - // Eg: for %i = lb to min (ub1, ub2), + // Eg: affine.for %i = lb to min (ub1, ub2), // where tripCountExprs yield (tr1, tr2), we create affine.apply's: // lb + tr1 - tr1 % ufactor, lb + tr2 - tr2 % ufactor; the results of all // these affine.apply's make up the cleanup loop lower bound. @@ -161,8 +161,8 @@ void mlir::promoteSingleIterationLoops(Function *f) { [](AffineForOp forOp) { promoteIfSingleIteration(forOp); }); } -/// Generates a 'for' inst with the specified lower and upper bounds while -/// generating the right IV remappings for the shifted instructions. The +/// Generates a 'affine.for' inst with the specified lower and upper bounds +/// while generating the right IV remappings for the shifted instructions. The /// instruction blocks that go into the loop are specified in instGroupQueue /// starting from the specified offset, and in that order; the first element of /// the pair specifies the shift applied to that group of instructions; note @@ -216,10 +216,10 @@ generateLoop(AffineMap lbMap, AffineMap ubMap, return loopChunk; } -/// Skew the instructions in the body of a 'for' instruction with the specified -/// instruction-wise shifts. The shifts are with respect to the original -/// execution order, and are multiplied by the loop 'step' before being applied. -/// A shift of zero for each instruction will lead to no change. +/// Skew the instructions in the body of a 'affine.for' instruction with the +/// specified instruction-wise shifts. The shifts are with respect to the +/// original execution order, and are multiplied by the loop 'step' before being +/// applied. A shift of zero for each instruction will lead to no change. 
// The skewing of instructions with respect to one another can be used for // example to allow overlap of asynchronous operations (such as DMA // communication) with computation, or just relative shifting of instructions @@ -267,7 +267,7 @@ LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef shifts, // An array of instruction groups sorted by shift amount; each group has all // instructions with the same shift in the order in which they appear in the - // body of the 'for' inst. + // body of the 'affine.for' inst. std::vector> sortedInstGroups(maxShift + 1); unsigned pos = 0; for (auto &inst : *forOp->getBody()) { @@ -499,7 +499,7 @@ void mlir::sinkLoop(AffineForOp forOp, unsigned loopDepth) { // bounds, the resulting IR resembles: // // ```mlir -// for %i = max (`iv, ...) to min (`iv` + `offset`) { +// affine.for %i = max (`iv, ...) to min (`iv` + `offset`) { // ... // } // ``` diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp index 7bf9993b7c85..7a44a6277a63 100644 --- a/mlir/lib/Transforms/Utils/Utils.cpp +++ b/mlir/lib/Transforms/Utils/Utils.cpp @@ -199,14 +199,14 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef, /// /// Before /// -/// for %i = 0 to #map(%N) +/// affine.for %i = 0 to #map(%N) /// %idx = affine.apply (d0) -> (d0 mod 2) (%i) /// "send"(%idx, %A, ...) /// "compute"(%idx) /// /// After /// -/// for %i = 0 to #map(%N) +/// affine.for %i = 0 to #map(%N) /// %idx = affine.apply (d0) -> (d0 mod 2) (%i) /// "send"(%idx, %A, ...) /// %idx_ = affine.apply (d0) -> (d0 mod 2) (%i) diff --git a/mlir/lib/Transforms/Vectorize.cpp b/mlir/lib/Transforms/Vectorize.cpp index 955e38f4b396..a52129ed0d6e 100644 --- a/mlir/lib/Transforms/Vectorize.cpp +++ b/mlir/lib/Transforms/Vectorize.cpp @@ -113,7 +113,7 @@ using namespace mlir; /// /// At a high level, a vectorized load in a loop will resemble: /// ```mlir -/// for %i = ? to ? step ? { +/// affine.for %i = ? to ? step ? { /// %v_a = "vector_transfer_read" (A, %i) : (memref, index) -> /// vector<128xf32> /// } @@ -309,7 +309,7 @@ using namespace mlir; /// ```mlir /// mlfunc @fill(%A : memref<128xf32>) -> () { /// %f1 = constant 1.0 : f32 -/// for %i0 = 0 to 32 { +/// affine.for %i0 = 0 to 32 { /// store %f1, %A[%i0] : memref<128xf32, 0> /// } /// return @@ -322,7 +322,7 @@ using namespace mlir; /// is still subject to exploratory tradeoffs. In particular, say we want to /// vectorize by a factor 128, we want to transform the following input: /// ```mlir -/// for %i = %M to %N { +/// affine.for %i = %M to %N { /// %a = load A[%i] : memref /// } /// ``` @@ -331,8 +331,8 @@ using namespace mlir; /// memory promotion etc) say after stripmining (and potentially unrolling in /// the case of LLVM's SLP vectorizer): /// ```mlir -/// for %i = floor(%M, 128) to ceil(%N, 128) { -/// for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) { +/// affine.for %i = floor(%M, 128) to ceil(%N, 128) { +/// affine.for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) { /// %a = load A[%ii] : memref /// } /// } @@ -341,7 +341,7 @@ using namespace mlir; /// Instead, we seek to vectorize early and freeze vector types before /// scheduling, so we want to generate a pattern that resembles: /// ```mlir -/// for %i = ? to ? step ? { +/// affine.for %i = ? to ? step ? 
{ /// %v_a = "vector_transfer_read" (A, %i) : (memref, index) -> /// vector<128xf32> /// } @@ -362,7 +362,7 @@ using namespace mlir; /// For the simple strawman example above, vectorizing for a 1-D vector /// abstraction of size 128 returns code similar to: /// ```mlir -/// for %i = %M to %N step 128 { +/// affine.for %i = %M to %N step 128 { /// %v_a = "vector_transfer_read" (A, %i) : (memref, index) -> /// vector<128xf32> /// } @@ -391,20 +391,20 @@ using namespace mlir; /// %C = alloc (%M, %N) : memref /// %f1 = constant 1.0 : f32 /// %f2 = constant 2.0 : f32 -/// for %i0 = 0 to %M { -/// for %i1 = 0 to %N { +/// affine.for %i0 = 0 to %M { +/// affine.for %i1 = 0 to %N { /// // non-scoped %f1 /// store %f1, %A[%i0, %i1] : memref /// } /// } -/// for %i2 = 0 to %M { -/// for %i3 = 0 to %N { +/// affine.for %i2 = 0 to %M { +/// affine.for %i3 = 0 to %N { /// // non-scoped %f2 /// store %f2, %B[%i2, %i3] : memref /// } /// } -/// for %i4 = 0 to %M { -/// for %i5 = 0 to %N { +/// affine.for %i4 = 0 to %M { +/// affine.for %i5 = 0 to %N { /// %a5 = load %A[%i4, %i5] : memref /// %b5 = load %B[%i4, %i5] : memref /// %s5 = addf %a5, %b5 : f32 @@ -438,24 +438,24 @@ using namespace mlir; /// %2 = alloc(%arg0, %arg1) : memref /// %cst = constant 1.0 : f32 /// %cst_0 = constant 2.0 : f32 -/// for %i0 = 0 to %arg0 { -/// for %i1 = 0 to %arg1 step 256 { +/// affine.for %i0 = 0 to %arg0 { +/// affine.for %i1 = 0 to %arg1 step 256 { /// %cst_1 = constant splat, 1.0> : /// vector<256xf32> /// "vector_transfer_write"(%cst_1, %0, %i0, %i1) : /// (vector<256xf32>, memref, index, index) -> () /// } /// } -/// for %i2 = 0 to %arg0 { -/// for %i3 = 0 to %arg1 step 256 { +/// affine.for %i2 = 0 to %arg0 { +/// affine.for %i3 = 0 to %arg1 step 256 { /// %cst_2 = constant splat, 2.0> : /// vector<256xf32> /// "vector_transfer_write"(%cst_2, %1, %i2, %i3) : /// (vector<256xf32>, memref, index, index) -> () /// } /// } -/// for %i4 = 0 to %arg0 { -/// for %i5 = 0 to %arg1 step 256 { +/// affine.for %i4 = 0 to %arg0 { +/// affine.for %i5 = 0 to %arg1 step 256 { /// %3 = "vector_transfer_read"(%0, %i4, %i5) : /// (memref, index, index) -> vector<256xf32> /// %4 = "vector_transfer_read"(%1, %i4, %i5) : @@ -494,24 +494,24 @@ using namespace mlir; /// %2 = alloc(%arg0, %arg1) : memref /// %cst = constant 1.0 : f32 /// %cst_0 = constant 2.0 : f32 -/// for %i0 = 0 to %arg0 step 32 { -/// for %i1 = 0 to %arg1 step 256 { +/// affine.for %i0 = 0 to %arg0 step 32 { +/// affine.for %i1 = 0 to %arg1 step 256 { /// %cst_1 = constant splat, 1.0> : /// vector<32x256xf32> /// "vector_transfer_write"(%cst_1, %0, %i0, %i1) : /// (vector<32x256xf32>, memref, index, index) -> () /// } /// } -/// for %i2 = 0 to %arg0 step 32 { -/// for %i3 = 0 to %arg1 step 256 { +/// affine.for %i2 = 0 to %arg0 step 32 { +/// affine.for %i3 = 0 to %arg1 step 256 { /// %cst_2 = constant splat, 2.0> : /// vector<32x256xf32> /// "vector_transfer_write"(%cst_2, %1, %i2, %i3) : /// (vector<32x256xf32>, memref, index, index) -> () /// } /// } -/// for %i4 = 0 to %arg0 step 32 { -/// for %i5 = 0 to %arg1 step 256 { +/// affine.for %i4 = 0 to %arg0 step 32 { +/// affine.for %i5 = 0 to %arg1 step 256 { /// %3 = "vector_transfer_read"(%0, %i4, %i5) : /// (memref, index, index) -> vector<32x256xf32> /// %4 = "vector_transfer_read"(%1, %i4, %i5) : diff --git a/mlir/test/AffineOps/canonicalize.mlir b/mlir/test/AffineOps/canonicalize.mlir index 4fd44be85381..90f6aede0d5b 100644 --- a/mlir/test/AffineOps/canonicalize.mlir +++ b/mlir/test/AffineOps/canonicalize.mlir 
@@ -47,7 +47,7 @@ func @compose_affine_maps_1dto2d_no_symbols() { %0 = alloc() : memref<4x4xf32> - for %i0 = 0 to 15 { + affine.for %i0 = 0 to 15 { // Test load[%x, %x] %x0 = affine.apply (d0) -> (d0 - 1) (%i0) @@ -93,7 +93,7 @@ func @compose_affine_maps_1dto2d_no_symbols() { func @compose_affine_maps_1dto2d_with_symbols() { %0 = alloc() : memref<4x4xf32> - for %i0 = 0 to 15 { + affine.for %i0 = 0 to 15 { // Test load[%x0, %x0] with symbol %c4 %c4 = constant 4 : index %x0 = affine.apply (d0)[s0] -> (d0 - s0) (%i0)[%c4] @@ -134,13 +134,13 @@ func @compose_affine_maps_2d_tile() { %c4 = constant 4 : index %c8 = constant 8 : index - for %i0 = 0 to 3 { + affine.for %i0 = 0 to 3 { %x0 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i0)[%c4] - for %i1 = 0 to 3 { + affine.for %i1 = 0 to 3 { %x1 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i1)[%c8] - for %i2 = 0 to 3 { + affine.for %i2 = 0 to 3 { %x2 = affine.apply (d0)[s0] -> (d0 mod s0) (%i2)[%c4] - for %i3 = 0 to 3 { + affine.for %i3 = 0 to 3 { %x3 = affine.apply (d0)[s0] -> (d0 mod s0) (%i3)[%c8] %x40 = affine.apply (d0, d1, d2, d3)[s0, s1] -> @@ -166,9 +166,9 @@ func @compose_affine_maps_dependent_loads() { %0 = alloc() : memref<16x32xf32> %1 = alloc() : memref<16x32xf32> - for %i0 = 0 to 3 { - for %i1 = 0 to 3 { - for %i2 = 0 to 3 { + affine.for %i0 = 0 to 3 { + affine.for %i1 = 0 to 3 { + affine.for %i2 = 0 to 3 { %c3 = constant 3 : index %c7 = constant 7 : index @@ -212,7 +212,7 @@ func @compose_affine_maps_dependent_loads() { func @compose_affine_maps_diamond_dependency() { %0 = alloc() : memref<4x4xf32> - for %i0 = 0 to 15 { + affine.for %i0 = 0 to 15 { %a = affine.apply (d0) -> (d0 - 1) (%i0) %b = affine.apply (d0) -> (d0 + 7) (%a) %c = affine.apply (d0) -> (d0 * 4) (%a) @@ -232,8 +232,8 @@ func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) { %c9 = constant 9 : index %1 = alloc() : memref<100x100xf32, 1> %2 = alloc() : memref<1xi32> - for %i0 = 0 to 100 { - for %i1 = 0 to 100 { + affine.for %i0 = 0 to 100 { + affine.for %i1 = 0 to 100 { %3 = affine.apply (d0, d1)[s0, s1] -> (d1 + s0 + s1) (%i0, %i1)[%arg1, %c9] %4 = affine.apply (d0, d1, d3) -> (d3 - (d0 + d1)) @@ -253,7 +253,7 @@ func @trivial_maps() { %0 = alloc() : memref<10xf32> %c0 = constant 0 : index %cst = constant 0.000000e+00 : f32 - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { %1 = affine.apply ()[s0] -> (s0)()[%c0] store %cst, %0[%1] : memref<10xf32> %2 = load %0[%c0] : memref<10xf32> @@ -380,7 +380,7 @@ func @mix_dims_and_symbols_g(%M: index, %N: index) -> (index, index, index) { // CHECK-LABEL: func @symbolic_semi_affine(%arg0: index, %arg1: index, %arg2: memref) { func @symbolic_semi_affine(%M: index, %N: index, %A: memref) { %f1 = constant 1.0 : f32 - for %i0 = 1 to 100 { + affine.for %i0 = 1 to 100 { %1 = affine.apply ()[s0] -> (s0 + 1) ()[%M] %2 = affine.apply (d0)[s0] -> (d0 floordiv s0) (%i0)[%1] // CHECK-DAG: {{.*}} = affine.apply [[symbolic_semi_affine]](%i0)[%arg0] @@ -404,20 +404,20 @@ func @constant_fold_bounds(%N : index) { %c3 = affine.apply (d0, d1) -> (d0 + d1) (%c1, %c2) %l = "foo"() : () -> index - // CHECK: for %i0 = 5 to 7 { - for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) { + // CHECK: affine.for %i0 = 5 to 7 { + affine.for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) { "foo"(%i, %c3) : (index, index) -> () } // Bound takes a non-constant argument but can still be folded. 
- // CHECK: for %i1 = 1 to 7 { - for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) { + // CHECK: affine.for %i1 = 1 to 7 { + affine.for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) { "foo"(%j, %c3) : (index, index) -> () } // None of the bounds can be folded. - // CHECK: for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] { - for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] { + // CHECK: affine.for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] { + affine.for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] { "foo"(%k, %c3) : (index, index) -> () } return diff --git a/mlir/test/AffineOps/invalid.mlir b/mlir/test/AffineOps/invalid.mlir index b9093c756b7d..69260a7fce60 100644 --- a/mlir/test/AffineOps/invalid.mlir +++ b/mlir/test/AffineOps/invalid.mlir @@ -5,7 +5,7 @@ #map = (d0)[s0] -> (d0 + s0) func @affine_apply_invalid_dim(%arg : index) { - for %n0 = 0 to 7 { + affine.for %n0 = 0 to 7 { %dim = addi %arg, %arg : index // expected-error@+1 {{operand cannot be used as a dimension id}} @@ -19,7 +19,7 @@ func @affine_apply_invalid_dim(%arg : index) { #map0 = (d0)[s0] -> (d0 + s0) func @affine_apply_invalid_sym() { - for %i0 = 0 to 7 { + affine.for %i0 = 0 to 7 { // expected-error@+1 {{operand cannot be used as a symbol}} %0 = affine.apply #map0(%i0)[%i0] } @@ -31,11 +31,11 @@ func @affine_apply_invalid_sym() { #map = (d0)[s0] -> (d0 + s0) func @affine_for_lower_bound_invalid_dim(%arg : index) { - for %n0 = 0 to 7 { + affine.for %n0 = 0 to 7 { %dim = addi %arg, %arg : index // expected-error@+1 {{operand cannot be used as a dimension id}} - for %n1 = 0 to #map(%dim)[%arg] { + affine.for %n1 = 0 to #map(%dim)[%arg] { } } return @@ -46,11 +46,11 @@ func @affine_for_lower_bound_invalid_dim(%arg : index) { #map = (d0)[s0] -> (d0 + s0) func @affine_for_upper_bound_invalid_dim(%arg : index) { - for %n0 = 0 to 7 { + affine.for %n0 = 0 to 7 { %dim = addi %arg, %arg : index // expected-error@+1 {{operand cannot be used as a dimension id}} - for %n1 = #map(%dim)[%arg] to 7 { + affine.for %n1 = #map(%dim)[%arg] to 7 { } } return @@ -61,9 +61,9 @@ func @affine_for_upper_bound_invalid_dim(%arg : index) { #map0 = (d0)[s0] -> (d0 + s0) func @affine_for_lower_bound_invalid_sym() { - for %i0 = 0 to 7 { + affine.for %i0 = 0 to 7 { // expected-error@+1 {{operand cannot be used as a symbol}} - for %n0 = #map0(%i0)[%i0] to 7 { + affine.for %n0 = #map0(%i0)[%i0] to 7 { } } return @@ -74,9 +74,9 @@ func @affine_for_lower_bound_invalid_sym() { #map0 = (d0)[s0] -> (d0 + s0) func @affine_for_upper_bound_invalid_sym() { - for %i0 = 0 to 7 { + affine.for %i0 = 0 to 7 { // expected-error@+1 {{operand cannot be used as a symbol}} - for %n0 = 0 to #map0(%i0)[%i0] { + affine.for %n0 = 0 to #map0(%i0)[%i0] { } } return @@ -87,7 +87,7 @@ func @affine_for_upper_bound_invalid_sym() { #set0 = (i)[N] : (i >= 0, N - i >= 0) func @affine_if_invalid_dim(%arg : index) { - for %n0 = 0 to 7 { + affine.for %n0 = 0 to 7 { %dim = addi %arg, %arg : index // expected-error@+1 {{operand cannot be used as a dimension id}} @@ -101,7 +101,7 @@ func @affine_if_invalid_dim(%arg : index) { #set0 = (i)[N] : (i >= 0, N - i >= 0) func @affine_if_invalid_sym() { - for %i0 = 0 to 7 { + affine.for %i0 = 0 to 7 { // expected-error@+1 {{operand cannot be used as a symbol}} affine.if #set0(%i0)[%i0] {} } @@ -113,7 +113,7 @@ func @affine_if_invalid_sym() { #set0 = (i)[N] : (i >= 0, N - i >= 0) func @affine_if_invalid_dimop_dim(%arg0: index, %arg1: index, %arg2: index, %arg3: index) 
{ - for %n0 = 0 to 7 { + affine.for %n0 = 0 to 7 { %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref %dim = dim %0, 0 : memref diff --git a/mlir/test/AffineOps/ops.mlir b/mlir/test/AffineOps/ops.mlir index e265c6be3a42..6e60c624c726 100644 --- a/mlir/test/AffineOps/ops.mlir +++ b/mlir/test/AffineOps/ops.mlir @@ -2,9 +2,9 @@ // Check that the attributes for the affine operations are round-tripped. func @attributes() { - // CHECK: for %i + // CHECK: affine.for %i // CHECK-NEXT: } {some_attr: true} - for %i = 0 to 10 { + affine.for %i = 0 to 10 { } {some_attr: true} // CHECK: if diff --git a/mlir/test/EDSC/api-test.cpp b/mlir/test/EDSC/api-test.cpp index 4446fbc9cbc1..8d39af520df2 100644 --- a/mlir/test/EDSC/api-test.cpp +++ b/mlir/test/EDSC/api-test.cpp @@ -143,7 +143,7 @@ TEST_FUNC(cond_branch) { f->print(llvm::outs()); } -// Inject a EDSC-constructed `for` loop with bounds coming from function +// Inject a EDSC-constructed `affine.for` loop with bounds coming from function // arguments. TEST_FUNC(dynamic_for_func_args) { auto indexType = IndexType::get(&globalContext()); @@ -164,7 +164,7 @@ TEST_FUNC(dynamic_for_func_args) { // clang-format off // CHECK-LABEL: func @dynamic_for_func_args(%arg0: index, %arg1: index) { - // CHECK: for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 { + // CHECK: affine.for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 { // CHECK: {{.*}} = affine.apply ()[s0] -> (s0 * 3)()[%arg0] // CHECK: {{.*}} = affine.apply ()[s0, s1] -> (s1 + s0 * 3)()[%arg0, %arg1] // CHECK: {{.*}} = affine.apply ()[s0] -> (s0 + 3)()[%arg0] @@ -172,7 +172,7 @@ TEST_FUNC(dynamic_for_func_args) { f->print(llvm::outs()); } -// Inject a EDSC-constructed `for` loop with non-constant bounds that are +// Inject a EDSC-constructed `affine.for` loop with non-constant bounds that are // obtained from AffineApplyOp (also constructed using EDSC operator // overloads). 
TEST_FUNC(dynamic_for) { @@ -200,12 +200,12 @@ TEST_FUNC(dynamic_for) { // CHECK-LABEL: func @dynamic_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) { // CHECK: %0 = affine.apply ()[s0, s1] -> (s0 - s1)()[%arg0, %arg1] // CHECK-NEXT: %1 = affine.apply ()[s0, s1] -> (s0 + s1)()[%arg2, %arg3] - // CHECK-NEXT: for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 { + // CHECK-NEXT: affine.for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 { // clang-format on f->print(llvm::outs()); } -// Inject a EDSC-constructed empty `for` loop with max/min bounds that +// Inject a EDSC-constructed empty `affine.for` loop with max/min bounds that // corresponds to // // for max(%arg0, %arg1) to (%arg2, %arg3) step 1 @@ -234,7 +234,7 @@ TEST_FUNC(max_min_for) { // clang-format off // CHECK-LABEL: func @max_min_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) { - // CHECK: for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) { + // CHECK: affine.for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) { // clang-format on f->print(llvm::outs()); } @@ -334,7 +334,7 @@ TEST_FUNC(assignments_1) { // clang-format off // CHECK-LABEL: func @assignments(%arg0: memref<4xf32>, %arg1: memref<4xf32>, %arg2: memref<4xf32>) { - // CHECK: for %[[iv:.*]] = 0 to 4 { + // CHECK: affine.for %[[iv:.*]] = 0 to 4 { // CHECK: %[[a:.*]] = load %arg0[%[[iv]]] : memref<4xf32> // CHECK: %[[b:.*]] = load %arg1[%[[iv]]] : memref<4xf32> // CHECK: %[[tmp:.*]] = mulf %[[a]], %[[b]] : f32 @@ -348,7 +348,7 @@ TEST_FUNC(assignments_2) { // clang-format off // CHECK-LABEL: func @assignments(%arg0: memref, %arg1: memref, %arg2: memref) { - // CHECK: for %[[iv:.*]] = {{.*}} to {{.*}} { + // CHECK: affine.for %[[iv:.*]] = {{.*}} to {{.*}} { // CHECK: %[[a:.*]] = load %arg0[%[[iv]]] : memref // CHECK: %[[b:.*]] = load %arg1[%[[iv]]] : memref // CHECK: %[[tmp:.*]] = mulf %[[a]], %[[b]] : f32 @@ -405,13 +405,13 @@ TEST_FUNC(tile_2d) { // CHECK: %[[M:[0-9]+]] = dim %arg0, 0 : memref // CHECK-NEXT: %[[N:[0-9]+]] = dim %arg0, 1 : memref // CHECK-NEXT: %[[P:[0-9]+]] = dim %arg0, 2 : memref - // CHECK: for %i0 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[M]]) step 512 { - // CHECK-NEXT: for %i1 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[N]]) step 1024 { - // CHECK-NEXT: for %i2 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) { - // CHECK-NEXT: for %i3 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] step 16 { - // CHECK-NEXT: for %i4 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] step 32 { - // CHECK-NEXT: for %i5 = max (d0, d1)[s0] -> (s0, d0, d1)(%i1, %i4)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 1024, d1 + 32)(%i1, %i4)[%[[N]]] { - // CHECK-NEXT: for %i6 = max (d0, d1)[s0] -> (s0, d0, d1)(%i0, %i3)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 512, d1 + 16)(%i0, %i3)[%[[M]]] { + // CHECK: affine.for %i0 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[M]]) step 512 { + // CHECK-NEXT: affine.for %i1 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[N]]) step 1024 { + // CHECK-NEXT: affine.for %i2 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) { + // CHECK-NEXT: affine.for %i3 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] step 16 { + // CHECK-NEXT: affine.for %i4 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] step 32 { + // CHECK-NEXT: affine.for %i5 = max (d0, d1)[s0] -> (s0, d0, 
d1)(%i1, %i4)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 1024, d1 + 32)(%i1, %i4)[%[[N]]] { + // CHECK-NEXT: affine.for %i6 = max (d0, d1)[s0] -> (s0, d0, d1)(%i0, %i3)[%[[ZERO]]] to min (d0, d1)[s0] -> (s0, d0 + 512, d1 + 16)(%i0, %i3)[%[[M]]] { // CHECK-NEXT: {{.*}} = load {{.*}}[%i6, %i5, %i2] : memref // CHECK-NEXT: {{.*}} = load {{.*}}[%i6, %i5, %i2] : memref // CHECK-NEXT: {{.*}} = addf {{.*}}, {{.*}} : f32 @@ -421,9 +421,9 @@ TEST_FUNC(tile_2d) { // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } - // CHECK-NEXT: for %i7 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) { - // CHECK-NEXT: for %i8 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] { - // CHECK-NEXT: for %i9 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] { + // CHECK-NEXT: affine.for %i7 = (d0) -> (d0)(%[[ZERO]]) to (d0) -> (d0)(%[[P]]) { + // CHECK-NEXT: affine.for %i8 = max (d0)[s0] -> (s0, d0)(%i0)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 512)(%i0)[%[[M]]] { + // CHECK-NEXT: affine.for %i9 = max (d0)[s0] -> (s0, d0)(%i1)[%[[ZERO]]] to min (d0)[s0] -> (s0, d0 + 1024)(%i1)[%[[N]]] { // CHECK-NEXT: {{.*}} = load {{.*}}[%i8, %i9, %i7] : memref // CHECK-NEXT: {{.*}} = load {{.*}}[%i8, %i9, %i7] : memref // CHECK-NEXT: {{.*}}= addf {{.*}}, {{.*}} : f32 diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp index ec6d12a08768..5d7000e89504 100644 --- a/mlir/test/EDSC/builder-api-test.cpp +++ b/mlir/test/EDSC/builder-api-test.cpp @@ -80,11 +80,11 @@ TEST_FUNC(builder_dynamic_for_func_args) { // clang-format off // CHECK-LABEL: func @builder_dynamic_for_func_args(%arg0: index, %arg1: index) { - // CHECK: for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 { + // CHECK: affine.for %i0 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 3 { // CHECK: {{.*}} = affine.apply ()[s0] -> (s0 * 3)()[%arg0] // CHECK: {{.*}} = affine.apply ()[s0, s1] -> (s1 + s0 * 3)()[%arg0, %arg1] // CHECK: {{.*}} = affine.apply ()[s0] -> (s0 + 3)()[%arg0] - // CHECK: for %i1 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 2 { + // CHECK: affine.for %i1 = (d0) -> (d0)(%arg0) to (d0) -> (d0)(%arg1) step 2 { // CHECK: {{.*}} = affine.apply (d0, d1) -> ((d0 + d1 * 3) floordiv 32)(%i0, %i1) // CHECK: {{.*}} = affine.apply (d0, d1) -> (((d0 + d1 * 3) floordiv 32) * 31)(%i0, %i1) // CHECK: {{.*}} = affine.apply (d0, d1) -> ((((d0 + d1 * 3) floordiv 32) * 31) ceildiv 32)(%i0, %i1) @@ -119,7 +119,7 @@ TEST_FUNC(builder_dynamic_for) { // CHECK-LABEL: func @builder_dynamic_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) { // CHECK: %0 = affine.apply ()[s0, s1] -> (s0 - s1)()[%arg0, %arg1] // CHECK-NEXT: %1 = affine.apply ()[s0, s1] -> (s0 + s1)()[%arg2, %arg3] - // CHECK-NEXT: for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 { + // CHECK-NEXT: affine.for %i0 = (d0) -> (d0)(%0) to (d0) -> (d0)(%1) step 2 { // clang-format on f->print(llvm::outs()); } @@ -140,7 +140,7 @@ TEST_FUNC(builder_max_min_for) { // clang-format off // CHECK-LABEL: func @builder_max_min_for(%arg0: index, %arg1: index, %arg2: index, %arg3: index) { - // CHECK: for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) { + // CHECK: affine.for %i0 = max (d0, d1) -> (d0, d1)(%arg0, %arg1) to min (d0, d1) -> (d0, d1)(%arg2, %arg3) { // CHECK: return // clang-format on f->print(llvm::outs()); @@ -344,16 +344,16 @@ TEST_FUNC(builder_helpers) { }); // CHECK-LABEL: @builder_helpers - // CHECK: for %i0 = (d0) -> (d0)({{.*}}) to 
(d0) -> (d0)({{.*}}) { - // CHECK-NEXT: for %i1 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) { - // CHECK-NEXT: for %i2 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) { + // CHECK: affine.for %i0 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) { + // CHECK-NEXT: affine.for %i1 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) { + // CHECK-NEXT: affine.for %i2 = (d0) -> (d0)({{.*}}) to (d0) -> (d0)({{.*}}) { // CHECK-NEXT: [[a:%.*]] = load %arg0[%i0, %i1, %i2] : memref // CHECK-NEXT: [[b:%.*]] = addf {{.*}}, [[a]] : f32 // CHECK-NEXT: [[c:%.*]] = load %arg1[%i0, %i1, %i2] : memref // CHECK-NEXT: [[d:%.*]] = addf [[b]], [[c]] : f32 // CHECK-NEXT: store [[d]], %arg2[%i0, %i1, %i2] : memref // CHECK-NEXT: } - // CHECK-NEXT: for %i3 = (d0) -> (d0)(%c0_1) to (d0) -> (d0)(%2) { + // CHECK-NEXT: affine.for %i3 = (d0) -> (d0)(%c0_1) to (d0) -> (d0)(%2) { // CHECK-NEXT: [[a:%.*]] = load %arg1[%i0, %i1, %i3] : memref // CHECK-NEXT: [[b:%.*]] = load %arg0[%i0, %i1, %i3] : memref // CHECK-NEXT: [[c:%.*]] = addf [[b]], [[a]] : f32 @@ -392,8 +392,8 @@ TEST_FUNC(custom_ops) { }); // CHECK-LABEL: @custom_ops - // CHECK: for %i0 {{.*}} - // CHECK: for %i1 {{.*}} + // CHECK: affine.for %i0 {{.*}} + // CHECK: affine.for %i1 {{.*}} // CHECK: {{.*}} = "my_custom_op"{{.*}} : (index, index) -> index // CHECK: "my_custom_inst_0"{{.*}} : (index, index) -> () // CHECK: [[TWO:%[a-z0-9]+]] = "my_custom_inst_2"{{.*}} : (index, index) -> (index, index) diff --git a/mlir/test/IR/invalid.mlir b/mlir/test/IR/invalid.mlir index d6319028491b..650f023c1857 100644 --- a/mlir/test/IR/invalid.mlir +++ b/mlir/test/IR/invalid.mlir @@ -204,24 +204,24 @@ func @illegaltype(i0) // expected-error {{invalid integer width}} // ----- func @malformed_for_percent() { - for i = 1 to 10 { // expected-error {{expected SSA operand}} + affine.for i = 1 to 10 { // expected-error {{expected SSA operand}} // ----- func @malformed_for_equal() { - for %i 1 to 10 { // expected-error {{expected '='}} + affine.for %i 1 to 10 { // expected-error {{expected '='}} // ----- func @malformed_for_to() { - for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}} + affine.for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}} } } // ----- func @incomplete_for() { - for %i = 1 to 10 step 2 + affine.for %i = 1 to 10 step 2 } // expected-error {{expected '{' to begin a region}} // ----- @@ -230,19 +230,19 @@ func @incomplete_for() { func @reference_to_iv_in_bound() { // expected-error@+1 {{operand use before it's defined}} - for %i0 = #map0(%i0) to 10 { + affine.for %i0 = #map0(%i0) to 10 { } } // ----- func @nonconstant_step(%1 : i32) { - for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}} + affine.for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}} // ----- func @for_negative_stride() { - for %i = 1 to 10 step -1 + affine.for %i = 1 to 10 step -1 } // expected-error@-1 {{expected step to be representable as a positive signed integer}} // ----- @@ -254,7 +254,7 @@ func @non_instruction() { // ----- func @invalid_if_conditional2() { - for %i = 1 to 10 { + affine.for %i = 1 to 10 { affine.if (i)[N] : (i >= ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}} } } @@ -262,7 +262,7 @@ func @invalid_if_conditional2() { // ----- func @invalid_if_conditional3() { - for %i = 1 to 10 { + affine.for %i = 1 to 10 { affine.if (i)[N] : (i == 1) // expected-error {{expected '0' after '=='}} } } @@ -270,7 +270,7 @@ func @invalid_if_conditional3() { // ----- func 
@invalid_if_conditional4() { - for %i = 1 to 10 { + affine.for %i = 1 to 10 { affine.if (i)[N] : (i >= 2) // expected-error {{expected '0' after '>='}} } } @@ -278,7 +278,7 @@ func @invalid_if_conditional4() { // ----- func @invalid_if_conditional5() { - for %i = 1 to 10 { + affine.for %i = 1 to 10 { affine.if (i)[N] : (i <= 0 ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}} } } @@ -286,7 +286,7 @@ func @invalid_if_conditional5() { // ----- func @invalid_if_conditional6() { - for %i = 1 to 10 { + affine.for %i = 1 to 10 { affine.if (i) : (i) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}} } } @@ -294,7 +294,7 @@ func @invalid_if_conditional6() { // ----- // TODO (support affine.if (1)? func @invalid_if_conditional7() { - for %i = 1 to 10 { + affine.for %i = 1 to 10 { affine.if (i) : (1) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}} } } @@ -440,8 +440,8 @@ func @undef() { // ----- func @duplicate_induction_var() { - for %i = 1 to 10 { // expected-error {{previously defined here}} - for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}} + affine.for %i = 1 to 10 { // expected-error {{previously defined here}} + affine.for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}} } } return @@ -450,7 +450,7 @@ func @duplicate_induction_var() { // ----- func @dominance_failure() { - for %i = 1 to 10 { + affine.for %i = 1 to 10 { } "xxx"(%i) : (index)->() // expected-error {{operand #0 does not dominate this use}} return @@ -477,7 +477,7 @@ func @return_type_mismatch() -> i32 { // ----- func @return_inside_loop() { - for %i = 1 to 100 { + affine.for %i = 1 to 100 { // expected-error@-1 {{op expects body block to not have a terminator}} return } @@ -522,7 +522,7 @@ func @referer() { #map1 = (i)[j] -> (i+j) func @bound_symbol_mismatch(%N : index) { - for %i = #map1(%N) to 100 { + affine.for %i = #map1(%N) to 100 { // expected-error@-1 {{symbol operand count and integer set symbol count must match}} } return @@ -533,7 +533,7 @@ func @bound_symbol_mismatch(%N : index) { #map1 = (i)[j] -> (i+j) func @bound_dim_mismatch(%N : index) { - for %i = #map1(%N, %N)[%N] to 100 { + affine.for %i = #map1(%N, %N)[%N] to 100 { // expected-error@-1 {{dim operand count and integer set dim count must match}} } return @@ -542,7 +542,7 @@ func @bound_dim_mismatch(%N : index) { // ----- func @large_bound() { - for %i = 1 to 9223372036854775810 { + affine.for %i = 1 to 9223372036854775810 { // expected-error@-1 {{integer constant out of range for attribute}} } return @@ -551,7 +551,7 @@ func @large_bound() { // ----- func @max_in_upper_bound(%N : index) { - for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}} + affine.for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}} } return } @@ -559,7 +559,7 @@ func @max_in_upper_bound(%N : index) { // ----- func @step_typo() { - for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}} + affine.for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}} } return } @@ -567,7 +567,7 @@ func @step_typo() { // ----- func @invalid_bound_map(%N : i32) { - for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}} + affine.for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}} } return } @@ -580,7 +580,7 @@ func @invalid_bound_map(%N : i32) { #set0 = (i)[N] : (i >= 0, N - i >= 0) func @invalid_if_operands1(%N : index) { - for %i = 1 to 10 
{ + affine.for %i = 1 to 10 { affine.if #set0(%i) { // expected-error@-1 {{symbol operand count and integer set symbol count must match}} @@ -588,7 +588,7 @@ func @invalid_if_operands1(%N : index) { #set0 = (i)[N] : (i >= 0, N - i >= 0) func @invalid_if_operands2(%N : index) { - for %i = 1 to 10 { + affine.for %i = 1 to 10 { affine.if #set0()[%N] { // expected-error@-1 {{dim operand count and integer set dim count must match}} @@ -596,7 +596,7 @@ func @invalid_if_operands2(%N : index) { #set0 = (i)[N] : (i >= 0, N - i >= 0) func @invalid_if_operands3(%N : index) { - for %i = 1 to 10 { + affine.for %i = 1 to 10 { affine.if #set0(%i)[%i] { // expected-error@-1 {{operand cannot be used as a symbol}} } @@ -751,11 +751,11 @@ func @f(f32) { // ----- func @f(%m : memref) { - for %i0 = 0 to 42 { + affine.for %i0 = 0 to 42 { // expected-error@+1 {{operand #2 does not dominate this use}} %x = load %m[%i0, %i1] : memref } - for %i1 = 0 to 42 { + affine.for %i1 = 0 to 42 { } return } @@ -805,7 +805,7 @@ func @type_alias_unknown(!unknown_alias) -> () { // expected-error {{undefined t // Check ill-formed opaque tensor. func @complex_loops() { - for %i1 = 1 to 100 { + affine.for %i1 = 1 to 100 { // expected-error @+1 {{expected '"' in string literal}} "opaqueIntTensor"(){bar: opaque<"", tensor<2x1x4xi32>, "0x686]>} : () -> () @@ -839,7 +839,7 @@ func @invalid_affine_structure() { func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) { // expected-error @+1 {{lower loop bound affine map with multiple results requires 'max' prefix}} - for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 { + affine.for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 { } return } @@ -848,7 +848,7 @@ func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) { func @missing_for_min(%arg0: index, %arg1: index, %arg2: memref<100xf32>) { // expected-error @+1 {{upper loop bound affine map with multiple results requires 'min' prefix}} - for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] { + affine.for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] { } return } diff --git a/mlir/test/IR/locations.mlir b/mlir/test/IR/locations.mlir index 3b27301cfae6..ac4925e3e526 100644 --- a/mlir/test/IR/locations.mlir +++ b/mlir/test/IR/locations.mlir @@ -13,7 +13,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) { %2 = constant 4 : index loc(callsite("foo" at "mysource.cc":10:8)) // CHECK: } loc(fused["foo", "mysource.cc":10:8]) - for %i0 = 0 to 8 { + affine.for %i0 = 0 to 8 { } loc(fused["foo", "mysource.cc":10:8]) // CHECK: } loc(fused<"myPass">["foo", "foo2"]) diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir index 92fbc0e19f83..c66c6c0614b0 100644 --- a/mlir/test/IR/parser.mlir +++ b/mlir/test/IR/parser.mlir @@ -208,8 +208,8 @@ func @identity_functor(%a : () -> ()) -> (() -> ()) { func @func_ops_in_loop() { // CHECK: %0 = "foo"() : () -> i64 %a = "foo"() : ()->i64 - // CHECK: for %i0 = 1 to 10 { - for %i = 1 to 10 { + // CHECK: affine.for %i0 = 1 to 10 { + affine.for %i = 1 to 10 { // CHECK: %1 = "doo"() : () -> f32 %b = "doo"() : ()->f32 // CHECK: "bar"(%0, %1) : (i64, f32) -> () @@ -224,10 +224,10 @@ func @func_ops_in_loop() { // CHECK-LABEL: func @loops() { func @loops() { - // CHECK: for %i0 = 1 to 100 step 2 { - for %i = 1 to 100 step 2 { - // CHECK: for %i1 = 1 to 200 { - for %j = 1 to 200 { + // CHECK: affine.for %i0 = 1 to 100 step 2 { + affine.for %i = 1 to 100 step 2 { + // CHECK: affine.for %i1 = 1 to 200 { + affine.for %j = 1 to 200 { } // CHECK: } } // CHECK: } return // CHECK: return @@ -235,14 
+235,14 @@ func @loops() { // CHECK-LABEL: func @complex_loops() { func @complex_loops() { - for %i1 = 1 to 100 { // CHECK: for %i0 = 1 to 100 { - for %j1 = 1 to 100 { // CHECK: for %i1 = 1 to 100 { + affine.for %i1 = 1 to 100 { // CHECK: affine.for %i0 = 1 to 100 { + affine.for %j1 = 1 to 100 { // CHECK: affine.for %i1 = 1 to 100 { // CHECK: "foo"(%i0, %i1) : (index, index) -> () "foo"(%i1, %j1) : (index,index) -> () } // CHECK: } "boo"() : () -> () // CHECK: "boo"() : () -> () - for %j2 = 1 to 10 { // CHECK: for %i2 = 1 to 10 { - for %k2 = 1 to 10 { // CHECK: for %i3 = 1 to 10 { + affine.for %j2 = 1 to 10 { // CHECK: affine.for %i2 = 1 to 10 { + affine.for %k2 = 1 to 10 { // CHECK: affine.for %i3 = 1 to 10 { "goo"() : () -> () // CHECK: "goo"() : () -> () } // CHECK: } } // CHECK: } @@ -253,8 +253,8 @@ func @complex_loops() { // CHECK: func @triang_loop(%arg0: index, %arg1: memref) { func @triang_loop(%arg0: index, %arg1: memref) { %c = constant 0 : i32 // CHECK: %c0_i32 = constant 0 : i32 - for %i0 = 1 to %arg0 { // CHECK: for %i0 = 1 to %arg0 { - for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: for %i1 = #map{{[0-9]+}}(%i0) to %arg0 { + affine.for %i0 = 1 to %arg0 { // CHECK: affine.for %i0 = 1 to %arg0 { + affine.for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to %arg0 { store %c, %arg1[%i0, %i1] : memref // CHECK: store %c0_i32, %arg1[%i0, %i1] } // CHECK: } } // CHECK: } @@ -263,8 +263,8 @@ func @triang_loop(%arg0: index, %arg1: memref) { // CHECK: func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) { func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) { - // CHECK: for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] { - for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] { + // CHECK: affine.for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] { + affine.for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] { // CHECK: "foo"(%arg2, %i0) : (memref<100xf32>, index) -> () "foo"(%arg2, %i0) : (memref<100xf32>, index) -> () } // CHECK: } @@ -275,24 +275,24 @@ func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) { func @loop_bounds(%N : index) { // CHECK: %0 = "foo"(%arg0) : (index) -> index %s = "foo"(%N) : (index) -> index - // CHECK: for %i0 = %0 to %arg0 - for %i = %s to %N { - // CHECK: for %i1 = #map{{[0-9]+}}(%i0) to 0 - for %j = (d0)[]->(d0)(%i)[] to 0 step 1 { + // CHECK: affine.for %i0 = %0 to %arg0 + affine.for %i = %s to %N { + // CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to 0 + affine.for %j = (d0)[]->(d0)(%i)[] to 0 step 1 { // CHECK: %1 = affine.apply #map{{.*}}(%i0, %i1)[%0] %w1 = affine.apply(d0, d1)[s0] -> (d0+d1) (%i, %j) [%s] // CHECK: %2 = affine.apply #map{{.*}}(%i0, %i1)[%0] %w2 = affine.apply(d0, d1)[s0] -> (s0+1) (%i, %j) [%s] - // CHECK: for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] { - for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] { + // CHECK: affine.for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] { + affine.for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] { // CHECK: "foo"(%i0, %i1, %i2) : (index, index, index) -> () "foo"(%i, %j, %k) : (index, index, index)->() // CHECK: %c30 = constant 30 : index %c = constant 30 : index // CHECK: %3 = affine.apply #map{{.*}}(%arg0, %c30) %u = affine.apply (d0, d1)->(d0+d1) (%N, %c) - // CHECK: for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] { - for %l = max 
#bound_map2(%i)[%u] to min #bound_map2(%k)[%c] { + // CHECK: affine.for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] { + affine.for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] { // CHECK: "bar"(%i3) : (index) -> () "bar"(%l) : (index) -> () } // CHECK: } @@ -305,7 +305,7 @@ func @loop_bounds(%N : index) { // CHECK-LABEL: func @ifinst(%arg0: index) { func @ifinst(%N: index) { %c = constant 200 : index // CHECK %c200 = constant 200 - for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 { + affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 { affine.if #set0(%i)[%N, %c] { // CHECK affine.if #set0(%i0)[%arg0, %c200] { %x = constant 1 : i32 // CHECK: %c1_i32 = constant 1 : i32 @@ -328,7 +328,7 @@ func @ifinst(%N: index) { // CHECK-LABEL: func @simple_ifinst(%arg0: index) { func @simple_ifinst(%N: index) { %c = constant 200 : index // CHECK %c200 = constant 200 - for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 { + affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 { affine.if #set0(%i)[%N, %c] { // CHECK affine.if #set0(%i0)[%arg0, %c200] { %x = constant 1 : i32 // CHECK: %c1_i32 = constant 1 : i32 @@ -549,18 +549,18 @@ func @funcattrwithblock() -> () #map_non_simple2 = ()[s0, s1] -> (s0 + s1) #map_non_simple3 = ()[s0] -> (s0 + 3) func @funcsimplemap(%arg0: index, %arg1: index) -> () { - for %i0 = 0 to #map_simple0()[] { - // CHECK: for %i0 = 0 to 10 { - for %i1 = 0 to #map_simple1()[%arg1] { - // CHECK: for %i1 = 0 to %arg1 { - for %i2 = 0 to #map_non_simple0(%i0)[] { - // CHECK: for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) { - for %i3 = 0 to #map_non_simple1(%i0)[%arg1] { - // CHECK: for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] { - for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] { - // CHECK: for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] { - for %i5 = 0 to #map_non_simple3()[%arg0] { - // CHECK: for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] { + affine.for %i0 = 0 to #map_simple0()[] { + // CHECK: affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to #map_simple1()[%arg1] { + // CHECK: affine.for %i1 = 0 to %arg1 { + affine.for %i2 = 0 to #map_non_simple0(%i0)[] { + // CHECK: affine.for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) { + affine.for %i3 = 0 to #map_non_simple1(%i0)[%arg1] { + // CHECK: affine.for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] { + affine.for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] { + // CHECK: affine.for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] { + affine.for %i5 = 0 to #map_non_simple3()[%arg0] { + // CHECK: affine.for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] { %c42_i32 = constant 42 : i32 } } @@ -745,9 +745,9 @@ func @sparsevectorattr() -> () { // CHECK-LABEL: func @loops_with_blockids() { func @loops_with_blockids() { ^block0: - for %i = 1 to 100 step 2 { + affine.for %i = 1 to 100 step 2 { ^block1: - for %j = 1 to 200 { + affine.for %j = 1 to 200 { ^block2: } } diff --git a/mlir/test/IR/pretty-locations.mlir b/mlir/test/IR/pretty-locations.mlir index bc5a319c99e3..defde9e9c708 100644 --- a/mlir/test/IR/pretty-locations.mlir +++ b/mlir/test/IR/pretty-locations.mlir @@ -18,7 +18,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) { %3 = constant 4 : index loc(callsite("foo" at callsite("mysource1.cc":10:8 at callsite("mysource2.cc":13:8 at "mysource3.cc":100:10)))) // CHECK: } ["foo", mysource.cc:10:8] - for %i0 = 0 to 8 { + affine.for %i0 = 0 to 8 { } loc(fused["foo", "mysource.cc":10:8]) // CHECK: } <"myPass">["foo", "foo2"] diff --git a/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir 
b/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir index 013f6351a176..a55c79f11417 100644 --- a/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir +++ b/mlir/test/Transforms/Vectorize/lower_vector_transfers.mlir @@ -6,8 +6,8 @@ // CHECK-LABEL: func @materialize_read_1d() { func @materialize_read_1d() { %A = alloc () : memref<7x42xf32> - for %i0 = 0 to 7 step 4 { - for %i1 = 0 to 42 step 4 { + affine.for %i0 = 0 to 7 step 4 { + affine.for %i1 = 0 to 42 step 4 { %f1 = vector_transfer_read %A, %i0, %i1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32> %ip1 = affine.apply (d0) -> (d0 + 1) (%i1) %f2 = vector_transfer_read %A, %i0, %ip1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32> @@ -29,11 +29,11 @@ func @materialize_read_1d() { // CHECK-LABEL: func @materialize_read_1d_partially_specialized func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) { %A = alloc (%dyn1, %dyn2, %dyn4) : memref<7x?x?x42x?xf32> - for %i0 = 0 to 7 { - for %i1 = 0 to %dyn1 { - for %i2 = 0 to %dyn2 { - for %i3 = 0 to 42 step 2 { - for %i4 = 0 to %dyn4 { + affine.for %i0 = 0 to 7 { + affine.for %i1 = 0 to %dyn1 { + affine.for %i2 = 0 to %dyn2 { + affine.for %i3 = 0 to 42 step 2 { + affine.for %i4 = 0 to %dyn4 { %f1 = vector_transfer_read %A, %i0, %i1, %i2, %i3, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32> %i3p1 = affine.apply (d0) -> (d0 + 1) (%i3) %f2 = vector_transfer_read %A, %i0, %i1, %i2, %i3p1, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32> @@ -54,19 +54,19 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d // CHECK-LABEL: func @materialize_read(%arg0: index, %arg1: index, %arg2: index, %arg3: index) { func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref - // CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 { - // CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 { - // CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 { - // CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 { + // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 { + // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 { + // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 { + // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 { // CHECK: %[[D0:.*]] = dim %0, 0 : memref // CHECK-NEXT: %[[D1:.*]] = dim %0, 1 : memref // CHECK-NEXT: %[[D2:.*]] = dim %0, 2 : memref // CHECK-NEXT: %[[D3:.*]] = dim %0, 3 : memref // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32> // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast %[[ALLOC]] : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>> - // CHECK-NEXT: for %[[I4:.*]] = 0 to 3 { - // CHECK-NEXT: for %[[I5:.*]] = 0 to 4 { - // CHECK-NEXT: for %[[I6:.*]] = 0 to 5 { + // CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 { + // CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 { + // CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 { // CHECK-NEXT: %[[C0:.*]] = constant 0 : index // CHECK-NEXT: %[[C1:.*]] = constant 1 : index // CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]]) @@ -117,10 +117,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // Check that I3 + I6 (of size 5) read from last index load(..., L3) and write into first index store(I6, ...) 
// Other dimensions are just accessed with I1, I2 resp. %A = alloc (%M, %N, %O, %P) : memref - for %i0 = 0 to %M step 3 { - for %i1 = 0 to %N { - for %i2 = 0 to %O { - for %i3 = 0 to %P step 5 { + affine.for %i0 = 0 to %M step 3 { + affine.for %i1 = 0 to %N { + affine.for %i2 = 0 to %O { + affine.for %i3 = 0 to %P step 5 { %f = vector_transfer_read %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, 0, d0)} : (memref, index, index, index, index) -> vector<5x4x3xf32> } } @@ -133,10 +133,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref // CHECK-NEXT: %cst = constant splat, 1.000000e+00> : vector<5x4x3xf32> - // CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 { - // CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 step 4 { - // CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 { - // CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 { + // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 { + // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 step 4 { + // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 { + // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 { // CHECK: %[[D0:.*]] = dim %0, 0 : memref // CHECK-NEXT: %[[D1:.*]] = dim %0, 1 : memref // CHECK-NEXT: %[[D2:.*]] = dim %0, 2 : memref @@ -144,9 +144,9 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32> // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast {{.*}} : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>> // CHECK: store %cst, {{.*}} : memref<1xvector<5x4x3xf32>> - // CHECK-NEXT: for %[[I4:.*]] = 0 to 3 { - // CHECK-NEXT: for %[[I5:.*]] = 0 to 4 { - // CHECK-NEXT: for %[[I6:.*]] = 0 to 5 { + // CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 { + // CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 { + // CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 { // CHECK-NEXT: %[[C0:.*]] = constant 0 : index // CHECK-NEXT: %[[C1:.*]] = constant 1 : index // CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]]) @@ -201,10 +201,10 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // Other dimension is just accessed with I2. 
%A = alloc (%M, %N, %O, %P) : memref %f1 = constant splat, 1.000000e+00> : vector<5x4x3xf32> - for %i0 = 0 to %M step 3 { - for %i1 = 0 to %N step 4 { - for %i2 = 0 to %O { - for %i3 = 0 to %P step 5 { + affine.for %i0 = 0 to %M step 3 { + affine.for %i1 = 0 to %N step 4 { + affine.for %i2 = 0 to %O { + affine.for %i3 = 0 to %P step 5 { vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : vector<5x4x3xf32>, memref, index, index, index, index } } diff --git a/mlir/test/Transforms/Vectorize/materialize.mlir b/mlir/test/Transforms/Vectorize/materialize.mlir index 80458c753335..ce445ec75bbf 100644 --- a/mlir/test/Transforms/Vectorize/materialize.mlir +++ b/mlir/test/Transforms/Vectorize/materialize.mlir @@ -10,10 +10,10 @@ func @materialize(%M : index, %N : index, %O : index, %P : index) { %A = alloc (%M, %N, %O, %P) : memref %f1 = constant splat, 1.000000e+00> : vector<4x4x4xf32> - // CHECK: for %i0 = 0 to %arg0 step 4 { - // CHECK-NEXT: for %i1 = 0 to %arg1 step 4 { - // CHECK-NEXT: for %i2 = 0 to %arg2 { - // CHECK-NEXT: for %i3 = 0 to %arg3 step 4 { + // CHECK: affine.for %i0 = 0 to %arg0 step 4 { + // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 4 { + // CHECK-NEXT: affine.for %i2 = 0 to %arg2 { + // CHECK-NEXT: affine.for %i3 = 0 to %arg3 step 4 { // CHECK-NEXT: %[[a:[0-9]+]] = {{.*}}[[ID1]](%i0) // CHECK-NEXT: %[[b:[0-9]+]] = {{.*}}[[ID1]](%i1) // CHECK-NEXT: %[[c:[0-9]+]] = {{.*}}[[ID1]](%i2) @@ -25,10 +25,10 @@ func @materialize(%M : index, %N : index, %O : index, %P : index) { // CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b2]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref, index, index, index, index // CHECK: %[[b3:[0-9]+]] = {{.*}}[[D0P3]](%i1) // CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b3]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref, index, index, index, index - for %i0 = 0 to %M step 4 { - for %i1 = 0 to %N step 4 { - for %i2 = 0 to %O { - for %i3 = 0 to %P step 4 { + affine.for %i0 = 0 to %M step 4 { + affine.for %i1 = 0 to %N step 4 { + affine.for %i2 = 0 to %O { + affine.for %i3 = 0 to %P step 4 { "vector_transfer_write"(%f1, %A, %i0, %i1, %i2, %i3) {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : (vector<4x4x4xf32>, memref, index, index, index, index) -> () } } diff --git a/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir b/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir index b5f771d7e625..71c442b965e1 100644 --- a/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir +++ b/mlir/test/Transforms/Vectorize/materialize_vectors_1d_to_1d.mlir @@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %f1 = constant 1.0 : f32 %f2 = constant 2.0 : f32 // 4x unroll (jammed by construction). 
- // CHECK: for %i0 = 0 to %arg0 { - // CHECK-NEXT: for %i1 = 0 to %arg1 step 32 { + // CHECK: affine.for %i0 = 0 to %arg0 { + // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 { // CHECK-NEXT: [[CST0:%.*]] = constant splat, 1.000000e+00> : vector<8xf32> // CHECK-NEXT: [[CST1:%.*]] = constant splat, 1.000000e+00> : vector<8xf32> // CHECK-NEXT: [[CST2:%.*]] = constant splat, 1.000000e+00> : vector<8xf32> @@ -34,15 +34,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}} // CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> // - for %i0 = 0 to %M { - for %i1 = 0 to %N { + affine.for %i0 = 0 to %M { + affine.for %i1 = 0 to %N { // non-scoped %f1 store %f1, %A[%i0, %i1] : memref } } // 4x unroll (jammed by construction). - // CHECK: for %i2 = 0 to %arg0 { - // CHECK-NEXT: for %i3 = 0 to %arg1 step 32 { + // CHECK: affine.for %i2 = 0 to %arg0 { + // CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 { // CHECK-NEXT: [[CST0:%.*]] = constant splat, 2.000000e+00> : vector<8xf32> // CHECK-NEXT: [[CST1:%.*]] = constant splat, 2.000000e+00> : vector<8xf32> // CHECK-NEXT: [[CST2:%.*]] = constant splat, 2.000000e+00> : vector<8xf32> @@ -60,15 +60,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}} // CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> // - for %i2 = 0 to %M { - for %i3 = 0 to %N { + affine.for %i2 = 0 to %M { + affine.for %i3 = 0 to %N { // non-scoped %f2 store %f2, %B[%i2, %i3] : memref } } // 4x unroll (jammed by construction). - // CHECK: for %i4 = 0 to %arg0 { - // CHECK-NEXT: for %i5 = 0 to %arg1 step 32 { + // CHECK: affine.for %i4 = 0 to %arg0 { + // CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 { // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: {{.*}} = vector_transfer_read @@ -110,8 +110,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: vector_transfer_write // - for %i4 = 0 to %M { - for %i5 = 0 to %N { + affine.for %i4 = 0 to %M { + affine.for %i5 = 0 to %N { %a5 = load %A[%i4, %i5] : memref %b5 = load %B[%i4, %i5] : memref %s5 = addf %a5, %b5 : f32 diff --git a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir index 92df49fa8fa4..62149c323b67 100644 --- a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir +++ b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_1d.mlir @@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %f1 = constant 1.0 : f32 %f2 = constant 2.0 : f32 // (3x2)x unroll (jammed by construction). 
- // CHECK: for %i0 = 0 to %arg0 step 3 { - // CHECK-NEXT: for %i1 = 0 to %arg1 step 16 { + // CHECK: affine.for %i0 = 0 to %arg0 step 3 { + // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 16 { // CHECK-NEXT: {{.*}} = constant splat, 1.000000e+00> : vector<8xf32> // CHECK-NEXT: {{.*}} = constant splat, 1.000000e+00> : vector<8xf32> // CHECK-NEXT: {{.*}} = constant splat, 1.000000e+00> : vector<8xf32> @@ -41,26 +41,26 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: [[VAL50:%.*]] = affine.apply [[D0P2]](%i0) // CHECK-NEXT: [[VAL51:%.*]] = affine.apply [[D0P8]](%i1) // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL50]], [[VAL51]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32> - for %i0 = 0 to %M { - for %i1 = 0 to %N { + affine.for %i0 = 0 to %M { + affine.for %i1 = 0 to %N { // non-scoped %f1 store %f1, %A[%i0, %i1] : memref } } // (3x2)x unroll (jammed by construction). - // CHECK: for %i2 = 0 to %arg0 step 3 { - // CHECK-NEXT: for %i3 = 0 to %arg1 step 16 { + // CHECK: affine.for %i2 = 0 to %arg0 step 3 { + // CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 16 { // ..... - for %i2 = 0 to %M { - for %i3 = 0 to %N { + affine.for %i2 = 0 to %M { + affine.for %i3 = 0 to %N { // non-scoped %f2 // CHECK does (3x4)x unrolling. store %f2, %B[%i2, %i3] : memref } } // (3x2)x unroll (jammed by construction). - // CHECK: for %i4 = 0 to %arg0 step 3 { - // CHECK-NEXT: for %i5 = 0 to %arg1 step 16 { + // CHECK: affine.for %i4 = 0 to %arg0 step 3 { + // CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 16 { // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: {{.*}} = vector_transfer_read @@ -122,8 +122,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: vector_transfer_write // - for %i4 = 0 to %M { - for %i5 = 0 to %N { + affine.for %i4 = 0 to %M { + affine.for %i5 = 0 to %N { %a5 = load %A[%i4, %i5] : memref %b5 = load %B[%i4, %i5] : memref %s5 = addf %a5, %b5 : f32 diff --git a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir index 36ec96e30b4a..59705eca69e4 100644 --- a/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir +++ b/mlir/test/Transforms/Vectorize/materialize_vectors_2d_to_2d.mlir @@ -13,8 +13,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %f1 = constant 1.0 : f32 %f2 = constant 2.0 : f32 // 2x unroll (jammed by construction). - // CHECK: for %i0 = 0 to %arg0 step 3 { - // CHECK-NEXT: for %i1 = 0 to %arg1 step 32 { + // CHECK: affine.for %i0 = 0 to %arg0 step 3 { + // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 { // CHECK-NEXT: {{.*}} = constant splat, 1.000000e+00> : vector<3x16xf32> // CHECK-NEXT: {{.*}} = constant splat, 1.000000e+00> : vector<3x16xf32> // CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i0) @@ -24,15 +24,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i1) // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32> // - for %i0 = 0 to %M { - for %i1 = 0 to %N { + affine.for %i0 = 0 to %M { + affine.for %i1 = 0 to %N { // non-scoped %f1 store %f1, %A[%i0, %i1] : memref } } // 2x unroll (jammed by construction). 
- // CHECK: for %i2 = 0 to %arg0 step 3 { - // CHECK-NEXT: for %i3 = 0 to %arg1 step 32 { + // CHECK: affine.for %i2 = 0 to %arg0 step 3 { + // CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 { // CHECK-NEXT: {{.*}} = constant splat, 2.000000e+00> : vector<3x16xf32> // CHECK-NEXT: {{.*}} = constant splat, 2.000000e+00> : vector<3x16xf32> // CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i2) @@ -42,15 +42,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i3) // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32> // - for %i2 = 0 to %M { - for %i3 = 0 to %N { + affine.for %i2 = 0 to %M { + affine.for %i3 = 0 to %N { // non-scoped %f2 store %f2, %B[%i2, %i3] : memref } } // 2x unroll (jammed by construction). - // CHECK: for %i4 = 0 to %arg0 step 3 { - // CHECK-NEXT: for %i5 = 0 to %arg1 step 32 { + // CHECK: affine.for %i4 = 0 to %arg0 step 3 { + // CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 { // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: {{.*}} = vector_transfer_read @@ -72,8 +72,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-NEXT: {{.*}} = affine.apply // CHECK-NEXT: vector_transfer_write // - for %i4 = 0 to %M { - for %i5 = 0 to %N { + affine.for %i4 = 0 to %M { + affine.for %i5 = 0 to %N { %a5 = load %A[%i4, %i5] : memref %b5 = load %B[%i4, %i5] : memref %s5 = addf %a5, %b5 : f32 diff --git a/mlir/test/Transforms/Vectorize/normalize_maps.mlir b/mlir/test/Transforms/Vectorize/normalize_maps.mlir index 9569dbe07fe0..076d2c756338 100644 --- a/mlir/test/Transforms/Vectorize/normalize_maps.mlir +++ b/mlir/test/Transforms/Vectorize/normalize_maps.mlir @@ -9,19 +9,19 @@ // CHECK-LABEL: func @simple() func @simple() { - for %i0 = 0 to 7 { + affine.for %i0 = 0 to 7 { %0 = affine.apply (d0) -> (d0) (%i0) %1 = affine.apply (d0) -> (d0) (%0) %2 = affine.apply (d0, d1) -> (d0 + d1) (%0, %0) %3 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0) } - // CHECK-NEXT: for %i0 = 0 to 7 + // CHECK-NEXT: affine.for %i0 = 0 to 7 // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i0) // CHECK-NEXT: {{.*}} affine.apply #[[D0TIMES2]](%i0) // CHECK-NEXT: {{.*}} affine.apply #[[ZERO]]() - for %i1 = 0 to 7 { - for %i2 = 0 to 42 { + affine.for %i1 = 0 to 7 { + affine.for %i2 = 0 to 42 { %20 = affine.apply (d0, d1) -> (d1) (%i1, %i2) %21 = affine.apply (d0, d1) -> (d0) (%i1, %i2) %22 = affine.apply (d0, d1) -> (d0 + d1) (%20, %21) @@ -29,15 +29,15 @@ func @simple() { %24 = affine.apply (d0, d1) -> (-d0 + d1) (%20, %21) } } - // CHECK: for %i1 = 0 to 7 - // CHECK-NEXT: for %i2 = 0 to 42 + // CHECK: affine.for %i1 = 0 to 7 + // CHECK-NEXT: affine.for %i2 = 0 to 42 // CHECK-NEXT: {{.*}} affine.apply #[[D0PLUSD1]](%i1, %i2) // CHECK-NEXT: {{.*}} affine.apply #[[MINSD0PLUSD1]](%i1, %i2) // CHECK-NEXT: {{.*}} affine.apply #[[D0MINUSD1]](%i1, %i2) - for %i3 = 0 to 16 { - for %i4 = 0 to 47 step 2 { - for %i5 = 0 to 78 step 16 { + affine.for %i3 = 0 to 16 { + affine.for %i4 = 0 to 47 step 2 { + affine.for %i5 = 0 to 78 step 16 { %50 = affine.apply (d0) -> (d0) (%i3) %51 = affine.apply (d0) -> (d0) (%i4) %52 = affine.apply (d0) -> (d0) (%i5) @@ -47,9 +47,9 @@ func @simple() { } } } - // CHECK: for %i3 = 0 to 16 - // CHECK-NEXT: for %i4 = 0 to 47 step 2 - // CHECK-NEXT: for %i5 = 0 to 78 step 16 + // CHECK: affine.for %i3 = 0 to 16 + // CHECK-NEXT: affine.for %i4 = 0 to 47 step 2 + // CHECK-NEXT: affine.for %i5 = 0 to 78 step 16 // CHECK-NEXT: 
{{.*}} affine.apply #[[ID1]](%i3) // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i4) // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i5) diff --git a/mlir/test/Transforms/Vectorize/vectorize_1d.mlir b/mlir/test/Transforms/Vectorize/vectorize_1d.mlir index 05e31dbdea5a..c812db2d4981 100644 --- a/mlir/test/Transforms/Vectorize/vectorize_1d.mlir +++ b/mlir/test/Transforms/Vectorize/vectorize_1d.mlir @@ -23,17 +23,17 @@ func @vec1d(%A : memref, %B : memref) { // // CHECK: for {{.*}} step 128 // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : (memref, index, index) -> vector<128xf32> - for %i0 = 0 to %M { // vectorized due to scalar -> vector + affine.for %i0 = 0 to %M { // vectorized due to scalar -> vector %a0 = load %A[%cst0, %cst0] : memref } // // CHECK:for {{.*}} [[ARG_M]] { - for %i1 = 0 to %M { // not vectorized + affine.for %i1 = 0 to %M { // not vectorized %a1 = load %A[%i1, %i1] : memref } // -// CHECK: for %i{{[0-9]*}} = 0 to [[ARG_M]] { - for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1 +// CHECK: affine.for %i{{[0-9]*}} = 0 to [[ARG_M]] { + affine.for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1 %r2 = affine.apply (d0) -> (d0) (%i2) %a2 = load %A[%r2#0, %cst0] : memref } @@ -41,7 +41,7 @@ func @vec1d(%A : memref, %B : memref) { // CHECK:for [[IV3:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128 // CHECK-NEXT: [[APP3:%[a-zA-Z0-9]+]] = affine.apply {{.*}}[[IV3]] // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[APP3]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32> - for %i3 = 0 to %M { // vectorized + affine.for %i3 = 0 to %M { // vectorized %r3 = affine.apply (d0) -> (d0) (%i3) %a3 = load %A[%cst0, %r3#0] : memref } @@ -51,8 +51,8 @@ func @vec1d(%A : memref, %B : memref) { // CHECK-NEXT: [[APP50:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]]) // CHECK-NEXT: [[APP51:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]]) // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP50]], [[APP51]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32> - for %i4 = 0 to %M { // vectorized - for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1 + affine.for %i4 = 0 to %M { // vectorized + affine.for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1 %r50 = affine.apply (d0, d1) -> (d1) (%i4, %i5) %r51 = affine.apply (d0, d1) -> (d0) (%i4, %i5) %a5 = load %A[%r50, %r51] : memref @@ -61,8 +61,8 @@ func @vec1d(%A : memref, %B : memref) { // // CHECK: for [[IV6:%[i0-9]*]] = 0 to [[ARG_M]] { // CHECK-NEXT: for [[IV7:%[i0-9]*]] = 0 to [[ARG_N]] { - for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1 - for %i7 = 0 to %N { // not vectorized, can never vectorize + affine.for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1 + affine.for %i7 = 0 to %N { // not vectorized, can never vectorize %r70 = affine.apply (d0, d1) -> (d1 + d0) (%i6, %i7) %r71 = affine.apply (d0, d1) -> (d0) (%i6, %i7) %a7 = load %A[%r70, %r71] : memref @@ -74,8 +74,8 @@ func @vec1d(%A : memref, %B : memref) { // CHECK-NEXT: [[APP9_0:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]]) // CHECK-NEXT: [[APP9_1:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]]) // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP9_0]], [[APP9_1]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32> - for %i8 = 0 to %M { // vectorized - for %i9 = 0 
to %N { + affine.for %i8 = 0 to %M { // vectorized + affine.for %i9 = 0 to %N { %r90 = affine.apply (d0, d1) -> (d1) (%i8, %i9) %r91 = affine.apply (d0, d1) -> (d0 + d1) (%i8, %i9) %a9 = load %A[%r90, %r91] : memref @@ -84,8 +84,8 @@ func @vec1d(%A : memref, %B : memref) { // // CHECK: for [[IV10:%[i0-9]*]] = 0 to %{{[0-9]*}} { // CHECK: for [[IV11:%[i0-9]*]] = 0 to %{{[0-9]*}} { - for %i10 = 0 to %M { // not vectorized, need per load transposes - for %i11 = 0 to %N { // not vectorized, need per load transposes + affine.for %i10 = 0 to %M { // not vectorized, need per load transposes + affine.for %i11 = 0 to %N { // not vectorized, need per load transposes %r11_0 = affine.apply (d0, d1) -> (d0) (%i10, %i11) %r11_1 = affine.apply (d0, d1) -> (d1) (%i10, %i11) %a11 = load %A[%r11_0, %r11_1] : memref @@ -98,9 +98,9 @@ func @vec1d(%A : memref, %B : memref) { // CHECK: for [[IV12:%[i0-9]*]] = 0 to %{{[0-9]*}} { // CHECK: for [[IV13:%[i0-9]*]] = 0 to %{{[0-9]*}} { // CHECK: for [[IV14:%[i0-9]+]] = 0 to [[ARG_P]] step 128 - for %i12 = 0 to %M { // not vectorized, can never vectorize - for %i13 = 0 to %N { // not vectorized, can never vectorize - for %i14 = 0 to %P { // vectorized + affine.for %i12 = 0 to %M { // not vectorized, can never vectorize + affine.for %i13 = 0 to %N { // not vectorized, can never vectorize + affine.for %i14 = 0 to %P { // vectorized %r14_0 = affine.apply (d0, d1, d2) -> (d1) (%i12, %i13, %i14) %r14_1 = affine.apply (d0, d1, d2) -> (d0 + d1) (%i12, %i13, %i14) %r14_2 = affine.apply (d0, d1, d2) -> (d0 + d2) (%i12, %i13, %i14) @@ -109,24 +109,24 @@ func @vec1d(%A : memref, %B : memref) { } } // -// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} { - for %i15 = 0 to %M { // not vectorized due to condition below +// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} { + affine.for %i15 = 0 to %M { // not vectorized due to condition below affine.if #set0(%i15) { %a15 = load %A[%cst0, %cst0] : memref } } // -// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} { - for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load +// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} { + affine.for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load %a16 = alloc(%M) : memref> %l16 = load %a16[%i16] : memref> } // -// CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} { +// CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} { // CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128 // CHECK: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : {{.*}} -> vector<128xf32> - for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17 - for %i18 = 0 to %M { // vectorized due to scalar -> vector + affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17 + affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector %a18 = load %A[%cst0, %cst0] : memref } } @@ -139,24 +139,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %C = alloc (%M, %N) : memref %f1 = constant 1.0 : f32 %f2 = constant 2.0 : f32 - for %i0 = 0 to %M { - for %i1 = 0 to %N { + affine.for %i0 = 0 to %M { + affine.for %i1 = 0 to %N { // CHECK: [[C1:%.*]] = constant splat, 1.000000e+00> : vector<128xf32> // CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref, index, index // non-scoped %f1 store %f1, %A[%i0, %i1] : memref } } - for %i2 = 0 to %M { - for %i3 = 0 to %N { + affine.for 
%i2 = 0 to %M { + affine.for %i3 = 0 to %N { // CHECK: [[C3:%.*]] = constant splat, 2.000000e+00> : vector<128xf32> // CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref, index, index // non-scoped %f2 store %f2, %B[%i2, %i3] : memref } } - for %i4 = 0 to %M { - for %i5 = 0 to %N { + affine.for %i4 = 0 to %M { + affine.for %i5 = 0 to %N { // CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref, index, index) -> vector<128xf32> // CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref, index, index) -> vector<128xf32> // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<128xf32> @@ -188,10 +188,10 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { // CHECK-LABEL: @vec_rejected func @vec_rejected(%A : memref, %C : memref) { %N = dim %A, 0 : memref - for %i = 0 to %N { + affine.for %i = 0 to %N { // CHECK-NOT: vector %a = load %A[%i, %i] : memref // not vectorized - for %j = 0 to %N { + affine.for %j = 0 to %N { %b = load %A[%i, %j] : memref // may be vectorized // CHECK-NOT: vector %c = addf %a, %b : f32 // not vectorized because %a wasn't diff --git a/mlir/test/Transforms/Vectorize/vectorize_2d.mlir b/mlir/test/Transforms/Vectorize/vectorize_2d.mlir index d847f6bb5ceb..59c7483749b9 100644 --- a/mlir/test/Transforms/Vectorize/vectorize_2d.mlir +++ b/mlir/test/Transforms/Vectorize/vectorize_2d.mlir @@ -11,13 +11,13 @@ func @vec2d(%A : memref) { // CHECK: for {{.*}} = 0 to %1 step 32 // CHECK: for {{.*}} = 0 to %2 step 256 // Example: - // for %i0 = 0 to %0 { - // for %i1 = 0 to %1 step 32 { - // for %i2 = 0 to %2 step 256 { + // affine.for %i0 = 0 to %0 { + // affine.for %i1 = 0 to %1 step 32 { + // affine.for %i2 = 0 to %2 step 256 { // %3 = "vector_transfer_read"(%arg0, %i0, %i1, %i2) : (memref, index, index, index) -> vector<32x256xf32> - for %i0 = 0 to %M { - for %i1 = 0 to %N { - for %i2 = 0 to %P { + affine.for %i0 = 0 to %M { + affine.for %i1 = 0 to %N { + affine.for %i2 = 0 to %P { %a2 = load %A[%i0, %i1, %i2] : memref } } @@ -27,9 +27,9 @@ func @vec2d(%A : memref) { // CHECK: for {{.*}} = 0 to %2 { // For the case: --test-fastest-varying=1 --test-fastest-varying=0 no // vectorization happens because of loop nesting order . 
- for %i3 = 0 to %M { - for %i4 = 0 to %N { - for %i5 = 0 to %P { + affine.for %i3 = 0 to %M { + affine.for %i4 = 0 to %N { + affine.for %i5 = 0 to %P { %a5 = load %A[%i4, %i5, %i3] : memref } } @@ -43,24 +43,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 { %C = alloc (%M, %N) : memref %f1 = constant 1.0 : f32 %f2 = constant 2.0 : f32 - for %i0 = 0 to %M { - for %i1 = 0 to %N { + affine.for %i0 = 0 to %M { + affine.for %i1 = 0 to %N { // CHECK: [[C1:%.*]] = constant splat, 1.000000e+00> : vector<32x256xf32> // CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref, index, index // non-scoped %f1 store %f1, %A[%i0, %i1] : memref } } - for %i2 = 0 to %M { - for %i3 = 0 to %N { + affine.for %i2 = 0 to %M { + affine.for %i3 = 0 to %N { // CHECK: [[C3:%.*]] = constant splat, 2.000000e+00> : vector<32x256xf32> // CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref, index, index // non-scoped %f2 store %f2, %B[%i2, %i3] : memref } } - for %i4 = 0 to %M { - for %i5 = 0 to %N { + affine.for %i4 = 0 to %M { + affine.for %i5 = 0 to %N { // CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref, index, index) -> vector<32x256xf32> // CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref, index, index) -> vector<32x256xf32> // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32> diff --git a/mlir/test/Transforms/Vectorize/vectorize_3d.mlir b/mlir/test/Transforms/Vectorize/vectorize_3d.mlir index 1a6bee585ee6..08ca27dbeee4 100644 --- a/mlir/test/Transforms/Vectorize/vectorize_3d.mlir +++ b/mlir/test/Transforms/Vectorize/vectorize_3d.mlir @@ -7,17 +7,17 @@ func @vec3d(%A : memref) { %0 = dim %A, 0 : memref %1 = dim %A, 1 : memref %2 = dim %A, 2 : memref - // CHECK: for %i0 = 0 to %0 { - // CHECK: for %i1 = 0 to %0 { - // CHECK: for %i2 = 0 to %0 step 32 { - // CHECK: for %i3 = 0 to %1 step 64 { - // CHECK: for %i4 = 0 to %2 step 256 { + // CHECK: affine.for %i0 = 0 to %0 { + // CHECK: affine.for %i1 = 0 to %0 { + // CHECK: affine.for %i2 = 0 to %0 step 32 { + // CHECK: affine.for %i3 = 0 to %1 step 64 { + // CHECK: affine.for %i4 = 0 to %2 step 256 { // CHECK: %3 = vector_transfer_read %arg0, %i2, %i3, %i4 {permutation_map: #[[map_proj_d0d1d2_d0d1d2]]} : (memref, index, index, index) -> vector<32x64x256xf32> - for %t0 = 0 to %0 { - for %t1 = 0 to %0 { - for %i0 = 0 to %0 { - for %i1 = 0 to %1 { - for %i2 = 0 to %2 { + affine.for %t0 = 0 to %0 { + affine.for %t1 = 0 to %0 { + affine.for %i0 = 0 to %0 { + affine.for %i1 = 0 to %1 { + affine.for %i2 = 0 to %2 { %a2 = load %A[%i0, %i1, %i2] : memref } } diff --git a/mlir/test/Transforms/Vectorize/vectorize_outer_loop_2d.mlir b/mlir/test/Transforms/Vectorize/vectorize_outer_loop_2d.mlir index 4654ab810dfe..d00b99f1716a 100644 --- a/mlir/test/Transforms/Vectorize/vectorize_outer_loop_2d.mlir +++ b/mlir/test/Transforms/Vectorize/vectorize_outer_loop_2d.mlir @@ -7,13 +7,13 @@ func @vec2d(%A : memref) { %M = dim %A, 0 : memref %N = dim %A, 1 : memref %P = dim %A, 2 : memref - // CHECK: for %i0 = 0 to %0 step 32 - // CHECK: for %i1 = 0 to %1 { - // CHECK: for %i2 = 0 to %2 step 256 + // CHECK: affine.for %i0 = 0 to %0 step 32 + // CHECK: affine.for %i1 = 0 to %1 { + // CHECK: affine.for %i2 = 0 to %2 step 256 // CHECK: {{.*}} = vector_transfer_read %arg0, %i0, %i1, %i2 {permutation_map: #[[map_proj_d0d1d2_d0d2]]} : (memref, 
index, index, index) -> vector<32x256xf32> - for %i0 = 0 to %M { - for %i1 = 0 to %N { - for %i2 = 0 to %P { + affine.for %i0 = 0 to %M { + affine.for %i1 = 0 to %N { + affine.for %i2 = 0 to %P { %a2 = load %A[%i0, %i1, %i2] : memref } } @@ -23,9 +23,9 @@ func @vec2d(%A : memref) { // CHECK: for {{.*}} = 0 to %2 { // For the case: --test-fastest-varying=2 --test-fastest-varying=0 no // vectorization happens because of loop nesting order - for %i3 = 0 to %M { - for %i4 = 0 to %N { - for %i5 = 0 to %P { + affine.for %i3 = 0 to %M { + affine.for %i4 = 0 to %N { + affine.for %i5 = 0 to %P { %a5 = load %A[%i4, %i5, %i3] : memref } } diff --git a/mlir/test/Transforms/Vectorize/vectorize_outer_loop_transpose_2d.mlir b/mlir/test/Transforms/Vectorize/vectorize_outer_loop_transpose_2d.mlir index 0eebf8165357..a8a8d5d7790f 100644 --- a/mlir/test/Transforms/Vectorize/vectorize_outer_loop_transpose_2d.mlir +++ b/mlir/test/Transforms/Vectorize/vectorize_outer_loop_transpose_2d.mlir @@ -12,20 +12,20 @@ func @vec2d(%A : memref) { // CHECK: for {{.*}} = 0 to %2 { // For the case: --test-fastest-varying=0 --test-fastest-varying=2 no // vectorization happens because of loop nesting order. - for %i0 = 0 to %M { - for %i1 = 0 to %N { - for %i2 = 0 to %P { + affine.for %i0 = 0 to %M { + affine.for %i1 = 0 to %N { + affine.for %i2 = 0 to %P { %a2 = load %A[%i0, %i1, %i2] : memref } } } - // CHECK: for %i3 = 0 to %0 step 32 - // CHECK: for %i4 = 0 to %1 step 256 - // CHECK: for %i5 = 0 to %2 { + // CHECK: affine.for %i3 = 0 to %0 step 32 + // CHECK: affine.for %i4 = 0 to %1 step 256 + // CHECK: affine.for %i5 = 0 to %2 { // CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref, index, index, index) -> vector<32x256xf32> - for %i3 = 0 to %M { - for %i4 = 0 to %N { - for %i5 = 0 to %P { + affine.for %i3 = 0 to %M { + affine.for %i4 = 0 to %N { + affine.for %i5 = 0 to %P { %a5 = load %A[%i4, %i5, %i3] : memref } } @@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref) { %0 = dim %A, 0 : memref %1 = dim %A, 1 : memref %2 = dim %A, 2 : memref - // CHECK: for %i0 = 0 to %0 step 32 { - // CHECK: for %i1 = 0 to %1 { - // CHECK: for %i2 = 0 to %2 step 256 { + // CHECK: affine.for %i0 = 0 to %0 step 32 { + // CHECK: affine.for %i1 = 0 to %1 { + // CHECK: affine.for %i2 = 0 to %2 step 256 { // CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref, index, index, index) -> vector<32x256xf32> - // CHECK: for %i3 = 0 to %1 step 256 { - // CHECK: for %i4 = 0 to %2 { + // CHECK: affine.for %i3 = 0 to %1 step 256 { + // CHECK: affine.for %i4 = 0 to %2 { // CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref, index, index, index) -> vector<32x256xf32> - // CHECK: for %i5 = 0 to %2 { + // CHECK: affine.for %i5 = 0 to %2 { // CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref, index, index, index) -> vector<32x256xf32> - for %i0 = 0 to %0 { - for %i1 = 0 to %1 { - for %i2 = 0 to %2 { + affine.for %i0 = 0 to %0 { + affine.for %i1 = 0 to %1 { + affine.for %i2 = 0 to %2 { %a2 = load %A[%i2, %i1, %i0] : memref } } - for %i3 = 0 to %1 { - for %i4 = 0 to %2 { + affine.for %i3 = 0 to %1 { + affine.for %i4 = 0 to %2 { %a4 = load %A[%i3, %i4, %i0] : memref } - for %i5 = 0 to %2 { + affine.for %i5 = 0 to %2 { %a5 = load %A[%i3, %i5, %i0] : memref } } diff --git 
a/mlir/test/Transforms/Vectorize/vectorize_transpose_2d.mlir b/mlir/test/Transforms/Vectorize/vectorize_transpose_2d.mlir index 1ba563b3442a..b8e4e0758902 100644 --- a/mlir/test/Transforms/Vectorize/vectorize_transpose_2d.mlir +++ b/mlir/test/Transforms/Vectorize/vectorize_transpose_2d.mlir @@ -12,20 +12,20 @@ func @vec2d(%A : memref) { // CHECK: for {{.*}} = 0 to %2 { // For the case: --test-fastest-varying=0 --test-fastest-varying=1 no // vectorization happens because of loop nesting order. - for %i0 = 0 to %M { - for %i1 = 0 to %N { - for %i2 = 0 to %P { + affine.for %i0 = 0 to %M { + affine.for %i1 = 0 to %N { + affine.for %i2 = 0 to %P { %a2 = load %A[%i0, %i1, %i2] : memref } } } - // CHECK: for %i3 = 0 to %0 step 32 - // CHECK: for %i4 = 0 to %1 { - // CHECK: for %i5 = 0 to %2 step 256 + // CHECK: affine.for %i3 = 0 to %0 step 32 + // CHECK: affine.for %i4 = 0 to %1 { + // CHECK: affine.for %i5 = 0 to %2 step 256 // CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref, index, index, index) -> vector<32x256xf32> - for %i3 = 0 to %M { - for %i4 = 0 to %N { - for %i5 = 0 to %P { + affine.for %i3 = 0 to %M { + affine.for %i4 = 0 to %N { + affine.for %i5 = 0 to %P { %a5 = load %A[%i4, %i5, %i3] : memref } } @@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref) { %0 = dim %A, 0 : memref %1 = dim %A, 1 : memref %2 = dim %A, 2 : memref - // CHECK: for %i0 = 0 to %0 step 32 { - // CHECK: for %i1 = 0 to %1 step 256 { - // CHECK: for %i2 = 0 to %2 { + // CHECK: affine.for %i0 = 0 to %0 step 32 { + // CHECK: affine.for %i1 = 0 to %1 step 256 { + // CHECK: affine.for %i2 = 0 to %2 { // CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref, index, index, index) -> vector<32x256xf32> - // CHECK: for %i3 = 0 to %1 { - // CHECK: for %i4 = 0 to %2 step 256 { + // CHECK: affine.for %i3 = 0 to %1 { + // CHECK: affine.for %i4 = 0 to %2 step 256 { // CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref, index, index, index) -> vector<32x256xf32> - // CHECK: for %i5 = 0 to %2 step 256 { + // CHECK: affine.for %i5 = 0 to %2 step 256 { // CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref, index, index, index) -> vector<32x256xf32> - for %i0 = 0 to %0 { - for %i1 = 0 to %1 { - for %i2 = 0 to %2 { + affine.for %i0 = 0 to %0 { + affine.for %i1 = 0 to %1 { + affine.for %i2 = 0 to %2 { %a2 = load %A[%i2, %i1, %i0] : memref } } - for %i3 = 0 to %1 { - for %i4 = 0 to %2 { + affine.for %i3 = 0 to %1 { + affine.for %i4 = 0 to %2 { %a4 = load %A[%i3, %i4, %i0] : memref } - for %i5 = 0 to %2 { + affine.for %i5 = 0 to %2 { %a5 = load %A[%i3, %i5, %i0] : memref } } diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir index 142770f71b6f..94edd91004b4 100644 --- a/mlir/test/Transforms/canonicalize.mlir +++ b/mlir/test/Transforms/canonicalize.mlir @@ -205,10 +205,10 @@ func @dyn_shape_fold(%L : index, %M : index) -> (memref, memref %c = alloc(%K, %N) : memref - // CHECK: for %i0 = - for %i = 0 to %L { - // CHECK-NEXT: for %i1 = - for %j = 0 to 10 { + // CHECK: affine.for %i0 = + affine.for %i = 0 to %L { + // CHECK-NEXT: affine.for %i1 = + affine.for %j = 0 to 10 { // CHECK-NEXT: %4 = load %0[%i0, %i1] : memref // CHECK-NEXT: store %4, %1[%c0, %c0, %i0, %i1, %c0] : memref<4x1024x8x512x?xf32> %v = load %a[%i, %j] : memref @@ -234,8 +234,8 @@ func 
@merge_constants() -> (index, index) { // CHECK-LABEL: func @hoist_constant func @hoist_constant(%arg0: memref<8xi32>) { // CHECK-NEXT: %c42_i32 = constant 42 : i32 - // CHECK-NEXT: for %i0 = 0 to 8 { - for %i0 = 0 to 8 { + // CHECK-NEXT: affine.for %i0 = 0 to 8 { + affine.for %i0 = 0 to 8 { // CHECK-NEXT: store %c42_i32, %arg0[%i0] %c42_i32 = constant 42 : i32 store %c42_i32, %arg0[%i0] : memref<8xi32> diff --git a/mlir/test/Transforms/constant-fold.mlir b/mlir/test/Transforms/constant-fold.mlir index b9197b967ce6..b40daa1df6f3 100644 --- a/mlir/test/Transforms/constant-fold.mlir +++ b/mlir/test/Transforms/constant-fold.mlir @@ -2,8 +2,8 @@ // CHECK-LABEL: @test(%arg0: memref) { func @test(%p : memref) { - for %i0 = 0 to 128 { - for %i1 = 0 to 8 { // CHECK: for %i1 = 0 to 8 { + affine.for %i0 = 0 to 128 { + affine.for %i1 = 0 to 8 { // CHECK: affine.for %i1 = 0 to 8 { %0 = constant 4.5 : f32 %1 = constant 1.5 : f32 diff --git a/mlir/test/Transforms/cse.mlir b/mlir/test/Transforms/cse.mlir index 38d95a8abecb..617bd800fedb 100644 --- a/mlir/test/Transforms/cse.mlir +++ b/mlir/test/Transforms/cse.mlir @@ -113,8 +113,8 @@ func @down_propagate_for() { // CHECK: %c1_i32 = constant 1 : i32 %0 = constant 1 : i32 - // CHECK-NEXT: for %i0 = 0 to 4 { - for %i = 0 to 4 { + // CHECK-NEXT: affine.for %i0 = 0 to 4 { + affine.for %i = 0 to 4 { // CHECK-NEXT: "foo"(%c1_i32, %c1_i32) : (i32, i32) -> () %1 = constant 1 : i32 "foo"(%0, %1) : (i32, i32) -> () @@ -145,8 +145,8 @@ func @down_propagate() -> i32 { /// Check that operation definitions are NOT propagated up the dominance tree. // CHECK-LABEL: @up_propagate_for func @up_propagate_for() -> i32 { - // CHECK: for %i0 = 0 to 4 { - for %i = 0 to 4 { + // CHECK: affine.for %i0 = 0 to 4 { + affine.for %i = 0 to 4 { // CHECK-NEXT: %c1_i32 = constant 1 : i32 // CHECK-NEXT: "foo"(%c1_i32) : (i32) -> () %0 = constant 1 : i32 diff --git a/mlir/test/Transforms/dma-generate.mlir b/mlir/test/Transforms/dma-generate.mlir index 1b3d35e1154c..dfdfb7a14c3c 100644 --- a/mlir/test/Transforms/dma-generate.mlir +++ b/mlir/test/Transforms/dma-generate.mlir @@ -42,7 +42,7 @@ func @loop_nest_1d() { // Second DMA transfer. // CHECK: dma_start %1[%c256], %5[%c0], %c256_0, %6[%c0] : memref<512xf32>, memref<256xf32, 2>, memref<1xi32> // CHECK-NEXT: dma_wait %6[%c0], %c256_0 : memref<1xi32> - // CHECK: for %i0 = 0 to 256 { + // CHECK: affine.for %i0 = 0 to 256 { // CHECK-NEXT: %7 = load %3[%i0] : memref<256xf32, 2> // CHECK: %8 = affine.apply [[MAP_PLUS_256]](%i0) // CHECK: %9 = affine.apply [[MAP_MINUS_256]](%8) @@ -55,7 +55,7 @@ func @loop_nest_1d() { // CHECK-NEXT: dealloc %4 : memref<1xi32> // CHECK-NEXT: dealloc %3 : memref<256xf32, 2> // CHECK-NEXT: return - for %i = 0 to 256 { + affine.for %i = 0 to 256 { load %A[%i] : memref<256 x f32> %idx = affine.apply (d0) -> (d0 + 256)(%i) load %B[%idx] : memref<512 x f32> @@ -82,20 +82,20 @@ func @loop_nest_1d() { // INCOMING DMA for C. 
// CHECK-DAG: dma_start %arg2[%c0, %c0], [[BUFC]][%c0, %c0], %c16384_0, [[TAGC]][%c0] : memref<512x32xf32>, memref<512x32xf32, 2>, memref<1xi32> // CHECK-DAG: dma_wait [[TAGC]][%c0], %c16384_0 : memref<1xi32> -// CHECK-NEXT: for %i0 = 0 to 32 { -// CHECK-NEXT: for %i1 = 0 to 32 { -// CHECK-NEXT: for %i2 = 0 to 32 { -// CHECK-NEXT: for %i3 = 0 to 16 { +// CHECK-NEXT: affine.for %i0 = 0 to 32 { +// CHECK-NEXT: affine.for %i1 = 0 to 32 { +// CHECK-NEXT: affine.for %i2 = 0 to 32 { +// CHECK-NEXT: affine.for %i3 = 0 to 16 { // CHECK-NEXT: %7 = affine.apply #map{{[0-9]+}}(%i1, %i3) // CHECK-NEXT: %8 = load [[BUFB]][%7, %i0] : memref<512x32xf32, 2> // CHECK-NEXT: "foo"(%8) : (f32) -> () // CHECK-NEXT: } -// CHECK-NEXT: for %i4 = 0 to 16 { +// CHECK-NEXT: affine.for %i4 = 0 to 16 { // CHECK-NEXT: %9 = affine.apply #map{{[0-9]+}}(%i2, %i4) // CHECK-NEXT: %10 = load [[BUFA]][%9, %i1] : memref<512x32xf32, 2> // CHECK-NEXT: "bar"(%10) : (f32) -> () // CHECK-NEXT: } -// CHECK-NEXT: for %i5 = 0 to 16 { +// CHECK-NEXT: affine.for %i5 = 0 to 16 { // CHECK-NEXT: %11 = "abc_compute"() : () -> f32 // CHECK-NEXT: %12 = affine.apply #map{{[0-9]+}}(%i2, %i5) // CHECK-NEXT: %13 = load [[BUFC]][%12, %i0] : memref<512x32xf32, 2> @@ -123,20 +123,20 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>, // DMAs will be performed at this level (jT is the first loop without a stride). // A and B are read, while C is both read and written. A total of three new buffers // are allocated and existing load's/store's are replaced by accesses to those buffers. - for %jT = 0 to 32 { - for %kT = 0 to 32 { - for %iT = 0 to 32 { - for %kk = 0 to 16 { // k intratile + affine.for %jT = 0 to 32 { + affine.for %kT = 0 to 32 { + affine.for %iT = 0 to 32 { + affine.for %kk = 0 to 16 { // k intratile %k = affine.apply (d0, d1) -> (16*d0 + d1) (%kT, %kk) %v0 = load %B[%k, %jT] : memref<512 x 32 x f32> "foo"(%v0) : (f32) -> () } - for %ii = 0 to 16 { // i intratile. + affine.for %ii = 0 to 16 { // i intratile. %i = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii) %v1 = load %A[%i, %kT] : memref<512 x 32 x f32> "bar"(%v1) : (f32) -> () } - for %ii_ = 0 to 16 { // i intratile. + affine.for %ii_ = 0 to 16 { // i intratile. %v2 = "abc_compute"() : () -> f32 %i_ = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii_) %v3 = load %C[%i_, %jT] : memref<512 x 32 x f32> @@ -155,13 +155,13 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>, // // CHECK-LABEL: func @loop_nest_modulo() { // CHECK: %0 = alloc() : memref<256x8xf32> -// CHECK-NEXT: for %i0 = 0 to 32 step 4 { +// CHECK-NEXT: affine.for %i0 = 0 to 32 step 4 { // CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0) // CHECK-NEXT: %2 = alloc() : memref<1x2xf32, 2> // CHECK-NEXT: %3 = alloc() : memref<1xi32> // CHECK-NEXT: dma_start %0[%1, %c0], %2[%c0, %c0], %c2, %3[%c0] : memref<256x8xf32>, memref<1x2xf32, 2>, memref<1xi32> // CHECK-NEXT: dma_wait %3[%c0], %c2 : memref<1xi32> -// CHECK-NEXT: for %i1 = 0 to 8 { +// CHECK-NEXT: affine.for %i1 = 0 to 8 { // ... // ... // CHECK: } @@ -171,9 +171,9 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>, // CHECK-NEXT: return func @loop_nest_modulo() { %A = alloc() : memref<256 x 8 x f32> - for %i = 0 to 32 step 4 { + affine.for %i = 0 to 32 step 4 { // DMAs will be performed at this level (%j is the first unit stride loop) - for %j = 0 to 8 { + affine.for %j = 0 to 8 { %idx = affine.apply (d0) -> (d0 mod 2) (%j) // A buffer of size 32 x 2 will be allocated (original buffer was 256 x 8). 
%v = load %A[%i, %idx] : memref<256 x 8 x f32> @@ -187,17 +187,17 @@ func @loop_nest_modulo() { // CHECK-LABEL: func @loop_nest_tiled() -> memref<256x1024xf32> { func @loop_nest_tiled() -> memref<256x1024xf32> { %0 = alloc() : memref<256x1024xf32> - for %i0 = 0 to 256 step 32 { - for %i1 = 0 to 1024 step 32 { + affine.for %i0 = 0 to 256 step 32 { + affine.for %i1 = 0 to 1024 step 32 { // CHECK: %3 = alloc() : memref<32x32xf32, 2> // CHECK-NEXT: %4 = alloc() : memref<1xi32> // Strided DMA here: 32 x 32 tile in a 256 x 1024 memref. // CHECK-NEXT: dma_start %0[%1, %2], %3[%c0, %c0], %c1024, %4[%c0], %c1024_0, %c32 : memref<256x1024xf32>, memref<32x32xf32, 2>, memref<1xi32> // CHECK-NEXT: dma_wait -// CHECK-NEXT: for %i2 = #map -// CHECK-NEXT: for %i3 = #map - for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) { - for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) { +// CHECK-NEXT: affine.for %i2 = #map +// CHECK-NEXT: affine.for %i3 = #map + affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) { + affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) { // CHECK-NEXT: %5 = affine.apply [[MAP_INDEX_DIFF_EVEN]](%i0, %i1, %i2, %i3) // CHECK-NEXT: %6 = affine.apply [[MAP_INDEX_DIFF_ODD]](%i0, %i1, %i2, %i3) // CHECK-NEXT: %7 = load %3[%5, %6] : memref<32x32xf32, 2> @@ -218,8 +218,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) { // No strided DMA needed here. // CHECK: dma_start %arg0[%c1, %c0], %0[%c0, %c0], %c100, %1[%c0] : memref<100x100xf32>, memref<1x100xf32, 2>, // CHECK-NEXT: dma_wait %1[%c0], %c100 : memref<1xi32> - for %i = 0 to 100 { - for %j = 0 to ()[s0] -> (s0) ()[%N] { + affine.for %i = 0 to 100 { + affine.for %j = 0 to ()[s0] -> (s0) ()[%N] { // CHECK: %2 = affine.apply [[MAP_D0_MINUS_ONE]](%c1_0, %i1) // CHECK: %3 = affine.apply [[MAP_D1]](%c1_0, %i1) // CHECK-NEXT: %4 = load %0[%2, %3] : memref<1x100xf32, 2> @@ -232,8 +232,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) { // CHECK-LABEL: func @dma_with_symbolic_accesses func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) { %N = constant 9 : index - for %i = 0 to 100 { - for %j = 0 to 100 { + affine.for %i = 0 to 100 { + affine.for %j = 0 to 100 { %idy = affine.apply (d0, d1) [s0, s1] -> (d1 + s0 + s1)(%i, %j)[%M, %N] load %A[%i, %idy] : memref<100 x 100 x f32> } @@ -243,8 +243,8 @@ func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) { // CHECK-NEXT: %2 = alloc() : memref<1xi32> // CHECK-NEXT: dma_start %arg0[%c0, %0], %1[%c0, %c0], %c10000, %2[%c0] // CHECK-NEXT: dma_wait %2[%c0], %c10000 -// CHECK-NEXT: for %i0 = 0 to 100 { -// CHECK-NEXT: for %i1 = 0 to 100 { +// CHECK-NEXT: affine.for %i0 = 0 to 100 { +// CHECK-NEXT: affine.for %i1 = 0 to 100 { // CHECK-NEXT: %3 = affine.apply [[MAP_SYM_SHIFT]](%i0, %i1)[%arg1, %c9] // CHECK-NEXT: %4 = affine.apply [[MAP_3D_D1]](%arg1, %i0, %3) // CHECK-NEXT: %5 = affine.apply [[MAP_SUB_OFFSET]](%arg1, %i0, %3) @@ -263,8 +263,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in // CHECK-NEXT: %1 = alloc() : memref<1xi32> // CHECK-NEXT: dma_start %arg0[%c0, %c0], %0[%c0, %c0], %c10000, %1[%c0] : memref<100x100xf32>, memref<100x100xf32, 2>, memref<1xi32> // CHECK-NEXT: dma_wait %1[%c0], %c10000 : memref<1xi32> - for %i = 0 to 100 { - for %j = %M to %N { + affine.for %i = 0 to 100 { + affine.for %j = %M to %N { %idy = affine.apply (d1) [s0] -> (d1 + s0)(%j)[%K] load %A[%i, %idy] : memref<100 x 100 x f32> } @@ -278,8 +278,8 @@ func @dma_with_symbolic_loop_bounds(%A : 
memref<100x100xf32>, %M : index, %N: in func @dma_unknown_size(%arg0: memref) { %M = dim %arg0, 0 : memref %N = dim %arg0, 0 : memref - for %i = 0 to %M { - for %j = 0 to %N { + affine.for %i = 0 to %M { + affine.for %j = 0 to %N { // If this loop nest isn't tiled, the access requires a non-constant DMA // size -- not yet implemented. // CHECK: %2 = load %arg0[%i0, %i1] : memref @@ -294,9 +294,9 @@ func @dma_unknown_size(%arg0: memref) { // CHECK-LABEL: func @dma_memref_3d func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) { - for %i = 0 to 1024 { - for %j = 0 to 1024 { - for %k = 0 to 1024 { + affine.for %i = 0 to 1024 { + affine.for %j = 0 to 1024 { + affine.for %k = 0 to 1024 { %idx = affine.apply (d0) -> (d0 mod 128)(%i) %idy = affine.apply (d0) -> (d0 mod 128)(%j) %idz = affine.apply (d0) -> (d0 mod 128)(%k) @@ -330,8 +330,8 @@ func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) { // CHECK-LABEL: func @multi_load_store_union() { func @multi_load_store_union() { %A = alloc() : memref<512 x 512 x f32> - for %i = 0 to 256 { - for %j = 0 to 256 { + affine.for %i = 0 to 256 { + affine.for %j = 0 to 256 { %idx = affine.apply (d0) -> (d0 + 64)(%i) %idy = affine.apply (d0) -> (d0 + 128)(%j) %ishift = affine.apply (d0) -> (d0 + 2)(%i) @@ -355,8 +355,8 @@ func @multi_load_store_union() { // CHECK-NEXT: dma_start %0[%c2_1, %c2_2], %1[%c0, %c0], %c170372_3, %2[%c0], %c512_4, %c446_5 : memref<512x512xf32>, memref<382x446xf32, 2>, memref<1xi32> // CHECK-NEXT: dma_wait %2[%c0], %c170372_3 : memref<1xi32> // CHECK-NEXT: %3 = alloc() : memref<1xi32> -// CHECK-NEXT: for %i0 = 0 to 256 { -// CHECK-NEXT: for %i1 = 0 to 256 { +// CHECK-NEXT: affine.for %i0 = 0 to 256 { +// CHECK-NEXT: affine.for %i1 = 0 to 256 { // CHECK-NEXT: %4 = affine.apply [[MAP_PLUS_64]](%i0) // CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_128]](%i1) // CHECK-NEXT: %6 = affine.apply [[MAP_PLUS_2]](%i0) @@ -395,7 +395,7 @@ func @dma_loop_straightline_interspersed() { %c255 = constant 255 : index %A = alloc() : memref<256 x f32> %v = load %A[%c0] : memref<256 x f32> - for %i = 1 to 255 { + affine.for %i = 1 to 255 { load %A[%i] : memref<256 x f32> } %l = load %A[%c255] : memref<256 x f32> @@ -416,7 +416,7 @@ func @dma_loop_straightline_interspersed() { // CHECK-NEXT: %5 = alloc() : memref<1xi32> // CHECK-NEXT: dma_start %0[%c1_0], %4[%c0], %c254, %5[%c0] : memref<256xf32>, memref<254xf32, 2>, memref<1xi32> // CHECK-NEXT: dma_wait %5[%c0], %c254 : memref<1xi32> -// CHECK-NEXT: for %i0 = 1 to 255 { +// CHECK-NEXT: affine.for %i0 = 1 to 255 { // CHECK-NEXT: %6 = affine.apply [[MAP_MINUS_ONE]](%i0) // CHECK-NEXT: %7 = load %4[%6] : memref<254xf32, 2> // CHECK-NEXT: } @@ -442,10 +442,10 @@ func @dma_loop_straightline_interspersed() { func @dma_mixed_loop_blocks() { %c0 = constant 0 : index %A = alloc() : memref<256 x 256 x vector<8 x f32>> - for %i = 0 to 256 { + affine.for %i = 0 to 256 { %v = load %A[%c0, %c0] : memref<256 x 256 x vector<8 x f32>> "foo"(%v) : (vector<8 x f32>) -> () - for %j = 0 to 256 { + affine.for %j = 0 to 256 { %w = load %A[%i, %j] : memref<256 x 256 x vector<8 x f32>> "bar"(%w) : (vector<8 x f32>) -> () } @@ -457,17 +457,17 @@ func @dma_mixed_loop_blocks() { // CHECK-DAG: [[TAG:%[0-9]+]] = alloc() : memref<1xi32> // CHECK: dma_start [[MEM]][%c0, %c0], [[BUF]][%c0, %c0], %c65536, [[TAG]][%c0] : memref<256x256xvector<8xf32>>, memref<256x256xvector<8xf32>, 2>, memref<1xi32> // CHECK-NEXT: dma_wait [[TAG]][%c0], %c65536 : memref<1xi32> -// CHECK-NEXT: for %i0 = 0 to 256 { +// CHECK-NEXT: affine.for %i0 = 0 to 256 { 
// CHECK-NEXT: %3 = load [[BUF]][%c0_0, %c0_0] : memref<256x256xvector<8xf32>, 2> -// CHECK: for %i1 = 0 to 256 { +// CHECK: affine.for %i1 = 0 to 256 { // CHECK-NEXT: %4 = load [[BUF]][%i0, %i1] : memref<256x256xvector<8xf32>, 2> // ----- // CHECK-LABEL: func @relative_loop_bounds func @relative_loop_bounds(%arg0: memref<1027xf32>) { - for %i0 = 0 to 1024 { - for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) { + affine.for %i0 = 0 to 1024 { + affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) { %0 = constant 0.0 : f32 store %0, %arg0[%i2] : memref<1027xf32> } @@ -476,8 +476,8 @@ func @relative_loop_bounds(%arg0: memref<1027xf32>) { } // CHECK: [[BUF:%[0-9]+]] = alloc() : memref<1027xf32, 2> // CHECK-NEXT: [[MEM:%[0-9]+]] = alloc() : memref<1xi32> -// CHECK-NEXT: for %i0 = 0 to 1024 { -// CHECK-NEXT: for %i1 = {{#map[0-9]+}}(%i0) to {{#map[0-9]+}}(%i0) { +// CHECK-NEXT: affine.for %i0 = 0 to 1024 { +// CHECK-NEXT: affine.for %i1 = {{#map[0-9]+}}(%i0) to {{#map[0-9]+}}(%i0) { // CHECK-NEXT: %cst = constant 0.000000e+00 : f32 // CHECK-NEXT: store %cst, [[BUF]][%i1] : memref<1027xf32, 2> // CHECK-NEXT: } @@ -487,7 +487,7 @@ func @relative_loop_bounds(%arg0: memref<1027xf32>) { // ---- -// This should create a buffer of size 2 for %arg2. +// This should create a buffer of size 2 for %arg2. #map_lb = (d0) -> (d0) #map_ub = (d0) -> (d0 + 3) @@ -498,9 +498,9 @@ func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>, %0 = alloc() : memref<64x1xf32> %1 = alloc() : memref<144x4xf32> %2 = constant 0.0 : f32 - for %i8 = 0 to 9 step 3 { - for %i9 = #map_lb(%i8) to #map_ub(%i8) { - for %i17 = 0 to 64 { + affine.for %i8 = 0 to 9 step 3 { + affine.for %i9 = #map_lb(%i8) to #map_ub(%i8) { + affine.for %i17 = 0 to 64 { %23 = affine.apply #map_acc(%i9) %25 = load %arg2[%23] : memref<2xf32> %26 = affine.apply #map_lb(%i17) @@ -511,11 +511,11 @@ func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>, } return %arg1, %arg2 : memref<144x9xf32>, memref<2xf32> } -// CHECK: for %i0 = 0 to 9 step 3 { +// CHECK: affine.for %i0 = 0 to 9 step 3 { // CHECK: [[BUF:%[0-9]+]] = alloc() : memref<2xf32, 2> // CHECK: dma_start %arg2[%4], [[BUF]] // CHECK: dma_wait %6[%c0], %c2_0 : memref<1xi32> -// CHECK: for %i1 = +// CHECK: affine.for %i1 = // ----- @@ -524,17 +524,17 @@ func @test_analysis_util(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>, // FAST-MEM-16KB-LABEL: func @load_store_same_memref func @load_store_same_memref(%arg0: memref<256x1024xf32>) { - // FAST-MEM-16KB: for %i0 = 0 to 256 step 4 - for %i0 = 0 to 256 step 4 { + // FAST-MEM-16KB: affine.for %i0 = 0 to 256 step 4 + affine.for %i0 = 0 to 256 step 4 { // FAST-MEM-16KB: [[BUF:%[0-9]+]] = alloc() : memref<4x1024xf32, 2> // FAST-MEM-16KB: dma_start %arg0 // FAST-MEM-16KB-NEXT: dma_wait - // FAST-MEM-16KB: for %i1 - for %i1 = 0 to 1024 step 4 { - // FAST-MEM-16KB: for %i2 - for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) { - // FAST-MEM-16KB: for %i3 - for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 4)(%i1) { + // FAST-MEM-16KB: affine.for %i1 + affine.for %i1 = 0 to 1024 step 4 { + // FAST-MEM-16KB: affine.for %i2 + affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 4)(%i0) { + // FAST-MEM-16KB: affine.for %i3 + affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 4)(%i1) { %3 = load %arg0[%i2, %i3] : memref<256x1024xf32> %4 = mulf %3, %3 : f32 store %4, %arg0[%i2, %i3] : memref<256x1024xf32> @@ -560,12 +560,12 @@ func @load_store_same_memref(%arg0: memref<256x1024xf32>) { #map1 = (d0)
-> (d0 + 4) // FAST-MEM-16KB-LABEL: func @simple_matmul func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> { - for %i = 0 to 8 step 4 { - for %j = 0 to 8 step 4 { - for %k = 0 to 8 step 4 { - for %ii = #map0(%i) to #map1(%i) { - for %jj = #map0(%j) to #map1(%j) { - for %kk = #map0(%k) to #map1(%k) { + affine.for %i = 0 to 8 step 4 { + affine.for %j = 0 to 8 step 4 { + affine.for %k = 0 to 8 step 4 { + affine.for %ii = #map0(%i) to #map1(%i) { + affine.for %jj = #map0(%j) to #map1(%j) { + affine.for %kk = #map0(%k) to #map1(%k) { %5 = load %arg0[%ii, %kk] : memref<8x8xvector<64xf32>> %6 = load %arg1[%kk, %jj] : memref<8x8xvector<64xf32>> %7 = load %arg2[%ii, %jj] : memref<8x8xvector<64xf32>> @@ -580,18 +580,18 @@ func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector< } return %arg2 : memref<8x8xvector<64xf32>> } -// FAST-MEM-16KB: for %i0 = 0 to 8 step 4 { -// FAST-MEM-16KB: for %i1 = 0 to 8 step 4 { +// FAST-MEM-16KB: affine.for %i0 = 0 to 8 step 4 { +// FAST-MEM-16KB: affine.for %i1 = 0 to 8 step 4 { // FAST-MEM-16KB: dma_start %arg2 // FAST-MEM-16KB: dma_wait -// FAST-MEM-16KB: for %i2 = 0 to 8 step 4 { +// FAST-MEM-16KB: affine.for %i2 = 0 to 8 step 4 { // FAST-MEM-16KB: dma_start %arg0 // FAST-MEM-16KB: dma_wait // FAST-MEM-16KB: dma_start %arg1 // FAST-MEM-16KB: dma_wait -// FAST-MEM-16KB: for %i3 = #map{{[0-9]+}}(%i0) to #map{{[0-9]+}}(%i0) { -// FAST-MEM-16KB-NEXT: for %i4 = #map{{[0-9]+}}(%i1) to #map{{[0-9]+}}(%i1) { -// FAST-MEM-16KB-NEXT: for %i5 = #map{{[0-9]+}}(%i2) to #map{{[0-9]+}}(%i2) { +// FAST-MEM-16KB: affine.for %i3 = #map{{[0-9]+}}(%i0) to #map{{[0-9]+}}(%i0) { +// FAST-MEM-16KB-NEXT: affine.for %i4 = #map{{[0-9]+}}(%i1) to #map{{[0-9]+}}(%i1) { +// FAST-MEM-16KB-NEXT: affine.for %i5 = #map{{[0-9]+}}(%i2) to #map{{[0-9]+}}(%i2) { // FAST-MEM-16KB: } // FAST-MEM-16KB: } // FAST-MEM-16KB: } diff --git a/mlir/test/Transforms/loop-fusion.mlir b/mlir/test/Transforms/loop-fusion.mlir index 0e67e1178f8d..4d21d006ff16 100644 --- a/mlir/test/Transforms/loop-fusion.mlir +++ b/mlir/test/Transforms/loop-fusion.mlir @@ -17,13 +17,13 @@ func @should_fuse_raw_dep_for_locality() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { %v0 = load %m[%i1] : memref<10xf32> } - // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, %0[%1] : memref<1xf32> // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i0, %i0) @@ -45,23 +45,23 @@ func @should_fuse_reduction_to_pointwise() { %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { - for %i1 = 0 to 10 { + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { %v0 = load %b[%i0] : memref<10xf32> %v1 = load %a[%i0, %i1] : memref<10x10xf32> %v3 = addf %v0, %v1 : f32 store %v3, %b[%i0] : memref<10xf32> } } - for %i2 = 0 to 10 { + affine.for %i2 = 0 to 10 { %v4 = load %b[%i2] : memref<10xf32> store %v4, %c[%i2] : memref<10xf32> } // Should fuse in entire inner loop on %i1 from source loop nest, as %i1 // is not used in the access function of the store/load on %b. 
- // CHECK: for %i0 = 0 to 10 { - // CHECK-NEXT: for %i1 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { + // CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %4 = load %0[%3] : memref<1xf32> // CHECK-NEXT: %5 = load %1[%i0, %i1] : memref<10x10xf32> @@ -89,15 +89,15 @@ func @should_fuse_loop_nests_with_shifts() { %a = alloc() : memref<10x10xf32> %cf7 = constant 7.0 : f32 - for %i0 = 0 to 9 { - for %i1 = 0 to 9 { + affine.for %i0 = 0 to 9 { + affine.for %i1 = 0 to 9 { %idx = affine.apply (d0) -> (d0 + 1) (%i0) %idy = affine.apply (d0) -> (d0 + 1) (%i1) store %cf7, %a[%idx, %idy] : memref<10x10xf32> } } - for %i2 = 1 to 10 { - for %i3 = 1 to 10 { + affine.for %i2 = 1 to 10 { + affine.for %i3 = 1 to 10 { %v0 = load %a[%i2, %i3] : memref<10x10xf32> } } @@ -110,8 +110,8 @@ func @should_fuse_loop_nests_with_shifts() { // *) Fifth affine apply shifts the loads access function by '-1', because // of the offset induced by reducing the memref shape from 10x10 to 9x9. // NOTE: Should create a private memref with reduced shape 9x9xf32. - // CHECK: for %i0 = 1 to 10 { - // CHECK-NEXT: for %i1 = 1 to 10 { + // CHECK: affine.for %i0 = 1 to 10 { + // CHECK-NEXT: affine.for %i1 = 1 to 10 { // CHECK-NEXT: %1 = affine.apply [[MAP_SHIFT_MINUS_ONE_R1]](%i0) // CHECK-NEXT: %2 = affine.apply [[MAP_SHIFT_MINUS_ONE_R1]](%i1) // CHECK-NEXT: %3 = affine.apply [[MAP_SHIFT_BY_ONE]](%1) @@ -139,27 +139,27 @@ func @should_fuse_loop_nest() { %b = alloc() : memref<10x10xf32> %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { - for %i1 = 0 to 10 { + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { store %cf7, %a[%i0, %i1] : memref<10x10xf32> } } - for %i2 = 0 to 10 { - for %i3 = 0 to 10 { + affine.for %i2 = 0 to 10 { + affine.for %i3 = 0 to 10 { %v0 = load %a[%i3, %i2] : memref<10x10xf32> store %v0, %b[%i2, %i3] : memref<10x10xf32> } } - for %i4 = 0 to 10 { - for %i5 = 0 to 10 { + affine.for %i4 = 0 to 10 { + affine.for %i5 = 0 to 10 { %v1 = load %b[%i4, %i5] : memref<10x10xf32> } } // Expecting private memref for '%a' first, then private memref for '%b'. // CHECK-DAG: [[NEWA:%[0-9]+]] = alloc() : memref<1x1xf32> // CHECK-DAG: [[NEWB:%[0-9]+]] = alloc() : memref<1x1xf32> - // CHECK: for %i0 = 0 to 10 { - // CHECK-NEXT: for %i1 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { + // CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: %2 = affine.apply [[MAP_D2_D0_DIFF]](%i1, %i0, %i1, %i0) // CHECK-NEXT: %3 = affine.apply [[MAP_D3_D1_DIFF]](%i1, %i0, %i1, %i0) // CHECK-NEXT: store %cst, [[NEWA]][%2, %3] : memref<1x1xf32> @@ -190,23 +190,23 @@ func @should_fuse_across_intermediate_loop_with_no_deps() { %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { %v0 = load %a[%i0] : memref<10xf32> store %v0, %b[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { store %cf7, %c[%i1] : memref<10xf32> } - for %i2 = 0 to 10 { + affine.for %i2 = 0 to 10 { %v1 = load %b[%i2] : memref<10xf32> } // Should fuse first loop (past second loop with no dependences) into third. // Note that fusion creates a private memref '%2' for the fused loop nest. 
- // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %2[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK: for %i1 = 0 to 10 { + // CHECK: affine.for %i1 = 0 to 10 { // CHECK-NEXT: %3 = load %1[%i1] : memref<10xf32> // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i1, %i1) // CHECK-NEXT: store %3, %0[%4] : memref<1xf32> @@ -228,13 +228,13 @@ func @should_fuse_all_loops() { %cf7 = constant 7.0 : f32 // Set up flow dependences from first and second loops to third. - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %a[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { store %cf7, %b[%i1] : memref<10xf32> } - for %i2 = 0 to 10 { + affine.for %i2 = 0 to 10 { %v0 = load %a[%i2] : memref<10xf32> %v1 = load %b[%i2] : memref<10xf32> } @@ -243,7 +243,7 @@ func @should_fuse_all_loops() { // Expecting private memref for '%a' first, then private memref for '%b'. // CHECK-DAG: [[NEWA:%[0-9]+]] = alloc() : memref<1xf32> // CHECK-DAG: [[NEWB:%[0-9]+]] = alloc() : memref<1xf32> - // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, [[NEWA]][%2] : memref<1xf32> // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i0) @@ -269,27 +269,27 @@ func @should_fuse_first_and_second_loops() { %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %a[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { %v0 = load %a[%i1] : memref<10xf32> store %cf7, %b[%i1] : memref<10xf32> } - for %i2 = 0 to 10 { + affine.for %i2 = 0 to 10 { %v1 = load %c[%i2] : memref<10xf32> } // Should fuse first loop into the second (last loop should not be fused). // Should create private memref '%2' for fused loop. - // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, %0[%3] : memref<1xf32> // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %5 = load %0[%4] : memref<1xf32> // CHECK-NEXT: store %cst, %1[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK: for %i1 = 0 to 10 { + // CHECK: affine.for %i1 = 0 to 10 { // CHECK-NEXT: %6 = load %2[%i1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return @@ -311,28 +311,28 @@ func @should_not_fuse_would_create_cycle() { // 1) loop0 -> loop1 on memref '%a' // 2) loop0 -> loop2 on memref '%b' // 3) loop1 -> loop2 on memref '%c' - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { %v0 = load %a[%i0] : memref<10xf32> store %cf7, %b[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { store %cf7, %a[%i1] : memref<10xf32> %v1 = load %c[%i1] : memref<10xf32> } - for %i2 = 0 to 10 { + affine.for %i2 = 0 to 10 { %v2 = load %b[%i2] : memref<10xf32> store %cf7, %c[%i2] : memref<10xf32> } // Should not fuse: fusing loop first loop into last would create a cycle. 
- // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: %3 = load %0[%i0] : memref<10xf32> // CHECK-NEXT: store %cst, %1[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK: for %i1 = 0 to 10 { + // CHECK: affine.for %i1 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i1] : memref<10xf32> // CHECK-NEXT: %4 = load %2[%i1] : memref<10xf32> // CHECK-NEXT: } - // CHECK: for %i2 = 0 to 10 { + // CHECK: affine.for %i2 = 0 to 10 { // CHECK-NEXT: %5 = load %1[%i2] : memref<10xf32> // CHECK-NEXT: store %cst, %2[%i2] : memref<10xf32> // CHECK-NEXT: } @@ -347,23 +347,23 @@ func @should_not_fuse_across_waw_dep() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { store %cf7, %m[%i1] : memref<10xf32> } - for %i2 = 0 to 10 { + affine.for %i2 = 0 to 10 { %v1 = load %m[%i2] : memref<10xf32> } // Fusing loop %i0 to %i2 would violate the WAW dependence between %i0 and %i1 - // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK: for %i1 = 0 to 10 { + // CHECK: affine.for %i1 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i1] : memref<10xf32> // CHECK-NEXT: } - // CHECK: for %i2 = 0 to 10 { + // CHECK: affine.for %i2 = 0 to 10 { // CHECK-NEXT: %1 = load %0[%i2] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return @@ -380,27 +380,27 @@ func @should_fuse_and_move_to_preserve_war_dep() { %b = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { %v0 = load %a[%i0] : memref<10xf32> store %v0, %b[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { store %cf7, %a[%i1] : memref<10xf32> } - for %i2 = 0 to 10 { + affine.for %i2 = 0 to 10 { %v1 = load %b[%i2] : memref<10xf32> } // Loops '%i1' and '%i2' have no dependences. We can fuse a slice of '%i0' // into '%i2' if we move the fused loop nest before '%i1', which preserves // the WAR dependence from load '%a' in '%i0' to the store '%a' in loop '%i1'. - // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: %2 = load %1[%i0] : memref<10xf32> // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %2, %0[%3] : memref<1xf32> // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %5 = load %0[%4] : memref<1xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i1 = 0 to 10 { + // CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: store %cst, %1[%i1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return @@ -416,20 +416,20 @@ func @should_fuse_with_private_memref_if_top_level_access() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { %v0 = load %m[%i1] : memref<10xf32> } %c0 = constant 4 : index %v1 = load %m[%c0] : memref<10xf32> // Top-level load to '%m' should prevent fusion. 
- // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %1[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i1 = 0 to 10 { + // CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i1, %i1) // CHECK-NEXT: store %cst, %0[%2] : memref<1xf32> // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i1, %i1) @@ -447,13 +447,13 @@ func @should_fuse_no_top_level_access() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { %v0 = load %m[%i1] : memref<10xf32> } - // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, %0[%1] : memref<1xf32> // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i0, %i0) @@ -472,20 +472,20 @@ func @should_not_fuse_if_inst_at_top_level() { %m = alloc() : memref<10xf32> %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { %v0 = load %m[%i1] : memref<10xf32> } %c0 = constant 4 : index affine.if #set0(%c0) { } // Top-level IfOp should prevent fusion. - // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK: for %i1 = 0 to 10 { + // CHECK: affine.for %i1 = 0 to 10 { // CHECK-NEXT: %1 = load %0[%i1] : memref<10xf32> // CHECK-NEXT: } return @@ -501,20 +501,20 @@ func @should_not_fuse_if_inst_in_loop_nest() { %cf7 = constant 7.0 : f32 %c4 = constant 4 : index - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { affine.if #set0(%c4) { } %v0 = load %m[%i1] : memref<10xf32> } // IfOp in ForInst should prevent fusion. 
- // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK: for %i1 = 0 to 10 { + // CHECK: affine.for %i1 = 0 to 10 { // CHECK-NEXT: affine.if #set0(%c4) { // CHECK-NEXT: } // CHECK-NEXT: %1 = load %0[%i1] : memref<10xf32> @@ -533,24 +533,24 @@ func @permute_and_fuse() { %m = alloc() : memref<10x20x30xf32> %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { - for %i1 = 0 to 20 { - for %i2 = 0 to 30 { + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 20 { + affine.for %i2 = 0 to 30 { store %cf7, %m[%i0, %i1, %i2] : memref<10x20x30xf32> } } } - for %i3 = 0 to 30 { - for %i4 = 0 to 10 { - for %i5 = 0 to 20 { + affine.for %i3 = 0 to 30 { + affine.for %i4 = 0 to 10 { + affine.for %i5 = 0 to 20 { %v0 = load %m[%i4, %i5, %i3] : memref<10x20x30xf32> "foo"(%v0) : (f32) -> () } } } -// CHECK: for %i0 = 0 to 30 { -// CHECK-NEXT: for %i1 = 0 to 10 { -// CHECK-NEXT: for %i2 = 0 to 20 { +// CHECK: affine.for %i0 = 0 to 30 { +// CHECK-NEXT: affine.for %i1 = 0 to 10 { +// CHECK-NEXT: affine.for %i2 = 0 to 20 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i1, %i2, %i0, %i1, %i2, %i0) // CHECK-NEXT: %2 = affine.apply [[MAP1]](%i1, %i2, %i0, %i1, %i2, %i0) // CHECK-NEXT: %3 = affine.apply [[MAP2]](%i1, %i2, %i0, %i1, %i2, %i0) @@ -579,22 +579,22 @@ func @permute_and_fuse() { func @fuse_reshape_64_16_4(%in : memref<64xf32>) { %out = alloc() : memref<16x4xf32> - for %i0 = 0 to 64 { + affine.for %i0 = 0 to 64 { %v = load %in[%i0] : memref<64xf32> %idx = affine.apply (d0) -> (d0 floordiv 4) (%i0) %idy = affine.apply (d0) -> (d0 mod 4) (%i0) store %v, %out[%idx, %idy] : memref<16x4xf32> } - for %i1 = 0 to 16 { - for %i2 = 0 to 4 { + affine.for %i1 = 0 to 16 { + affine.for %i2 = 0 to 4 { %w = load %out[%i1, %i2] : memref<16x4xf32> "foo"(%w) : (f32) -> () } } return - // CHECK: for %i0 = - // CHECK-NEXT: for %i1 = + // CHECK: affine.for %i0 = + // CHECK-NEXT: affine.for %i1 = // CHECK-NOT: for // CHECK: } // CHECK-NEXT: } @@ -613,19 +613,19 @@ func @fuse_reshape_16_4_64() { %in = alloc() : memref<16x4xf32> %out = alloc() : memref<64xf32> - for %i0 = 0 to 16 { - for %i1 = 0 to 4 { + affine.for %i0 = 0 to 16 { + affine.for %i1 = 0 to 4 { %v = load %in[%i0, %i1] : memref<16x4xf32> %idx = affine.apply (d0, d1) -> (4*d0 + d1) (%i0, %i1) store %v, %out[%idx] : memref<64xf32> } } - for %i2 = 0 to 64 { + affine.for %i2 = 0 to 64 { %w = load %out[%i2] : memref<64xf32> "foo"(%w) : (f32) -> () } -// CHECK: for %i0 = 0 to 64 { +// CHECK: affine.for %i0 = 0 to 64 { // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i0) // CHECK-NEXT: %3 = affine.apply [[MAP1]](%i0) // CHECK-NEXT: %4 = load %1[%2, %3] : memref<16x4xf32> @@ -651,12 +651,12 @@ func @R6_to_R2_reshape_square() -> memref<64x9xi32> { %live_out = alloc() : memref<64x9xi32> // Initialize input. - for %i0 = 0 to 2 { - for %i1 = 0 to 2 { - for %i2 = 0 to 3 { - for %i3 = 0 to 3 { - for %i4 = 0 to 16 { - for %i5 = 0 to 1 { + affine.for %i0 = 0 to 2 { + affine.for %i1 = 0 to 2 { + affine.for %i2 = 0 to 3 { + affine.for %i3 = 0 to 3 { + affine.for %i4 = 0 to 16 { + affine.for %i5 = 0 to 1 { %val = "foo"(%i0, %i1, %i2, %i3, %i4, %i5) : (index, index, index, index, index, index) -> i32 store %val, %in[%i0, %i1, %i2, %i3, %i4, %i5] : memref<2x2x3x3x16x1xi32> } @@ -666,8 +666,8 @@ func @R6_to_R2_reshape_square() -> memref<64x9xi32> { } } - for %ii = 0 to 64 { - for %jj = 0 to 9 { + affine.for %ii = 0 to 64 { + affine.for %jj = 0 to 9 { // Convert output coordinates to linear index. 
%a0 = affine.apply (d0, d1) -> (d0 * 9 + d1) (%ii, %jj) %0 = affine.apply (d0) -> (d0 floordiv (2 * 3 * 3 * 16 * 1))(%a0) @@ -681,8 +681,8 @@ func @R6_to_R2_reshape_square() -> memref<64x9xi32> { } } - for %i = 0 to 64 { - for %j = 0 to 9 { + affine.for %i = 0 to 64 { + affine.for %j = 0 to 9 { %a = load %out[%i, %j] : memref<64x9xi32> %b = muli %a, %a : i32 store %b, %live_out[%i, %j] : memref<64x9xi32> @@ -718,8 +718,8 @@ func @R6_to_R2_reshape_square() -> memref<64x9xi32> { // CHECK: %0 = alloc() : memref<1x2x3x3x16x1xi32> // CHECK: %1 = alloc() : memref<1x1xi32> // CHECK: %2 = alloc() : memref<64x9xi32> -// CHECK-NEXT: for %i0 = 0 to 64 { -// CHECK-NEXT: for %i1 = 0 to 9 { +// CHECK-NEXT: affine.for %i0 = 0 to 64 { +// CHECK-NEXT: affine.for %i1 = 0 to 9 { // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i1) // CHECK-NEXT: %4 = affine.apply [[MAP1]](%i0, %i1) // CHECK-NEXT: %5 = affine.apply [[MAP2]](%i0, %i1) @@ -769,14 +769,14 @@ func @fuse_symbolic_bounds(%M : index, %N : index) { %c0 = constant 0.0 : f32 %s = constant 5 : index - for %i0 = 0 to %M { - for %i1 = 0 to (d0) -> (d0 + 5) (%N) { + affine.for %i0 = 0 to %M { + affine.for %i1 = 0 to (d0) -> (d0 + 5) (%N) { store %c0, %m[%i0, %i1] : memref } } - for %i2 = 0 to %M { - for %i3 = 0 to %N { + affine.for %i2 = 0 to %M { + affine.for %i3 = 0 to %N { %idy = affine.apply (d0)[s0] -> (d0 + s0) (%i3)[%s] %v = load %m[%i2, %idy] : memref } @@ -793,16 +793,16 @@ func @should_fuse_reduction_at_depth1() { %a = alloc() : memref<10x100xf32> %b = alloc() : memref<10xf32> - for %i0 = 0 to 10 { - for %i1 = 0 to 100 { + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 100 { %v0 = load %b[%i0] : memref<10xf32> %v1 = load %a[%i0, %i1] : memref<10x100xf32> %v2 = "maxf"(%v0, %v1) : (f32, f32) -> f32 store %v2, %b[%i0] : memref<10xf32> } } - for %i2 = 0 to 10 { - for %i3 = 0 to 100 { + affine.for %i2 = 0 to 10 { + affine.for %i3 = 0 to 100 { %v3 = load %b[%i2] : memref<10xf32> %v4 = load %a[%i2, %i3] : memref<10x100xf32> %v5 = subf %v4, %v3 : f32 @@ -813,8 +813,8 @@ func @should_fuse_reduction_at_depth1() { // loop nest, which improves locality and enables subsequence passes to // decrease the reduction memref size and possibly place it in a faster // memory space. 
- // CHECK: for %i0 = 0 to 10 { - // CHECK-NEXT: for %i1 = 0 to 100 { + // CHECK: affine.for %i0 = 0 to 10 { + // CHECK-NEXT: affine.for %i1 = 0 to 100 { // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %3 = load %0[%2] : memref<1xf32> // CHECK-NEXT: %4 = load %1[%i0, %i1] : memref<10x100xf32> @@ -822,7 +822,7 @@ func @should_fuse_reduction_at_depth1() { // CHECK-NEXT: %6 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %5, %0[%6] : memref<1xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i2 = 0 to 100 { + // CHECK-NEXT: affine.for %i2 = 0 to 100 { // CHECK-NEXT: %7 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %8 = load %0[%7] : memref<1xf32> // CHECK-NEXT: %9 = load %1[%i0, %i2] : memref<10x100xf32> @@ -844,19 +844,19 @@ func @should_fuse_at_src_depth1_and_dst_depth1() { %a = alloc() : memref<100x16xf32> %b = alloc() : memref<100x16xf32> - for %i0 = 0 to 100 { - for %i1 = 0 to 16 { + affine.for %i0 = 0 to 100 { + affine.for %i1 = 0 to 16 { %v0 = load %a[%i0, %i1] : memref<100x16xf32> "op0"(%v0) : (f32) -> () } - for %i2 = 0 to 16 { + affine.for %i2 = 0 to 16 { %v1 = "op1"() : () -> (f32) store %v1, %b[%i0, %i2] : memref<100x16xf32> } } - for %i3 = 0 to 100 { - for %i4 = 0 to 16 { + affine.for %i3 = 0 to 100 { + affine.for %i4 = 0 to 16 { %v2 = load %b[%i3, %i4] : memref<100x16xf32> "op2"(%v2) : (f32) -> () } @@ -866,18 +866,18 @@ func @should_fuse_at_src_depth1_and_dst_depth1() { // destination loop nest at depth2 causes extra computation. Instead, // the fusion algorithm should detect that the source loop should be sliced // at depth 1 and the slice should be inserted at depth 1. - // CHECK: for %i0 = 0 to 100 { - // CHECK-NEXT: for %i1 = 0 to 16 { + // CHECK: affine.for %i0 = 0 to 100 { + // CHECK-NEXT: affine.for %i1 = 0 to 16 { // CHECK-NEXT: %2 = load %1[%i0, %i1] : memref<100x16xf32> // CHECK-NEXT: "op0"(%2) : (f32) -> () // CHECK-NEXT: } - // CHECK-NEXT: for %i2 = 0 to 16 { + // CHECK-NEXT: affine.for %i2 = 0 to 16 { // CHECK-NEXT: %3 = "op1"() : () -> f32 // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i0, %i0, %i2) // CHECK-NEXT: %5 = affine.apply [[MAP1]](%i0, %i0, %i2) // CHECK-NEXT: store %3, %0[%4, %5] : memref<1x16xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i3 = 0 to 16 { + // CHECK-NEXT: affine.for %i3 = 0 to 16 { // CHECK-NEXT: %6 = affine.apply [[MAP0]](%i0, %i0, %i3) // CHECK-NEXT: %7 = affine.apply [[MAP1]](%i0, %i0, %i3) // CHECK-NEXT: %8 = load %0[%6, %7] : memref<1x16xf32> @@ -897,20 +897,20 @@ func @should_fuse_src_depth1_at_dst_depth2() { %a = alloc() : memref<100xf32> %c0 = constant 0.0 : f32 - for %i0 = 0 to 100 { + affine.for %i0 = 0 to 100 { store %c0, %a[%i0] : memref<100xf32> } - for %i1 = 0 to 10 { - for %i2 = 0 to 10 { + affine.for %i1 = 0 to 10 { + affine.for %i2 = 0 to 10 { %a0 = affine.apply (d0, d1) -> (d0 * 10 + d1) (%i1, %i2) %v0 = load %a[%a0] : memref<100xf32> } } // The source loop nest slice loop bound is a function of both destination // loop IVs, so we should slice at depth 1 and insert the slice at depth 2. 
- // CHECK: for %i0 = 0 to 10 { - // CHECK-NEXT: for %i1 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { + // CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1) // CHECK-NEXT: %2 = affine.apply [[MAP1]](%i0, %i1, %1) // CHECK-NEXT: store %cst, %0[%2] : memref<1xf32> @@ -930,16 +930,16 @@ func @fusion_at_depth0_not_currently_supported() { %0 = alloc() : memref<10xf32> %c0 = constant 0 : index %cst = constant 0.000000e+00 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cst, %0[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { %1 = load %0[%c0] : memref<10xf32> } // NOTE: Should shrink memref size to 1 element access by load in dst loop // nest, and make the store in the slice store to the same element. // CHECK-DAG: %0 = alloc() : memref<1xf32> - // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%c0] : memref<1xf32> // CHECK-NEXT: %1 = load %0[%c0_0] : memref<1xf32> // CHECK-NEXT: } @@ -965,18 +965,18 @@ func @should_fuse_deep_loop_nests() { %c1 = constant 1 : index %c1_0 = constant 1 : index %cst = constant 0.000000e+00 : f32 - for %i0 = 0 to 2 { - for %i1 = 0 to 2 { - for %i2 = 0 to 3 { - for %i3 = 0 to 3 { - for %i4 = 0 to 16 { - for %i5 = 0 to 10 { + affine.for %i0 = 0 to 2 { + affine.for %i1 = 0 to 2 { + affine.for %i2 = 0 to 3 { + affine.for %i3 = 0 to 3 { + affine.for %i4 = 0 to 16 { + affine.for %i5 = 0 to 10 { %3 = load %0[%i0, %i1, %i2, %i3, %i4, %i5] : memref<2x2x3x3x16x10xf32, 2> } } - for %i6 = 0 to 16 { - for %i7 = 0 to 10 { + affine.for %i6 = 0 to 16 { + affine.for %i7 = 0 to 10 { store %cst, %1[%i0, %i1, %i2, %i3, %i6, %i7] : memref<2x2x3x3x16x10xf32, 2> } @@ -985,22 +985,22 @@ func @should_fuse_deep_loop_nests() { } } } - for %i8 = 0 to 3 { - for %i9 = 0 to 3 { - for %i10 = 0 to 2 { - for %i11 = 0 to 2 { - for %i12 = 0 to 3 { - for %i13 = 0 to 3 { - for %i14 = 0 to 2 { - for %i15 = 0 to 2 { - for %i16 = 0 to 16 { - for %i17 = 0 to 10 { + affine.for %i8 = 0 to 3 { + affine.for %i9 = 0 to 3 { + affine.for %i10 = 0 to 2 { + affine.for %i11 = 0 to 2 { + affine.for %i12 = 0 to 3 { + affine.for %i13 = 0 to 3 { + affine.for %i14 = 0 to 2 { + affine.for %i15 = 0 to 2 { + affine.for %i16 = 0 to 16 { + affine.for %i17 = 0 to 10 { %5 = load %0[%i14, %i15, %i12, %i13, %i16, %i17] : memref<2x2x3x3x16x10xf32, 2> } } - for %i18 = 0 to 16 { - for %i19 = 0 to 10 { + affine.for %i18 = 0 to 16 { + affine.for %i19 = 0 to 10 { %6 = load %1[%i10, %i11, %i8, %i9, %i18, %i19] : memref<2x2x3x3x16x10xf32, 2> } @@ -1018,19 +1018,19 @@ func @should_fuse_deep_loop_nests() { // where the destination loops nests have been interchanged. 
// CHECK-DAG: %0 = alloc() : memref<1x1x1x1x16x10xf32, 2> -// CHECK: for %i0 = 0 to 3 { -// CHECK-NEXT: for %i1 = 0 to 3 { -// CHECK-NEXT: for %i2 = 0 to 2 { -// CHECK-NEXT: for %i3 = 0 to 2 { -// CHECK-NEXT: for %i4 = 0 to 3 { -// CHECK-NEXT: for %i5 = 0 to 3 { -// CHECK-NEXT: for %i6 = 0 to 16 { -// CHECK-NEXT: for %i7 = 0 to 10 { +// CHECK: affine.for %i0 = 0 to 3 { +// CHECK-NEXT: affine.for %i1 = 0 to 3 { +// CHECK-NEXT: affine.for %i2 = 0 to 2 { +// CHECK-NEXT: affine.for %i3 = 0 to 2 { +// CHECK-NEXT: affine.for %i4 = 0 to 3 { +// CHECK-NEXT: affine.for %i5 = 0 to 3 { +// CHECK-NEXT: affine.for %i6 = 0 to 16 { +// CHECK-NEXT: affine.for %i7 = 0 to 10 { // CHECK-NEXT: %3 = load %1[%i2, %i3, %i0, %i1, %i6, %i7] : memref<2x2x3x3x16x10xf32, 2> // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: for %i8 = 0 to 16 { -// CHECK-NEXT: for %i9 = 0 to 10 { +// CHECK-NEXT: affine.for %i8 = 0 to 16 { +// CHECK-NEXT: affine.for %i9 = 0 to 10 { // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i2, %i3, %i0, %i1, %i2, %i3, %i0, %i1, %i8, %i9) // CHECK-NEXT: %5 = affine.apply [[MAP1]](%i2, %i3, %i0, %i1, %i2, %i3, %i0, %i1, %i8, %i9) // CHECK-NEXT: %6 = affine.apply [[MAP2]](%i2, %i3, %i0, %i1, %i2, %i3, %i0, %i1, %i8, %i9) @@ -1040,15 +1040,15 @@ func @should_fuse_deep_loop_nests() { // CHECK-NEXT: store %cst, %0[%4, %5, %6, %7, %8, %9] : memref<1x1x1x1x16x10xf32, 2> // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: for %i10 = 0 to 2 { -// CHECK-NEXT: for %i11 = 0 to 2 { -// CHECK-NEXT: for %i12 = 0 to 16 { -// CHECK-NEXT: for %i13 = 0 to 10 { +// CHECK-NEXT: affine.for %i10 = 0 to 2 { +// CHECK-NEXT: affine.for %i11 = 0 to 2 { +// CHECK-NEXT: affine.for %i12 = 0 to 16 { +// CHECK-NEXT: affine.for %i13 = 0 to 10 { // CHECK-NEXT: %10 = load %1[%i10, %i11, %i4, %i5, %i12, %i13] : memref<2x2x3x3x16x10xf32, 2> // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: for %i14 = 0 to 16 { -// CHECK-NEXT: for %i15 = 0 to 10 { +// CHECK-NEXT: affine.for %i14 = 0 to 16 { +// CHECK-NEXT: affine.for %i15 = 0 to 10 { // CHECK-NEXT: %11 = affine.apply [[MAP0]](%i2, %i3, %i0, %i1, %i2, %i3, %i0, %i1, %i14, %i15) // CHECK-NEXT: %12 = affine.apply [[MAP1]](%i2, %i3, %i0, %i1, %i2, %i3, %i0, %i1, %i14, %i15) // CHECK-NEXT: %13 = affine.apply [[MAP2]](%i2, %i3, %i0, %i1, %i2, %i3, %i0, %i1, %i14, %i15) @@ -1082,17 +1082,17 @@ func @should_fuse_at_depth1_and_reduce_slice_trip_count() { %c0 = constant 0 : index %cf0 = constant 0.0 : f32 - for %i0 = 0 to 4 { - for %i1 = 0 to 256 { + affine.for %i0 = 0 to 4 { + affine.for %i1 = 0 to 256 { %v0 = load %b[%i0, %i1] : memref<4x256xf32> } - for %i2 = 0 to 256 { + affine.for %i2 = 0 to 256 { store %cf0, %a[%i0, %i2] : memref<4x256xf32> } } - for %d0 = 0 to 4 { - for %d1 = 0 to 16 { + affine.for %d0 = 0 to 4 { + affine.for %d1 = 0 to 16 { %v1 = load %a[%d0, %d1] : memref<4x256xf32> } } @@ -1106,16 +1106,16 @@ func @should_fuse_at_depth1_and_reduce_slice_trip_count() { // is reduced from the original shape from 4x256 to 4x16 because of the // data accessed by the load. 
// CHECK-DAG: %0 = alloc() : memref<1x16xf32> - // CHECK: for %i0 = 0 to 4 { - // CHECK-NEXT: for %i1 = 0 to 256 { + // CHECK: affine.for %i0 = 0 to 4 { + // CHECK-NEXT: affine.for %i1 = 0 to 256 { // CHECK-NEXT: %2 = load %1[%i0, %i1] : memref<4x256xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i2 = 0 to 16 { + // CHECK-NEXT: affine.for %i2 = 0 to 16 { // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i0, %i2) // CHECK-NEXT: %4 = affine.apply [[MAP1]](%i0, %i0, %i2) // CHECK-NEXT: store %cst, %0[%3, %4] : memref<1x16xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i3 = 0 to 16 { + // CHECK-NEXT: affine.for %i3 = 0 to 16 { // CHECK-NEXT: %5 = affine.apply [[MAP0]](%i0, %i0, %i3) // CHECK-NEXT: %6 = affine.apply [[MAP1]](%i0, %i0, %i3) // CHECK-NEXT: %7 = load %0[%5, %6] : memref<1x16xf32> @@ -1133,31 +1133,31 @@ func @should_fuse_at_depth1_with_trip_count_20() { %c0 = constant 0 : index %cf0 = constant 0.0 : f32 - for %i0 = 0 to 100 { + affine.for %i0 = 0 to 100 { store %cf0, %a[%i0]: memref<100xf32> } - for %i1 = 0 to 5 { - for %i2 = 0 to 10 { + affine.for %i1 = 0 to 5 { + affine.for %i2 = 0 to 10 { %v0 = load %a[%i2]: memref<100xf32> } - for %i3 = 0 to 10 { - for %i4 = 0 to 20 { + affine.for %i3 = 0 to 10 { + affine.for %i4 = 0 to 20 { %v1 = load %a[%i4]: memref<100xf32> } } } // NOTE: The size of the private memref created for fusion is shrunk to 20xf32 // CHECK-DAG: %0 = alloc() : memref<20xf32> - // CHECK: for %i0 = 0 to 5 { - // CHECK-NEXT: for %i1 = 0 to 20 { + // CHECK: affine.for %i0 = 0 to 5 { + // CHECK-NEXT: affine.for %i1 = 0 to 20 { // CHECK-NEXT: store %cst, %0[%i1] : memref<20xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i2 = 0 to 10 { + // CHECK-NEXT: affine.for %i2 = 0 to 10 { // CHECK-NEXT: %1 = load %0[%i2] : memref<20xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i3 = 0 to 10 { - // CHECK-NEXT: for %i4 = 0 to 20 { + // CHECK-NEXT: affine.for %i3 = 0 to 10 { + // CHECK-NEXT: affine.for %i4 = 0 to 20 { // CHECK-NEXT: %2 = load %0[%i4] : memref<20xf32> // CHECK-NEXT: } // CHECK-NEXT: } @@ -1174,31 +1174,31 @@ func @should_fuse_at_depth1_with_trip_count_19() { %c0 = constant 0 : index %cf0 = constant 0.0 : f32 - for %i0 = 0 to 100 { + affine.for %i0 = 0 to 100 { store %cf0, %a[%i0]: memref<100xf32> } - for %i1 = 0 to 5 { - for %i2 = 0 to 19 { + affine.for %i1 = 0 to 5 { + affine.for %i2 = 0 to 19 { %v0 = load %a[%i2]: memref<100xf32> } - for %i3 = 0 to 10 { - for %i4 = 0 to 10 { + affine.for %i3 = 0 to 10 { + affine.for %i4 = 0 to 10 { %v1 = load %a[%i4]: memref<100xf32> } } } // NOTE: The size of the private memref created for fusion is shrunk to 19xf32 // CHECK-DAG: %0 = alloc() : memref<19xf32> - // CHECK: for %i0 = 0 to 5 { - // CHECK-NEXT: for %i1 = 0 to 19 { + // CHECK: affine.for %i0 = 0 to 5 { + // CHECK-NEXT: affine.for %i1 = 0 to 19 { // CHECK-NEXT: store %cst, %0[%i1] : memref<19xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i2 = 0 to 19 { + // CHECK-NEXT: affine.for %i2 = 0 to 19 { // CHECK-NEXT: %1 = load %0[%i2] : memref<19xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i3 = 0 to 10 { - // CHECK-NEXT: for %i4 = 0 to 10 { + // CHECK-NEXT: affine.for %i3 = 0 to 10 { + // CHECK-NEXT: affine.for %i4 = 0 to 10 { // CHECK-NEXT: %2 = load %0[%i4] : memref<19xf32> // CHECK-NEXT: } // CHECK-NEXT: } @@ -1216,26 +1216,26 @@ func @should_fuse_with_private_memrefs_with_diff_shapes() { %m = alloc() : memref<100xf32> %cf7 = constant 7.0 : f32 - for %i0 = 0 to 100 { + affine.for %i0 = 0 to 100 { store %cf7, %m[%i0] : memref<100xf32> } - for %i1 = 0 to 17 { + affine.for %i1 = 0 to 17 { %v0 = 
load %m[%i1] : memref<100xf32> } - for %i2 = 0 to 82 { + affine.for %i2 = 0 to 82 { %v1 = load %m[%i2] : memref<100xf32> } // Should create two new private memrefs customized to the shapes accessed // by loops %i1 and %i2. // CHECK-DAG: %0 = alloc() : memref<1xf32> // CHECK-DAG: %1 = alloc() : memref<1xf32> - // CHECK: for %i0 = 0 to 17 { + // CHECK: affine.for %i0 = 0 to 17 { // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, %1[%2] : memref<1xf32> // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %4 = load %1[%3] : memref<1xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i1 = 0 to 82 { + // CHECK-NEXT: affine.for %i1 = 0 to 82 { // CHECK-NEXT: %5 = affine.apply [[MAP0]](%i1, %i1) // CHECK-NEXT: store %cst, %0[%5] : memref<1xf32> // CHECK-NEXT: %6 = affine.apply [[MAP0]](%i1, %i1) @@ -1251,18 +1251,18 @@ func @should_fuse_with_private_memrefs_with_diff_shapes() { func @should_not_fuse_live_out_arg(%arg0: memref<10xf32>) { %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %arg0[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { %v0 = load %arg0[%i1] : memref<10xf32> } // This tests that the loop nest '%i0' should not be removed after fusion // because it writes to memref argument '%arg0'. - // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %arg0[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i1 = 0 to 10 { + // CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: %0 = load %arg0[%i1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return @@ -1275,19 +1275,19 @@ func @should_not_fuse_live_out_arg(%arg0: memref<10xf32>) { func @should_not_fuse_escaping_memref() -> memref<10xf32> { %cf7 = constant 7.0 : f32 %m = alloc() : memref<10xf32> - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { %v0 = load %m[%i1] : memref<10xf32> } // This tests that the loop nest '%i0' should not be removed after fusion // because it writes to memref '%m' which is returned by the function. 
// CHECK-DAG: %0 = alloc() : memref<10xf32> - // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i1 = 0 to 10 { + // CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: %1 = load %0[%i1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return %0 : memref<10xf32> @@ -1302,17 +1302,17 @@ func @R3_to_R2_reshape() { %c0 = constant 0 : index - for %i0 = 0 to 2 { - for %i1 = 0 to 3 { - for %i2 = 0 to 16 { + affine.for %i0 = 0 to 2 { + affine.for %i1 = 0 to 3 { + affine.for %i2 = 0 to 16 { %val = "foo"(%i0, %i1, %i2) : (index, index, index) -> i32 store %val, %in[%i0, %i1, %i2] : memref<2x3x16xi32> } } } - for %ii = 0 to 32 { - for %jj = 0 to 3 { + affine.for %ii = 0 to 32 { + affine.for %jj = 0 to 3 { %a0 = affine.apply (d0, d1) -> (d0 * 3 + d1) (%ii, %jj) %idx = affine.apply (d0) -> (d0 floordiv (3 * 16)) (%a0) %v = load %in[%idx, %jj, %c0] @@ -1330,8 +1330,8 @@ func @R3_to_R2_reshape() { // CHECK-LABEL: func @R3_to_R2_reshape() // CHECK-DAG: %0 = alloc() : memref<1x1x1xi32> -// CHECK: for %i0 = 0 to 32 { -// CHECK-NEXT: for %i1 = 0 to 3 { +// CHECK: affine.for %i0 = 0 to 32 { +// CHECK-NEXT: affine.for %i1 = 0 to 3 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1) // CHECK-NEXT: %2 = "foo"(%1, %i1, %c0) : (index, index, index) -> i32 // CHECK-NEXT: %3 = affine.apply [[MAP2]](%i0, %i1, %1, %i1, %c0) @@ -1357,19 +1357,19 @@ func @should_not_fuse_multi_output_producer() { %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %a[%i0] : memref<10xf32> store %cf7, %b[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { %v0 = load %a[%i1] : memref<10xf32> } - // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32> // CHECK-NEXT: store %cst, %1[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i1 = 0 to 10 { + // CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: %2 = load %0[%i1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return @@ -1386,30 +1386,30 @@ func @fusion_preventing_deps_on_middle_loop() { %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { %v0 = load %a[%i0] : memref<10xf32> store %v0, %b[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { store %cf7, %a[%i1] : memref<10xf32> %v1 = load %c[%i1] : memref<10xf32> } - for %i2 = 0 to 10 { + affine.for %i2 = 0 to 10 { %v2 = load %b[%i2] : memref<10xf32> store %v2, %c[%i2] : memref<10xf32> } // Loops '%i0' and '%i2' cannot fuse along producer/consumer edge on memref // '%b', because of the WAR dep from '%i0' to '%i1' on memref '%a' and // because of the WAR dep from '%i1' to '%i2' on memref '%c'. 
- // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: %3 = load %0[%i0] : memref<10xf32> // CHECK-NEXT: store %3, %1[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i1 = 0 to 10 { + // CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i1] : memref<10xf32> // CHECK-NEXT: %4 = load %2[%i1] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i2 = 0 to 10 { + // CHECK-NEXT: affine.for %i2 = 0 to 10 { // CHECK-NEXT: %5 = load %1[%i2] : memref<10xf32> // CHECK-NEXT: store %5, %2[%i2] : memref<10xf32> // CHECK-NEXT: } @@ -1429,17 +1429,17 @@ func @should_fuse_and_move_to_preserve_war_dep() { %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { %v0 = load %b[%i0] : memref<10xf32> store %v0, %a[%i0] : memref<10xf32> } - for %i1 = 0 to 3 { + affine.for %i1 = 0 to 3 { %v2 = load %c[%i1] : memref<10xf32> } - for %i2 = 0 to 5 { + affine.for %i2 = 0 to 5 { store %cf7, %b[%i2] : memref<10xf32> } - for %i3 = 0 to 10 { + affine.for %i3 = 0 to 10 { %v1 = load %a[%i3] : memref<10xf32> store %cf7, %c[%i3] : memref<10xf32> } @@ -1458,10 +1458,10 @@ func @should_fuse_and_move_to_preserve_war_dep() { // if the fused loop nest is inserted between loops '%i1' and '%i2'. // CHECK-DAG: %0 = alloc() : memref<1xf32> - // CHECK: for %i0 = 0 to 3 { + // CHECK: affine.for %i0 = 0 to 3 { // CHECK-NEXT: %3 = load %2[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i1 = 0 to 10 { + // CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: %4 = load %1[%i1] : memref<10xf32> // CHECK-NEXT: %5 = affine.apply [[MAP0]](%i1, %i1) // CHECK-NEXT: store %4, %0[%5] : memref<1xf32> @@ -1469,7 +1469,7 @@ func @should_fuse_and_move_to_preserve_war_dep() { // CHECK-NEXT: %7 = load %0[%6] : memref<1xf32> // CHECK-NEXT: store %cst, %2[%i1] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i2 = 0 to 5 { + // CHECK-NEXT: affine.for %i2 = 0 to 5 { // CHECK-NEXT: store %cst, %1[%i2] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return @@ -1486,30 +1486,30 @@ func @fusion_preventing_dep_on_constant() { %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { %v0 = load %b[%i0] : memref<10xf32> store %cf7, %a[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { store %cf7, %b[%i1] : memref<10xf32> } %cf11 = constant 11.0 : f32 - for %i2 = 0 to 10 { + affine.for %i2 = 0 to 10 { %v2 = load %a[%i2] : memref<10xf32> store %cf11, %c[%i2] : memref<10xf32> } // Loops '%i0' and '%i2' cannot fuse along producer/consumer edge on memref // '%a', because of the WAR dep from '%i0' to '%i1' on memref '%b' and // because of the SSA value dep from '%cf11' def to use in '%i2'. 
- // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: %3 = load %1[%i0] : memref<10xf32> // CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i1 = 0 to 10 { + // CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: store %cst, %1[%i1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: %cst_0 = constant 1.100000e+01 : f32 - // CHECK-NEXT: for %i2 = 0 to 10 { + // CHECK-NEXT: affine.for %i2 = 0 to 10 { // CHECK-NEXT: %4 = load %0[%i2] : memref<10xf32> // CHECK-NEXT: store %cst_0, %2[%i2] : memref<10xf32> // CHECK-NEXT: } @@ -1529,14 +1529,14 @@ func @should_fuse_and_preserve_dep_on_constant() { %cf7 = constant 7.0 : f32 %cf11 = constant 11.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { %v0 = load %b[%i0] : memref<10xf32> store %cf7, %a[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { store %cf7, %b[%i1] : memref<10xf32> } - for %i2 = 0 to 10 { + affine.for %i2 = 0 to 10 { %v2 = load %a[%i2] : memref<10xf32> store %cf11, %c[%i2] : memref<10xf32> } @@ -1546,7 +1546,7 @@ func @should_fuse_and_preserve_dep_on_constant() { // the SSA value dep from '%cf11' def to use in '%i2'. // CHECK: %cst_0 = constant 1.100000e+01 : f32 - // CHECK-NEXT: for %i0 = 0 to 10 { + // CHECK-NEXT: affine.for %i0 = 0 to 10 { // CHECK-NEXT: %3 = load %1[%i0] : memref<10xf32> // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, %0[%4] : memref<1xf32> @@ -1554,7 +1554,7 @@ func @should_fuse_and_preserve_dep_on_constant() { // CHECK-NEXT: %6 = load %0[%5] : memref<1xf32> // CHECK-NEXT: store %cst_0, %2[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i1 = 0 to 10 { + // CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: store %cst, %1[%i1] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: return @@ -1572,25 +1572,25 @@ func @should_fuse_and_preserve_dep_on_constant() { func @should_fuse_at_depth_above_loop_carried_dependence(%arg0: memref<64x4xf32>, %arg1: memref<64x4xf32>) { %out = alloc() : memref<64x4xf32> %0 = constant 0.0 : f32 - for %i0 = 0 to 64 { - for %i1 = 0 to 4 { + affine.for %i0 = 0 to 64 { + affine.for %i1 = 0 to 4 { store %0, %out[%i0, %i1] : memref<64x4xf32> } } - for %i2 = 0 to 4 { - for %i3 = 0 to 4 { - for %i4 = 0 to 16 { + affine.for %i2 = 0 to 4 { + affine.for %i3 = 0 to 4 { + affine.for %i4 = 0 to 16 { %1 = affine.apply (d0, d1) -> (d0 * 16 - d1 + 15)(%i3, %i4) %2 = load %arg1[%1, %i2] : memref<64x4xf32> "op0"(%2) : (f32) -> () } - for %i5 = 0 to 4 { - for %i6 = 0 to 16 { + affine.for %i5 = 0 to 4 { + affine.for %i6 = 0 to 16 { %3 = affine.apply (d0, d1) -> (d0 * 16 - d1 + 15)(%i5, %i6) %4 = load %arg0[%3, %i3] : memref<64x4xf32> "op1"(%4) : (f32) -> () } - for %i7 = 0 to 16 { + affine.for %i7 = 0 to 16 { %5 = "op2"() : () -> (f32) %6 = affine.apply (d0, d1) -> (d0 * 16 + d1)(%i5, %i7) %7 = load %out[%6, %i2] : memref<64x4xf32> @@ -1610,25 +1610,25 @@ func @should_fuse_at_depth_above_loop_carried_dependence(%arg0: memref<64x4xf32> // memref size can be reduced to 128x1xf32. 
// CHECK: %0 = alloc() : memref<64x1xf32> - // CHECK: for %i0 = 0 to 4 { - // CHECK-NEXT: for %i1 = 0 to 64 { + // CHECK: affine.for %i0 = 0 to 4 { + // CHECK-NEXT: affine.for %i1 = 0 to 64 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1, %i0) // CHECK-NEXT: %2 = affine.apply [[MAP1]](%i0, %i1, %i0) // CHECK-NEXT: store %cst, %0[%1, %2] : memref<64x1xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i2 = 0 to 4 { - // CHECK-NEXT: for %i3 = 0 to 16 { + // CHECK-NEXT: affine.for %i2 = 0 to 4 { + // CHECK-NEXT: affine.for %i3 = 0 to 16 { // CHECK-NEXT: %3 = affine.apply [[MAP2]](%i2, %i3) // CHECK-NEXT: %4 = load %arg1[%3, %i0] : memref<64x4xf32> // CHECK-NEXT: "op0"(%4) : (f32) -> () // CHECK-NEXT: } - // CHECK-NEXT: for %i4 = 0 to 4 { - // CHECK-NEXT: for %i5 = 0 to 16 { + // CHECK-NEXT: affine.for %i4 = 0 to 4 { + // CHECK-NEXT: affine.for %i5 = 0 to 16 { // CHECK-NEXT: %5 = affine.apply [[MAP2]](%i4, %i5) // CHECK-NEXT: %6 = load %arg0[%5, %i2] : memref<64x4xf32> // CHECK-NEXT: "op1"(%6) : (f32) -> () // CHECK-NEXT: } - // CHECK-NEXT: for %i6 = 0 to 16 { + // CHECK-NEXT: affine.for %i6 = 0 to 16 { // CHECK-NEXT: %7 = "op2"() : () -> f32 // CHECK-NEXT: %8 = affine.apply [[MAP3]](%i4, %i6) // CHECK-NEXT: %9 = affine.apply [[MAP0]](%i0, %8, %i0) @@ -1657,14 +1657,14 @@ func @should_fuse_after_private_memref_creation() { %cf7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %a[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { %v0 = load %a[%i1] : memref<10xf32> store %v0, %b[%i1] : memref<10xf32> } - for %i2 = 0 to 10 { + affine.for %i2 = 0 to 10 { %v1 = load %a[%i2] : memref<10xf32> store %v1, %b[%i2] : memref<10xf32> } @@ -1675,14 +1675,14 @@ func @should_fuse_after_private_memref_creation() { // private memref, the dependence between '%i0' and '%i1' on memref '%a' no // longer exists, so '%i0' can now be fused into '%i2'. - // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, %1[%3] : memref<1xf32> // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %5 = load %1[%4] : memref<1xf32> // CHECK-NEXT: store %5, %2[%i0] : memref<10xf32> // CHECK-NEXT: } - // CHECK-NEXT: for %i1 = 0 to 10 { + // CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: %6 = affine.apply [[MAP0]](%i1, %i1) // CHECK-NEXT: store %cst, %0[%6] : memref<1xf32> // CHECK-NEXT: %7 = affine.apply [[MAP0]](%i1, %i1) @@ -1702,12 +1702,12 @@ func @should_fuse_after_one_loop_interchange() { %a = alloc() : memref<10xf32> %cf0 = constant 0.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf0, %a[%i0] : memref<10xf32> } - for %i1 = 0 to 5 { - for %i2 = 0 to 10 { + affine.for %i1 = 0 to 5 { + affine.for %i2 = 0 to 10 { %v0 = load %a[%i2] : memref<10xf32> store %v0, %a[%i2] : memref<10xf32> } @@ -1719,10 +1719,10 @@ func @should_fuse_after_one_loop_interchange() { // at loop depth 1, because the loop carrying the dependence has been // interchanged and is now at depth 2. 
- // CHECK: for %i0 = 0 to 10 { + // CHECK: affine.for %i0 = 0 to 10 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: store %cst, %0[%1] : memref<1xf32> - // CHECK-NEXT: for %i1 = 0 to 5 { + // CHECK-NEXT: affine.for %i1 = 0 to 5 { // CHECK-NEXT: %2 = affine.apply [[MAP0]](%i0, %i0) // CHECK-NEXT: %3 = load %0[%2] : memref<1xf32> // CHECK-NEXT: %4 = affine.apply [[MAP0]](%i0, %i0) @@ -1743,16 +1743,16 @@ func @should_fuse_after_two_loop_interchanges() { %a = alloc() : memref<6x8xf32> %cf0 = constant 0.0 : f32 - for %i0 = 0 to 6 { - for %i1 = 0 to 8 { + affine.for %i0 = 0 to 6 { + affine.for %i1 = 0 to 8 { store %cf0, %a[%i0, %i1] : memref<6x8xf32> } } - for %i2 = 0 to 4 { - for %i3 = 0 to 6 { - for %i4 = 0 to 2 { - for %i5 = 0 to 8 { + affine.for %i2 = 0 to 4 { + affine.for %i3 = 0 to 6 { + affine.for %i4 = 0 to 2 { + affine.for %i5 = 0 to 8 { %v0 = load %a[%i3, %i5] : memref<6x8xf32> %v1 = addf %v0, %v0 : f32 store %v1, %a[%i3, %i5] : memref<6x8xf32> @@ -1768,13 +1768,13 @@ func @should_fuse_after_two_loop_interchanges() { // '%i5', then loop '%i0' can be fused at loop depth 2, because the loop // carring the dependences have been interchanged with loops at depth > 2. - // CHECK: for %i0 = 0 to 6 { - // CHECK-NEXT: for %i1 = 0 to 8 { + // CHECK: affine.for %i0 = 0 to 6 { + // CHECK-NEXT: affine.for %i1 = 0 to 8 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1, %i0, %i1) // CHECK-NEXT: %2 = affine.apply [[MAP1]](%i0, %i1, %i0, %i1) // CHECK-NEXT: store %cst, %0[%1, %2] : memref<1x1xf32> - // CHECK-NEXT: for %i2 = 0 to 4 { - // CHECK-NEXT: for %i3 = 0 to 2 { + // CHECK-NEXT: affine.for %i2 = 0 to 4 { + // CHECK-NEXT: affine.for %i3 = 0 to 2 { // CHECK-NEXT: %3 = affine.apply [[MAP0]](%i0, %i1, %i0, %i1) // CHECK-NEXT: %4 = affine.apply [[MAP1]](%i0, %i1, %i0, %i1) // CHECK-NEXT: %5 = load %0[%3, %4] : memref<1x1xf32> @@ -1794,17 +1794,17 @@ func @should_fuse_after_two_loop_interchanges() { func @should_fuse_live_out_writer(%arg0 : memref<10xf32>) -> memref<10xf32> { %cst = constant 0.000000e+00 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cst, %arg0[%i0] : memref<10xf32> } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { %1 = load %arg0[%i1] : memref<10xf32> store %1, %arg0[%i1] : memref<10xf32> } return %arg0 : memref<10xf32> // CHECK: %cst = constant 0.000000e+00 : f32 - // CHECK-NEXT: for %i0 = 0 to 10 { + // CHECK-NEXT: affine.for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %arg0[%i0] : memref<10xf32> // CHECK-NEXT: %0 = load %arg0[%i0] : memref<10xf32> // CHECK-NEXT: store %0, %arg0[%i0] : memref<10xf32> @@ -1823,20 +1823,20 @@ func @should_fuse_live_out_writer(%arg0 : memref<10xf32>) -> memref<10xf32> { // CHECK-LABEL: slice_tile func @slice_tile(%arg0: memref<128x8xf32>, %arg1: memref<32x8xf32>, %0 : f32) -> memref<32x8xf32> { - for %i0 = 0 to 32 { - for %i1 = 0 to 8 { + affine.for %i0 = 0 to 32 { + affine.for %i1 = 0 to 8 { store %0, %arg1[%i0, %i1] : memref<32x8xf32> } } - for %i = 0 to 2 { - for %j = 0 to 8 { - for %k = 0 to 8 { - for %kk = 0 to 16 { + affine.for %i = 0 to 2 { + affine.for %j = 0 to 8 { + affine.for %k = 0 to 8 { + affine.for %kk = 0 to 16 { %1 = affine.apply #map(%k, %kk) %2 = load %arg0[%1, %j] : memref<128x8xf32> %3 = "foo"(%2) : (f32) -> f32 } - for %ii = 0 to 16 { + affine.for %ii = 0 to 16 { %6 = affine.apply #map(%i, %ii) %7 = load %arg1[%6, %j] : memref<32x8xf32> %8 = addf %7, %7 : f32 @@ -1847,18 +1847,18 @@ func @slice_tile(%arg0: memref<128x8xf32>, %arg1: memref<32x8xf32>, %0 : f32) -> } return %arg1 : 
memref<32x8xf32> } -// CHECK: for %i0 = 0 to 2 { -// CHECK-NEXT: for %i1 = 0 to 8 { -// CHECK-NEXT: for %i2 = [[MAP_LB]](%i0) to [[MAP_UB]](%i0) { +// CHECK: affine.for %i0 = 0 to 2 { +// CHECK-NEXT: affine.for %i1 = 0 to 8 { +// CHECK-NEXT: affine.for %i2 = [[MAP_LB]](%i0) to [[MAP_UB]](%i0) { // CHECK-NEXT: store %arg2, %arg1[%i2, %i1] : memref<32x8xf32> // CHECK-NEXT: } -// CHECK-NEXT: for %i3 = 0 to 8 { -// CHECK-NEXT: for %i4 = 0 to 16 { +// CHECK-NEXT: affine.for %i3 = 0 to 8 { +// CHECK-NEXT: affine.for %i4 = 0 to 16 { // CHECK-NEXT: %0 = affine.apply #map{{[0-9]+}}(%i3, %i4) // CHECK-NEXT: %1 = load %arg0[%0, %i1] : memref<128x8xf32> // CHECK-NEXT: %2 = "foo"(%1) : (f32) -> f32 // CHECK-NEXT: } -// CHECK-NEXT: for %i5 = 0 to 16 { +// CHECK-NEXT: affine.for %i5 = 0 to 16 { // CHECK-NEXT: %3 = affine.apply #map{{[0-9]+}}(%i0, %i5) // CHECK-NEXT: %4 = load %arg1[%3, %i1] : memref<32x8xf32> // CHECK-NEXT: %5 = addf %4, %4 : f32 @@ -1879,9 +1879,9 @@ func @test_add_slice_bounds() { %cf7 = constant 7.0 : f32 %c0 = constant 0 : index - for %i0 = 0 to 10 { - for %i1 = 0 to 10 { - for %i2 = 0 to 10 { + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { + affine.for %i2 = 0 to 10 { %a0 = affine.apply (d0) -> (d0) (%i0) %a1 = affine.apply (d0) -> (d0) (%i0) %a2 = affine.apply (d0, d1) -> (d0 - d1) (%a0, %a1) @@ -1889,17 +1889,17 @@ func @test_add_slice_bounds() { } } } - for %i3 = 0 to 10 { - for %i4 = 0 to 10 { - for %i5 = 0 to 10 { + affine.for %i3 = 0 to 10 { + affine.for %i4 = 0 to 10 { + affine.for %i5 = 0 to 10 { %v0 = load %a[%c0] : memref<10xf32> } } } -// CHECK: for %i0 = 0 to 10 { -// CHECK-NEXT: for %i1 = 0 to 10 { -// CHECK-NEXT: for %i2 = 0 to 10 { +// CHECK: affine.for %i0 = 0 to 10 { +// CHECK-NEXT: affine.for %i1 = 0 to 10 { +// CHECK-NEXT: affine.for %i2 = 0 to 10 { // CHECK-NEXT: %2 = affine.apply #map2(%i0) // CHECK-NEXT: %3 = affine.apply #map2(%i0) // CHECK-NEXT: %4 = affine.apply #map3(%2, %3) @@ -1907,9 +1907,9 @@ func @test_add_slice_bounds() { // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: for %i3 = 0 to 10 { -// CHECK-NEXT: for %i4 = 0 to 10 { -// CHECK-NEXT: for %i5 = 0 to 10 { +// CHECK-NEXT: affine.for %i3 = 0 to 10 { +// CHECK-NEXT: affine.for %i4 = 0 to 10 { +// CHECK-NEXT: affine.for %i5 = 0 to 10 { // CHECK-NEXT: %5 = load %0[%c0] : memref<10xf32> // CHECK-NEXT: } // CHECK-NEXT: } @@ -1926,31 +1926,31 @@ func @should_fuse_init_loops_siblings_then_shared_producer(%arg0: memref<10x10xf %cst = constant 0.000000e+00 : f32 %cst_0 = constant 1.000000e+00 : f32 %cst_1 = constant 7.000000e+00 : f32 - for %i0 = 0 to 10 { - for %i1 = 0 to 10 { + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { store %cst_1, %0[%i0, %i1] : memref<10x10xf32> } } - for %i2 = 0 to 3 { - for %i3 = 0 to 3 { + affine.for %i2 = 0 to 3 { + affine.for %i3 = 0 to 3 { store %cst, %arg0[%i2, %i3] : memref<10x10xf32> } } - for %i4 = 0 to 3 { - for %i5 = 0 to 3 { + affine.for %i4 = 0 to 3 { + affine.for %i5 = 0 to 3 { %1 = load %0[%i4, %i5] : memref<10x10xf32> %2 = load %arg0[%i4, %i5] : memref<10x10xf32> %3 = mulf %1, %2 : f32 store %3, %arg0[%i4, %i5] : memref<10x10xf32> } } - for %i6 = 0 to 3 { - for %i7 = 0 to 3 { + affine.for %i6 = 0 to 3 { + affine.for %i7 = 0 to 3 { store %cst_0, %arg1[%i6, %i7] : memref<10x10xf32> } } - for %i8 = 0 to 3 { - for %i9 = 0 to 3 { + affine.for %i8 = 0 to 3 { + affine.for %i9 = 0 to 3 { %4 = load %0[%i8, %i9] : memref<10x10xf32> %5 = load %arg1[%i8, %i9] : memref<10x10xf32> %6 = addf %4, %5 : f32 @@ -1966,8 +1966,8 @@ func 
@should_fuse_init_loops_siblings_then_shared_producer(%arg0: memref<10x10xf // that loop nest '%i0' now has a single user after Pass 2 fused its // two users together). -// CHECK: for %i0 = 0 to 3 { -// CHECK-NEXT: for %i1 = 0 to 3 { +// CHECK: affine.for %i0 = 0 to 3 { +// CHECK-NEXT: affine.for %i1 = 0 to 3 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1, %i0, %i1) // CHECK-NEXT: %2 = affine.apply [[MAP1]](%i0, %i1, %i0, %i1) // CHECK-NEXT: store %cst_1, %0[%1, %2] : memref<1x1xf32> @@ -2005,14 +2005,14 @@ func @two_matrix_vector_products() { %cf7 = constant 7.0 : f32 // Populate input matrix. - for %i0 = 0 to 10 { - for %i1 = 0 to 10 { + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { store %cf7, %in_matrix[%i0, %i1] : memref<10x10xf32> } } // out_vec0 = in_matrix x in_vec0 - for %i2 = 0 to 10 { - for %i3 = 0 to 10 { + affine.for %i2 = 0 to 10 { + affine.for %i3 = 0 to 10 { %v0 = load %in_matrix[%i2, %i3] : memref<10x10xf32> %v1 = load %in_vec0[%i3] : memref<10xf32> %v2 = mulf %v0, %v1 : f32 @@ -2022,8 +2022,8 @@ func @two_matrix_vector_products() { } } // out_vec1 = in_matrix x in_vec1 - for %i4 = 0 to 10 { - for %i5 = 0 to 10 { + affine.for %i4 = 0 to 10 { + affine.for %i5 = 0 to 10 { %v5 = load %in_matrix[%i4, %i5] : memref<10x10xf32> %v6 = load %in_vec1[%i5] : memref<10xf32> %v7 = mulf %v5, %v6 : f32 @@ -2033,13 +2033,13 @@ func @two_matrix_vector_products() { } } -// CHECK: for %i0 = 0 to 10 { -// CHECK-NEXT: for %i1 = 0 to 10 { +// CHECK: affine.for %i0 = 0 to 10 { +// CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: %5 = affine.apply [[MAP2]](%i0, %i1, %i0) // CHECK-NEXT: %6 = affine.apply [[MAP3]](%i0, %i1, %i0) // CHECK-NEXT: store %cst, %0[%5, %6] : memref<10x1xf32> // CHECK-NEXT: } -// CHECK-NEXT: for %i2 = 0 to 10 { +// CHECK-NEXT: affine.for %i2 = 0 to 10 { // CHECK-NEXT: %7 = affine.apply [[MAP2]](%i0, %i2, %i0) // CHECK-NEXT: %8 = affine.apply [[MAP3]](%i0, %i2, %i0) // CHECK-NEXT: %9 = load %0[%7, %8] : memref<10x1xf32> @@ -2049,7 +2049,7 @@ func @two_matrix_vector_products() { // CHECK-NEXT: %13 = addf %11, %12 : f32 // CHECK-NEXT: store %13, %3[%i0] : memref<10xf32> // CHECK-NEXT: } -// CHECK-NEXT: for %i3 = 0 to 10 { +// CHECK-NEXT: affine.for %i3 = 0 to 10 { // CHECK-NEXT: %14 = affine.apply [[MAP2]](%i0, %i3, %i0) // CHECK-NEXT: %15 = affine.apply [[MAP3]](%i0, %i3, %i0) // CHECK-NEXT: %16 = load %0[%14, %15] : memref<10x1xf32> @@ -2070,28 +2070,28 @@ func @two_matrix_vector_products() { func @should_not_slice_past_slice_barrier() { %0 = alloc() : memref<100x16xf32> - for %i0 = 0 to 100 { - for %i1 = 0 to 16 { + affine.for %i0 = 0 to 100 { + affine.for %i1 = 0 to 16 { %1 = "op1"() : () -> f32 store %1, %0[%i0, %i1] : memref<100x16xf32> } {slice_fusion_barrier: true} } - for %i2 = 0 to 100 { - for %i3 = 0 to 16 { + affine.for %i2 = 0 to 100 { + affine.for %i3 = 0 to 16 { %2 = load %0[%i2, %i3] : memref<100x16xf32> "op2"(%2) : (f32) -> () } } // The 'slice_fusion_barrier' attribute on '%i1' prevents slicing the // iteration space of '%i1' and any enclosing loop nests. 
-// CHECK: for %i0 = 0 to 100 { -// CHECK-NEXT: for %i1 = 0 to 16 { +// CHECK: affine.for %i0 = 0 to 100 { +// CHECK-NEXT: affine.for %i1 = 0 to 16 { // CHECK-NEXT: %1 = "op1"() : () -> f32 // CHECK-NEXT: %2 = affine.apply [[MAP3]](%i0, %i0, %i1) // CHECK-NEXT: %3 = affine.apply [[MAP4]](%i0, %i0, %i1) // CHECK-NEXT: store %1, %0[%2, %3] : memref<1x16xf32> // CHECK-NEXT: } {slice_fusion_barrier: true} -// CHECK-NEXT: for %i2 = 0 to 16 { +// CHECK-NEXT: affine.for %i2 = 0 to 16 { // CHECK-NEXT: %4 = affine.apply [[MAP3]](%i0, %i0, %i2) // CHECK-NEXT: %5 = affine.apply [[MAP4]](%i0, %i0, %i2) // CHECK-NEXT: %6 = load %0[%4, %5] : memref<1x16xf32> @@ -2107,18 +2107,18 @@ func @should_not_slice_past_slice_barrier() { func @fuse_across_dim_mismatch(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9xf32>, %arg2: memref<9xf32>) { %1 = alloc() : memref<144x4xf32> %2 = constant 0.0 : f32 - for %i2 = 0 to 9 { - for %i3 = 0 to 4 { - for %i5 = 0 to 16 { + affine.for %i2 = 0 to 9 { + affine.for %i3 = 0 to 4 { + affine.for %i5 = 0 to 16 { %7 = affine.apply #map0(%i2, %i5) store %2, %1[%7, %i3] : memref<144x4xf32> } } } - for %i6 = 0 to 9 { - for %i7 = 0 to 9 { - for %i8 = 0 to 4 { - for %i10 = 0 to 16 { + affine.for %i6 = 0 to 9 { + affine.for %i7 = 0 to 9 { + affine.for %i8 = 0 to 4 { + affine.for %i10 = 0 to 16 { %10 = affine.apply #map0(%i6, %i10) %11 = load %1[%10, %i8] : memref<144x4xf32> } @@ -2132,10 +2132,10 @@ func @fuse_across_dim_mismatch(%arg0: memref<4x4x16x1xf32>, %arg1: memref<144x9x // MAXIMAL-NEXT: #map6 = (d0, d1, d2, d3, d4) -> (-d2 + d4) // MAXIMAL-LABEL: func @fuse_across_dim_mismatch // MAXIMAL: %0 = alloc() : memref<1x1xf32> -// MAXIMAL: for %i0 = 0 to 9 { -// MAXIMAL-NEXT: for %i1 = 0 to 9 { -// MAXIMAL-NEXT: for %i2 = 0 to 4 { -// MAXIMAL-NEXT: for %i3 = 0 to 16 { +// MAXIMAL: affine.for %i0 = 0 to 9 { +// MAXIMAL-NEXT: affine.for %i1 = 0 to 9 { +// MAXIMAL-NEXT: affine.for %i2 = 0 to 4 { +// MAXIMAL-NEXT: affine.for %i3 = 0 to 16 { // MAXIMAL-NEXT: %1 = affine.apply #map4(%i0, %i3) // MAXIMAL-NEXT: %2 = affine.apply #map5(%i0, %i3, %i2, %1, %i2) // MAXIMAL-NEXT: %3 = affine.apply #map6(%i0, %i3, %i2, %1, %i2) @@ -2164,8 +2164,8 @@ func @fuse_across_varying_dims_complex() { %0 = alloc() : memref<2x2x3x3x16x1xf32> %1 = alloc() : memref<64x9xf32> %2 = alloc() : memref<144x4xf32> - for %i0 = 0 to 64 { - for %i1 = 0 to 9 { + affine.for %i0 = 0 to 64 { + affine.for %i1 = 0 to 9 { %4 = affine.apply #map3(%i0, %i1) %5 = affine.apply #map4(%i0, %i1) %6 = affine.apply #map5(%i0, %i1) @@ -2175,23 +2175,23 @@ func @fuse_across_varying_dims_complex() { store %9, %1[%i0, %i1] : memref<64x9xf32> } } - for %i2 = 0 to 9 { - for %i3 = 0 to 4 { - for %i4 = 0 to 16 { + affine.for %i2 = 0 to 9 { + affine.for %i3 = 0 to 4 { + affine.for %i4 = 0 to 16 { %10 = affine.apply #map10(%i3, %i4) %11 = load %1[%10, %i2] : memref<64x9xf32> } - for %i5 = 0 to 16 { + affine.for %i5 = 0 to 16 { %13 = "bar"() : () -> f32 %14 = affine.apply #map11(%i2, %i5) store %13, %2[%14, %i3] : memref<144x4xf32> } } } - for %i6 = 0 to 9 { - for %i7 = 0 to 9 { - for %i8 = 0 to 4 { - for %i9 = 0 to 16 { + affine.for %i6 = 0 to 9 { + affine.for %i7 = 0 to 9 { + affine.for %i8 = 0 to 4 { + affine.for %i9 = 0 to 16 { %15 = affine.apply #map12(%i8, %i9) %16 = load %1[%15, %i7] : memref<64x9xf32> } @@ -2214,11 +2214,11 @@ func @fuse_across_varying_dims_complex() { // MAXIMAL-NEXT: %c0 = constant 0 : index // MAXIMAL-NEXT: %1 = alloc() : memref<2x2x3x3x16x1xf32> // MAXIMAL-NEXT: %2 = alloc() : memref<144x4xf32> -// MAXIMAL-NEXT: for 
%i0 = 0 to 9 { -// MAXIMAL-NEXT: for %i1 = 0 to 9 { -// MAXIMAL-NEXT: for %i2 = 0 to 4 { -// MAXIMAL-NEXT: for %i3 = 0 to 16 { -// MAXIMAL-NEXT: for %i4 = 0 to 64 { +// MAXIMAL-NEXT: affine.for %i0 = 0 to 9 { +// MAXIMAL-NEXT: affine.for %i1 = 0 to 9 { +// MAXIMAL-NEXT: affine.for %i2 = 0 to 4 { +// MAXIMAL-NEXT: affine.for %i3 = 0 to 16 { +// MAXIMAL-NEXT: affine.for %i4 = 0 to 64 { // MAXIMAL-NEXT: %3 = affine.apply #map5(%i4, %i0) // MAXIMAL-NEXT: %4 = affine.apply #map6(%i4, %i0) // MAXIMAL-NEXT: %5 = affine.apply #map7(%i4, %i0) @@ -2229,14 +2229,14 @@ func @fuse_across_varying_dims_complex() { // MAXIMAL-NEXT: %10 = affine.apply #map11(%i0, %i4, %i0) // MAXIMAL-NEXT: store %8, %0[%9, %10] : memref<64x1xf32> // MAXIMAL-NEXT: } -// MAXIMAL-NEXT: for %i5 = 0 to 4 { -// MAXIMAL-NEXT: for %i6 = 0 to 16 { +// MAXIMAL-NEXT: affine.for %i5 = 0 to 4 { +// MAXIMAL-NEXT: affine.for %i6 = 0 to 16 { // MAXIMAL-NEXT: %11 = affine.apply #map12(%i5, %i6) // MAXIMAL-NEXT: %12 = affine.apply #map10(%i0, %11, %i0) // MAXIMAL-NEXT: %13 = affine.apply #map11(%i0, %11, %i0) // MAXIMAL-NEXT: %14 = load %0[%12, %13] : memref<64x1xf32> // MAXIMAL-NEXT: } -// MAXIMAL-NEXT: for %i7 = 0 to 16 { +// MAXIMAL-NEXT: affine.for %i7 = 0 to 16 { // MAXIMAL-NEXT: %15 = "bar"() : () -> f32 // MAXIMAL-NEXT: %16 = affine.apply #map12(%i0, %i7) // MAXIMAL-NEXT: store %15, %2[%16, %i5] : memref<144x4xf32> @@ -2259,13 +2259,13 @@ func @should_fuse_with_slice_union() { %c0 = constant 0 : index %cf0 = constant 0.0 : f32 - for %i0 = 0 to 100 { + affine.for %i0 = 0 to 100 { store %cf0, %a[%i0]: memref<100xf32> } - for %i1 = 10 to 20 { + affine.for %i1 = 10 to 20 { %v0 = load %a[%i1]: memref<100xf32> - for %i2 = 15 to 25 { + affine.for %i2 = 15 to 25 { %v1 = load %a[%i2]: memref<100xf32> } } @@ -2274,14 +2274,14 @@ func @should_fuse_with_slice_union() { // remapping, and private memref size. The result is that the temporary // memref is reduced from 100xf32 to 15xf32 and properly indexed by // the fused loops based on the union calculation. 
-// CHECK: for %i0 = 10 to 20 { -// CHECK-NEXT: for %i1 = 10 to 25 { +// CHECK: affine.for %i0 = 10 to 20 { +// CHECK-NEXT: affine.for %i1 = 10 to 25 { // CHECK-NEXT: %1 = affine.apply [[MAP3]](%i1) // CHECK-NEXT: store %cst, %0[%1] : memref<15xf32> // CHECK-NEXT: } // CHECK-NEXT: %2 = affine.apply [[MAP3]](%i0) // CHECK-NEXT: %3 = load %0[%2] : memref<15xf32> -// CHECK-NEXT: for %i2 = 15 to 25 { +// CHECK-NEXT: affine.for %i2 = 15 to 25 { // CHECK-NEXT: %4 = affine.apply [[MAP3]](%i2) // CHECK-NEXT: %5 = load %0[%4] : memref<15xf32> // CHECK-NEXT: } diff --git a/mlir/test/Transforms/loop-tiling.mlir b/mlir/test/Transforms/loop-tiling.mlir index c18c0fccf4b6..ff1fd30ce20f 100644 --- a/mlir/test/Transforms/loop-tiling.mlir +++ b/mlir/test/Transforms/loop-tiling.mlir @@ -9,12 +9,12 @@ // CHECK-DAG: [[UB_INTRA_TILE:#map[0-9]+]] = (d0, d1, d2) -> (d2 + 32, s0, 4096 floordiv s1) // CHECK-LABEL: func @loop_tiling() -// CHECK-NEXT: for %i0 = 0 to 256 step 32 { -// CHECK-NEXT: for %i1 = 0 to 512 step 32 { -// CHECK-NEXT: for %i2 = 0 to 1024 step 32 { -// CHECK-NEXT: for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) { -// CHECK-NEXT: for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) { -// CHECK-NEXT: for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) { +// CHECK-NEXT: affine.for %i0 = 0 to 256 step 32 { +// CHECK-NEXT: affine.for %i1 = 0 to 512 step 32 { +// CHECK-NEXT: affine.for %i2 = 0 to 1024 step 32 { +// CHECK-NEXT: affine.for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) { +// CHECK-NEXT: affine.for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) { +// CHECK-NEXT: affine.for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) { // CHECK-NEXT: "foo"(%i3, %i4, %i5) : (index, index, index) -> () // CHECK-NEXT: } // CHECK-NEXT: } @@ -22,32 +22,32 @@ // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: for %i6 = 0 to 50 step 32 { -// CHECK-NEXT: for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) { +// CHECK-NEXT: affine.for %i6 = 0 to 50 step 32 { +// CHECK-NEXT: affine.for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) { // CHECK-NEXT: "bar"(%i7, %i7) : (index, index) -> () // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: for %i8 = 0 to 21 step 32 { -// CHECK-NEXT: for %i9 = [[IDENTITY]](%i8) to 21 { +// CHECK-NEXT: affine.for %i8 = 0 to 21 step 32 { +// CHECK-NEXT: affine.for %i9 = [[IDENTITY]](%i8) to 21 { // CHECK-NEXT: "foobar"(%i9) : (index) -> () // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: return func @loop_tiling() { - for %i = 0 to 256 { - for %j = 0 to 512 { - for %k = 0 to 1024 { + affine.for %i = 0 to 256 { + affine.for %j = 0 to 512 { + affine.for %k = 0 to 1024 { "foo"(%i, %j, %k) : (index, index, index) -> () } } } - for %x = 0 to 50 { + affine.for %x = 0 to 50 { "bar"(%x, %x) : (index, index) -> () } // Intra-tile loop won't need a min expression. 
- for %y = 0 to 21 { + affine.for %y = 0 to 21 { "foobar"(%y) : (index) -> () } @@ -59,12 +59,12 @@ func @loop_tiling() { // CHECK-LABEL: func @loop_max_min_bound(%arg0: memref, %arg1: index, %arg2: index) { func @loop_max_min_bound(%A : memref, %L : index, %U : index) { %M = dim %A, 0 : memref - for %iTT = max #lb()[%L] to min #ub()[%M, %U] { + affine.for %iTT = max #lb()[%L] to min #ub()[%M, %U] { %out = affine.apply (d0) -> (d0) (%iTT) } return -// CHECK: for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 { -// CHECK-NEXT: for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) { +// CHECK: affine.for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 { +// CHECK-NEXT: affine.for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) { // CHECK-NEXT: %1 = affine.apply [[IDENTITY]](%i1) // CHECK-NEXT: } // CHECK-NEXT: } @@ -78,9 +78,9 @@ func @loop_max_min_bound(%A : memref, %L : index, %U : index) { // MODEL-LABEL: func @simple_matmul func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> { - for %i = 0 to 256 { - for %j = 0 to 256 { - for %k = 0 to 250 { + affine.for %i = 0 to 256 { + affine.for %j = 0 to 256 { + affine.for %k = 0 to 250 { %l = load %arg0[%i, %k] : memref<8x8xvector<64xf32>> %r = load %arg1[%k, %j] : memref<8x8xvector<64xf32>> %o = load %arg2[%i, %j] : memref<8x8xvector<64xf32>> @@ -92,6 +92,6 @@ func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector< } return %arg2 : memref<8x8xvector<64xf32>> } -// MODEL: for %i0 = 0 to 256 step 4 { -// MODEL-NEXT: for %i1 = 0 to 256 step 4 { -// MODEL-NEXT: for %i2 = 0 to 250 step 5 { +// MODEL: affine.for %i0 = 0 to 256 step 4 { +// MODEL-NEXT: affine.for %i1 = 0 to 256 step 4 { +// MODEL-NEXT: affine.for %i2 = 0 to 250 step 5 { diff --git a/mlir/test/Transforms/lower-affine.mlir b/mlir/test/Transforms/lower-affine.mlir index ac55afdf7c27..0cbc6d012c53 100644 --- a/mlir/test/Transforms/lower-affine.mlir +++ b/mlir/test/Transforms/lower-affine.mlir @@ -24,7 +24,7 @@ func @body(index) -> () // CHECK-NEXT: return // CHECK-NEXT: } func @simple_loop() { - for %i = 1 to 42 { + affine.for %i = 1 to 42 { call @body(%i) : (index) -> () } return @@ -65,9 +65,9 @@ func @post(index) -> () // CHECK-NEXT: return // CHECK-NEXT: } func @imperfectly_nested_loops() { - for %i = 0 to 42 { + affine.for %i = 0 to 42 { call @pre(%i) : (index) -> () - for %j = 7 to 56 step 2 { + affine.for %j = 7 to 56 step 2 { call @body2(%i, %j) : (index, index) -> () } call @post(%i) : (index) -> () @@ -122,13 +122,13 @@ func @body3(index, index) -> () // CHECK-NEXT: return // CHECK-NEXT: } func @more_imperfectly_nested_loops() { - for %i = 0 to 42 { + affine.for %i = 0 to 42 { call @pre(%i) : (index) -> () - for %j = 7 to 56 step 2 { + affine.for %j = 7 to 56 step 2 { call @body2(%i, %j) : (index, index) -> () } call @mid(%i) : (index) -> () - for %k = 18 to 37 step 3 { + affine.for %k = 18 to 37 step 3 { call @body3(%i, %k) : (index, index) -> () } call @post(%i) : (index) -> () @@ -161,8 +161,8 @@ func @more_imperfectly_nested_loops() { // CHECK-NEXT: return // CHECK-NEXT: } func @affine_apply_loops_shorthand(%N : index) { - for %i = 0 to %N { - for %j = (d0)[]->(d0)(%i)[] to 42 { + affine.for %i = 0 to %N { + affine.for %j = (d0)[]->(d0)(%i)[] to 42 { call @body2(%i, %j) : (index, index) -> () } } @@ -360,7 +360,7 @@ func @if_for() { // CHECK-NEXT: [[outerEndBB]]: // CHECK-NEXT: br 
[[outerLoopInit:\^bb[0-9]+]] affine.if #set1(%i) { - for %j = 0 to 42 { + affine.for %j = 0 to 42 { affine.if #set2(%j) { call @body2(%i, %j) : (index, index) -> () } @@ -397,9 +397,9 @@ func @if_for() { // CHECK-NEXT: %c1_9 = constant 1 : index // CHECK-NEXT: %16 = addi %9, %c1_9 : index // CHECK-NEXT: br [[outerLoopCond]](%16 : index) - for %k = 0 to 42 { + affine.for %k = 0 to 42 { affine.if #set2(%k) { - for %l = 0 to 42 { + affine.for %l = 0 to 42 { call @body3(%k, %l) : (index, index) -> () } } @@ -446,8 +446,8 @@ func @if_for() { // CHECK-NEXT: return // CHECK-NEXT: } func @loop_min_max(%N : index) { - for %i = 0 to 42 { - for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] { + affine.for %i = 0 to 42 { + affine.for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] { call @body2(%i, %j) : (index, index) -> () } } @@ -486,7 +486,7 @@ func @loop_min_max(%N : index) { // CHECK-NEXT: return // CHECK-NEXT: } func @min_reduction_tree(%v : index) { - for %i = 0 to min #map_7_values(%v)[] { + affine.for %i = 0 to min #map_7_values(%v)[] { call @body(%i) : (index) -> () } return diff --git a/mlir/test/Transforms/memref-bound-check.mlir b/mlir/test/Transforms/memref-bound-check.mlir index 8a276d6763d4..41f566721355 100644 --- a/mlir/test/Transforms/memref-bound-check.mlir +++ b/mlir/test/Transforms/memref-bound-check.mlir @@ -11,8 +11,8 @@ func @test() { %A = alloc() : memref<9 x 9 x i32> %B = alloc() : memref<111 x i32> - for %i = -1 to 10 { - for %j = -1 to 10 { + affine.for %i = -1 to 10 { + affine.for %j = -1 to 10 { %idx0 = affine.apply (d0, d1) -> (d0)(%i, %j) %idx1 = affine.apply (d0, d1) -> (d1)(%i, %j) // Out of bound access. @@ -27,7 +27,7 @@ func @test() { } } - for %k = 0 to 10 { + affine.for %k = 0 to 10 { // In bound. %u = load %B[%zero] : memref<111 x i32> // Out of bounds. @@ -43,8 +43,8 @@ func @test_mod_floordiv_ceildiv() { %zero = constant 0 : index %A = alloc() : memref<128 x 64 x 64 x i32> - for %i = 0 to 256 { - for %j = 0 to 256 { + affine.for %i = 0 to 256 { + affine.for %j = 0 to 256 { %idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j) %idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j) %idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j) @@ -69,8 +69,8 @@ func @test_no_out_of_bounds() { %C = alloc() : memref<257 x i32> %B = alloc() : memref<1 x i32> - for %i = 0 to 256 { - for %j = 0 to 256 { + affine.for %i = 0 to 256 { + affine.for %j = 0 to 256 { // All of these accesses are in bound; check that no errors are emitted. 
// CHECK: %3 = affine.apply {{#map.*}}(%i0, %i1) // CHECK-NEXT: %4 = load %0[%3, %c0] : memref<257x256xi32> @@ -93,8 +93,8 @@ func @mod_div() { %zero = constant 0 : index %A = alloc() : memref<128 x 64 x 64 x i32> - for %i = 0 to 256 { - for %j = 0 to 256 { + affine.for %i = 0 to 256 { + affine.for %j = 0 to 256 { %idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j) %idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j) %idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j) @@ -115,8 +115,8 @@ func @mod_div() { // CHECK-LABEL: func @mod_floordiv_nested() { func @mod_floordiv_nested() { %A = alloc() : memref<256 x 256 x i32> - for %i = 0 to 256 { - for %j = 0 to 256 { + affine.for %i = 0 to 256 { + affine.for %j = 0 to 256 { %idx0 = affine.apply (d0, d1) -> ((d0 mod 1024) floordiv 4)(%i, %j) %idx1 = affine.apply (d0, d1) -> ((((d1 mod 128) mod 32) ceildiv 4) * 32)(%i, %j) load %A[%idx0, %idx1] : memref<256 x 256 x i32> // expected-error {{'std.load' op memref out of upper bound access along dimension #2}} @@ -128,7 +128,7 @@ func @mod_floordiv_nested() { // CHECK-LABEL: func @test_semi_affine_bailout func @test_semi_affine_bailout(%N : index) { %B = alloc() : memref<10 x i32> - for %i = 0 to 10 { + affine.for %i = 0 to 10 { %idx = affine.apply (d0)[s0] -> (d0 * s0)(%i)[%N] %y = load %B[%idx] : memref<10 x i32> // expected-error@-1 {{getMemRefRegion: compose affine map failed}} @@ -139,7 +139,7 @@ func @test_semi_affine_bailout(%N : index) { // CHECK-LABEL: func @multi_mod_floordiv func @multi_mod_floordiv() { %A = alloc() : memref<2x2xi32> - for %ii = 0 to 64 { + affine.for %ii = 0 to 64 { %idx0 = affine.apply (d0) -> ((d0 mod 147456) floordiv 1152) (%ii) %idx1 = affine.apply (d0) -> (((d0 mod 147456) mod 1152) floordiv 384) (%ii) %v = load %A[%idx0, %idx1] : memref<2x2xi32> @@ -154,8 +154,8 @@ func @delinearize_mod_floordiv() { %out = alloc() : memref<64x9xi32> // Reshape '%in' into '%out'. 
- for %ii = 0 to 64 { - for %jj = 0 to 9 { + affine.for %ii = 0 to 64 { + affine.for %jj = 0 to 9 { %a0 = affine.apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj) %a10 = affine.apply (d0) -> (d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0) @@ -190,7 +190,7 @@ func @out_of_bounds() { %in = alloc() : memref<1xi32> %c9 = constant 9 : i32 - for %i0 = 10 to 11 { + affine.for %i0 = 10 to 11 { %idy = affine.apply (d0) -> (100 * d0 floordiv 1000) (%i0) store %c9, %in[%idy] : memref<1xi32> // expected-error {{'std.store' op memref out of upper bound access along dimension #1}} } @@ -210,8 +210,8 @@ func @out_of_bounds() { func @test_complex_mod_floordiv(%arg0: memref<4x4x16x1xf32>) { %c0 = constant 0 : index %0 = alloc() : memref<1x2x3x3x16x1xf32> - for %i0 = 0 to 64 { - for %i1 = 0 to 9 { + affine.for %i0 = 0 to 64 { + affine.for %i1 = 0 to 9 { %2 = affine.apply #map3(%i0, %i1) %3 = affine.apply #map4(%i0, %i1) %4 = affine.apply #map5(%i0, %i1) @@ -231,8 +231,8 @@ func @test_complex_mod_floordiv(%arg0: memref<4x4x16x1xf32>) { func @test_mod_bound() { %0 = alloc() : memref<7 x f32> %1 = alloc() : memref<6 x f32> - for %i0 = 0 to 4096 { - for %i1 = #map0(%i0) to #map1(%i0) { + affine.for %i0 = 0 to 4096 { + affine.for %i1 = #map0(%i0) to #map1(%i0) { load %0[%i1] : memref<7 x f32> load %1[%i1] : memref<6 x f32> // expected-error@-1 {{'std.load' op memref out of upper bound access along dimension #1}} @@ -253,13 +253,13 @@ func @test_floordiv_bound() { %1 = alloc() : memref<1026 x f32> %2 = alloc() : memref<4096 x f32> %N = constant 2048 : index - for %i0 = 0 to 4096 { - for %i1 = #map0(%i0) to #map1(%i0) { + affine.for %i0 = 0 to 4096 { + affine.for %i1 = #map0(%i0) to #map1(%i0) { load %0[%i1] : memref<1027 x f32> load %1[%i1] : memref<1026 x f32> // expected-error@-1 {{'std.load' op memref out of upper bound access along dimension #1}} } - for %i2 = 0 to #map2(%N) { + affine.for %i2 = 0 to #map2(%N) { // Within bounds. 
%v = load %2[%i2] : memref<4096 x f32> } @@ -277,9 +277,9 @@ func @test_floordiv_bound() { // CHECK-LABEL: func @non_composed_bound_operand func @non_composed_bound_operand(%arg0: memref<1024xf32>) { - for %i0 = 4 to 1028 step 4 { + affine.for %i0 = 4 to 1028 step 4 { %i1 = affine.apply (d0) -> (d0 - 4) (%i0) - for %i2 = #map_lb(%i1) to #map_ub(%i1) { + affine.for %i2 = #map_lb(%i1) to #map_ub(%i1) { %0 = load %arg0[%i2] : memref<1024xf32> } } diff --git a/mlir/test/Transforms/memref-dataflow-opt.mlir b/mlir/test/Transforms/memref-dataflow-opt.mlir index 710d14c1cf9f..ed39d71eefdc 100644 --- a/mlir/test/Transforms/memref-dataflow-opt.mlir +++ b/mlir/test/Transforms/memref-dataflow-opt.mlir @@ -10,14 +10,14 @@ func @simple_store_load() { %cf7 = constant 7.0 : f32 %m = alloc() : memref<10xf32> - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> %v0 = load %m[%i0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 } return // CHECK: %cst = constant 7.000000e+00 : f32 -// CHECK-NEXT: for %i0 = 0 to 10 { +// CHECK-NEXT: affine.for %i0 = 0 to 10 { // CHECK-NEXT: %0 = addf %cst, %cst : f32 // CHECK-NEXT: } // CHECK-NEXT: return @@ -30,7 +30,7 @@ func @multi_store_load() { %cf8 = constant 8.0 : f32 %cf9 = constant 9.0 : f32 %m = alloc() : memref<10xf32> - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> %v0 = load %m[%i0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 @@ -45,7 +45,7 @@ func @multi_store_load() { // CHECK-NEXT: %cst = constant 7.000000e+00 : f32 // CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32 // CHECK-NEXT: %cst_1 = constant 9.000000e+00 : f32 -// CHECK-NEXT: for %i0 = 0 to 10 { +// CHECK-NEXT: affine.for %i0 = 0 to 10 { // CHECK-NEXT: %0 = addf %cst, %cst : f32 // CHECK-NEXT: %1 = mulf %cst_1, %cst_1 : f32 // CHECK-NEXT: } @@ -59,8 +59,8 @@ func @multi_store_load() { func @store_load_affine_apply() -> memref<10x10xf32> { %cf7 = constant 7.0 : f32 %m = alloc() : memref<10x10xf32> - for %i0 = 0 to 10 { - for %i1 = 0 to 10 { + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { %t0 = affine.apply (d0, d1) -> (d1 + 1)(%i0, %i1) %t1 = affine.apply (d0, d1) -> (d0)(%i0, %i1) %idx0 = affine.apply (d0, d1) -> (d1) (%t0, %t1) @@ -75,8 +75,8 @@ func @store_load_affine_apply() -> memref<10x10xf32> { return %m : memref<10x10xf32> // CHECK: %cst = constant 7.000000e+00 : f32 // CHECK-NEXT: %0 = alloc() : memref<10x10xf32> -// CHECK-NEXT: for %i0 = 0 to 10 { -// CHECK-NEXT: for %i1 = 0 to 10 { +// CHECK-NEXT: affine.for %i0 = 0 to 10 { +// CHECK-NEXT: affine.for %i1 = 0 to 10 { // CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1) // CHECK-NEXT: %2 = affine.apply [[MAP1]](%i0, %i1) // CHECK-NEXT: %3 = affine.apply [[MAP2]](%1, %2) @@ -92,17 +92,17 @@ func @store_load_affine_apply() -> memref<10x10xf32> { func @store_load_nested(%N : index) { %cf7 = constant 7.0 : f32 %m = alloc() : memref<10xf32> - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> - for %i1 = 0 to %N { + affine.for %i1 = 0 to %N { %v0 = load %m[%i0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 } } return // CHECK: %cst = constant 7.000000e+00 : f32 -// CHECK-NEXT: for %i0 = 0 to 10 { -// CHECK-NEXT: for %i1 = 0 to %arg0 { +// CHECK-NEXT: affine.for %i0 = 0 to 10 { +// CHECK-NEXT: affine.for %i1 = 0 to %arg0 { // CHECK-NEXT: %0 = addf %cst, %cst : f32 // CHECK-NEXT: } // CHECK-NEXT: } @@ -117,12 +117,12 @@ func @multi_store_load_nested_no_fwd(%N : index) { %cf7 = constant 7.0 : f32 %cf8 = constant 8.0 : f32 %m = alloc() : 
memref<10xf32> - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> - for %i1 = 0 to %N { + affine.for %i1 = 0 to %N { store %cf8, %m[%i1] : memref<10xf32> } - for %i2 = 0 to %N { + affine.for %i2 = 0 to %N { // CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32> %v0 = load %m[%i0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 @@ -138,9 +138,9 @@ func @store_load_store_nested_no_fwd(%N : index) { %cf7 = constant 7.0 : f32 %cf9 = constant 9.0 : f32 %m = alloc() : memref<10xf32> - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> - for %i1 = 0 to %N { + affine.for %i1 = 0 to %N { // CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32> %v0 = load %m[%i0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 @@ -159,16 +159,16 @@ func @multi_store_load_nested_fwd(%N : index) { %cf9 = constant 9.0 : f32 %cf10 = constant 10.0 : f32 %m = alloc() : memref<10xf32> - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> - for %i1 = 0 to %N { + affine.for %i1 = 0 to %N { store %cf8, %m[%i1] : memref<10xf32> } - for %i2 = 0 to %N { + affine.for %i2 = 0 to %N { store %cf9, %m[%i2] : memref<10xf32> } store %cf10, %m[%i0] : memref<10xf32> - for %i3 = 0 to %N { + affine.for %i3 = 0 to %N { // CHECK-NOT: %{{[0-9]+}} = load %v0 = load %m[%i0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 @@ -182,10 +182,10 @@ func @multi_store_load_nested_fwd(%N : index) { func @store_load_no_fwd() { %cf7 = constant 7.0 : f32 %m = alloc() : memref<10xf32> - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> - for %i1 = 0 to 10 { - for %i2 = 0 to 10 { + affine.for %i1 = 0 to 10 { + affine.for %i2 = 0 to 10 { // CHECK: load %{{[0-9]+}} %v0 = load %m[%i2] : memref<10xf32> %v1 = addf %v0, %v0 : f32 @@ -202,9 +202,9 @@ func @store_load_fwd() { %c0 = constant 0 : index %m = alloc() : memref<10xf32> store %cf7, %m[%c0] : memref<10xf32> - for %i0 = 0 to 10 { - for %i1 = 0 to 10 { - for %i2 = 0 to 10 { + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { + affine.for %i2 = 0 to 10 { // CHECK-NOT: load %{{[0-9]}}+ %v0 = load %m[%c0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 @@ -223,9 +223,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 { %c0 = constant 0 : index %c1 = constant 1 : index %m = alloc() : memref<10xf32> - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> - for %i1 = 0 to %N { + affine.for %i1 = 0 to %N { %v0 = load %m[%i0] : memref<10xf32> %v1 = addf %v0, %v0 : f32 %idx = affine.apply (d0) -> (d0 + 1) (%i0) @@ -236,9 +236,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 { %v3 = load %m[%c1] : memref<10xf32> return %v3 : f32 // CHECK: %0 = alloc() : memref<10xf32> -// CHECK-NEXT: for %i0 = 0 to 10 { +// CHECK-NEXT: affine.for %i0 = 0 to 10 { // CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32> -// CHECK-NEXT: for %i1 = 0 to %arg0 { +// CHECK-NEXT: affine.for %i1 = 0 to %arg0 { // CHECK-NEXT: %1 = addf %cst, %cst : f32 // CHECK-NEXT: %2 = affine.apply [[MAP4]](%i0) // CHECK-NEXT: store %cst_0, %0[%2] : memref<10xf32> diff --git a/mlir/test/Transforms/memref-dependence-check.mlir b/mlir/test/Transforms/memref-dependence-check.mlir index 0accc30630b5..00d0e730098f 100644 --- a/mlir/test/Transforms/memref-dependence-check.mlir +++ b/mlir/test/Transforms/memref-dependence-check.mlir @@ -13,14 +13,14 @@ func @store_may_execute_before_load() { // ancestor IfOp of the store, dominates the ancestor ForSmt of the load, // and thus the store "may" 
conditionally execute before the load. affine.if #set0(%c0) { - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cf7, %m[%i0] : memref<10xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} // expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}} // expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}} } } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { %v0 = load %m[%i1] : memref<10xf32> // expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}} // expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}} @@ -37,13 +37,13 @@ func @dependent_loops() { %cst = constant 7.000000e+00 : f32 // There is a dependence from 0 to 1 at depth 1 (common surrounding loops 0) // because the first loop with the store dominates the second loop. - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { store %cst, %0[%i0] : memref<10xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} // expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}} // expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}} } - for %i1 = 0 to 10 { + affine.for %i1 = 0 to 10 { %1 = load %0[%i1] : memref<10xf32> // expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}} // expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}} @@ -231,7 +231,7 @@ func @store_range_load_after_range() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c10 = constant 10 : index - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { %a0 = affine.apply (d0) -> (d0) (%i0) store %c7, %m[%a0] : memref<100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} @@ -254,7 +254,7 @@ func @store_load_func_symbol(%arg0: index, %arg1: index) { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c10 = constant 10 : index - for %i0 = 0 to %arg1 { + affine.for %i0 = 0 to %arg1 { %a0 = affine.apply (d0) -> (d0) (%arg0) store %c7, %m[%a0] : memref<100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = [1, +inf]}} @@ -277,7 +277,7 @@ func @store_range_load_last_in_range() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c10 = constant 10 : index - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { %a0 = affine.apply (d0) -> (d0) (%i0) // For dependence from 0 to 1, we do not have a loop carried dependence // because only the final write in the loop accesses the same element as the @@ -305,7 +305,7 @@ func @store_range_load_before_range() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c0 = constant 0 : index - for %i0 = 1 to 11 { + affine.for %i0 = 1 to 11 { %a0 = affine.apply (d0) -> (d0) (%i0) store %c7, %m[%a0] : memref<100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} @@ -328,7 +328,7 @@ func @store_range_load_first_in_range() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 %c0 = constant 0 : index - for %i0 = 1 to 11 { + affine.for %i0 = 1 to 11 { %a0 = affine.apply (d0) -> (d0) (%i0) // Dependence from 0 to 1 at depth 1 is a range because all loads at // constant index zero are reads after first store at index zero during @@ -353,7 +353,7 @@ func @store_range_load_first_in_range() { func @store_plus_3() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 - for %i0 = 1 to 11 { + affine.for %i0 = 1 to 11 { %a0 = affine.apply (d0) -> (d0 + 3) (%i0) store %c7, %m[%a0] : memref<100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} @@ -375,7 +375,7 @@ func @store_plus_3() { func @load_minus_2() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 - for 
%i0 = 2 to 11 { + affine.for %i0 = 2 to 11 { %a0 = affine.apply (d0) -> (d0) (%i0) store %c7, %m[%a0] : memref<100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} @@ -397,8 +397,8 @@ func @load_minus_2() { func @perfectly_nested_loops_loop_independent() { %m = alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 - for %i0 = 0 to 11 { - for %i1 = 0 to 11 { + affine.for %i0 = 0 to 11 { + affine.for %i1 = 0 to 11 { // Dependence from access 0 to 1 is loop independent at depth = 3. %a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1) %a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1) @@ -428,8 +428,8 @@ func @perfectly_nested_loops_loop_independent() { func @perfectly_nested_loops_loop_carried_at_depth1() { %m = alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 - for %i0 = 0 to 9 { - for %i1 = 0 to 9 { + affine.for %i0 = 0 to 9 { + affine.for %i1 = 0 to 9 { // Dependence from access 0 to 1 is loop carried at depth 1. %a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1) %a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1) @@ -459,8 +459,8 @@ func @perfectly_nested_loops_loop_carried_at_depth1() { func @perfectly_nested_loops_loop_carried_at_depth2() { %m = alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 - for %i0 = 0 to 10 { - for %i1 = 0 to 10 { + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { // Dependence from access 0 to 1 is loop carried at depth 2. %a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1) %a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1) @@ -491,8 +491,8 @@ func @one_common_loop() { %m = alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 // There is a loop-independent dependence from access 0 to 1 at depth 2. - for %i0 = 0 to 10 { - for %i1 = 0 to 10 { + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { %a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1) %a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1) store %c7, %m[%a00, %a01] : memref<10x10xf32> @@ -502,7 +502,7 @@ func @one_common_loop() { // expected-note@-4 {{dependence from 0 to 1 at depth 1 = false}} // expected-note@-5 {{dependence from 0 to 1 at depth 2 = true}} } - for %i2 = 0 to 9 { + affine.for %i2 = 0 to 9 { %a10 = affine.apply (d0, d1) -> (d0) (%i0, %i2) %a11 = affine.apply (d0, d1) -> (d1) (%i0, %i2) %v0 = load %m[%a10, %a11] : memref<10x10xf32> @@ -525,7 +525,7 @@ func @dependence_cycle() { // Dependences: // *) loop-independent dependence from access 1 to 2 at depth 2. // *) loop-carried dependence from access 3 to 0 at depth 1. 
- for %i0 = 0 to 9 { + affine.for %i0 = 0 to 9 { %a0 = affine.apply (d0) -> (d0) (%i0) %v0 = load %m.a[%a0] : memref<100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} @@ -575,8 +575,8 @@ func @dependence_cycle() { func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) { %m = alloc() : memref<10x10xf32> %c7 = constant 7.0 : f32 - for %i0 = 0 to %arg0 { - for %i1 = 0 to %arg1 { + affine.for %i0 = 0 to %arg0 { + affine.for %i1 = 0 to %arg1 { %a00 = affine.apply (d0, d1) -> (d0 - 1) (%i0, %i1) %a01 = affine.apply (d0, d1) -> (d1 + 1) (%i0, %i1) %v0 = load %m[%a00, %a01] : memref<10x10xf32> @@ -605,8 +605,8 @@ func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) { func @war_raw_waw_deps() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 - for %i0 = 0 to 10 { - for %i1 = 0 to 10 { + affine.for %i0 = 0 to 10 { + affine.for %i1 = 0 to 10 { %a0 = affine.apply (d0) -> (d0 + 1) (%i1) %v0 = load %m[%a0] : memref<100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} @@ -633,7 +633,7 @@ func @war_raw_waw_deps() { func @mod_deps() { %m = alloc() : memref<100xf32> %c7 = constant 7.0 : f32 - for %i0 = 0 to 10 { + affine.for %i0 = 0 to 10 { %a0 = affine.apply (d0) -> (d0 mod 2) (%i0) // Results are conservative here since we currently don't have a way to // represent strided sets in FlatAffineConstraints. @@ -658,8 +658,8 @@ func @loop_nest_depth() { %0 = alloc() : memref<100x100xf32> %c7 = constant 7.0 : f32 - for %i0 = 0 to 128 { - for %i1 = 0 to 8 { + affine.for %i0 = 0 to 128 { + affine.for %i1 = 0 to 8 { store %c7, %0[%i0, %i1] : memref<100x100xf32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} // expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}} @@ -667,10 +667,10 @@ func @loop_nest_depth() { // expected-note@-4 {{dependence from 0 to 1 at depth 1 = true}} } } - for %i2 = 0 to 8 { - for %i3 = 0 to 8 { - for %i4 = 0 to 8 { - for %i5 = 0 to 16 { + affine.for %i2 = 0 to 8 { + affine.for %i3 = 0 to 8 { + affine.for %i4 = 0 to 8 { + affine.for %i5 = 0 to 16 { %8 = affine.apply (d0, d1) -> (d0 * 16 + d1)(%i4, %i5) %9 = load %0[%8, %i3] : memref<100x100xf32> // expected-note@-1 {{dependence from 1 to 0 at depth 1 = false}} @@ -693,9 +693,9 @@ func @loop_nest_depth() { func @mod_div_3d() { %M = alloc() : memref<2x2x2xi32> %c0 = constant 0 : i32 - for %i0 = 0 to 8 { - for %i1 = 0 to 8 { - for %i2 = 0 to 8 { + affine.for %i0 = 0 to 8 { + affine.for %i1 = 0 to 8 { + affine.for %i2 = 0 to 8 { %idx0 = affine.apply (d0, d1, d2) -> (d0 floordiv 4) (%i0, %i1, %i2) %idx1 = affine.apply (d0, d1, d2) -> (d1 mod 2) (%i0, %i1, %i2) %idx2 = affine.apply (d0, d1, d2) -> (d2 floordiv 4) (%i0, %i1, %i2) @@ -719,12 +719,12 @@ func @delinearize_mod_floordiv() { %in = alloc() : memref<2x2x3x3x16x1xi32> %out = alloc() : memref<64x9xi32> - for %i0 = 0 to 2 { - for %i1 = 0 to 2 { - for %i2 = 0 to 3 { - for %i3 = 0 to 3 { - for %i4 = 0 to 16 { - for %i5 = 0 to 1 { + affine.for %i0 = 0 to 2 { + affine.for %i1 = 0 to 2 { + affine.for %i2 = 0 to 3 { + affine.for %i3 = 0 to 3 { + affine.for %i4 = 0 to 16 { + affine.for %i5 = 0 to 1 { store %val, %in[%i0, %i1, %i2, %i3, %i4, %i5] : memref<2x2x3x3x16x1xi32> // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}} // expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}} @@ -742,8 +742,8 @@ func @delinearize_mod_floordiv() { } } - for %ii = 0 to 64 { - for %jj = 0 to 9 { + affine.for %ii = 0 to 64 { + affine.for %jj = 0 to 9 { %a0 = affine.apply 
(d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj) %a10 = affine.apply (d0) -> (d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0) diff --git a/mlir/test/Transforms/parallelism-detection.mlir b/mlir/test/Transforms/parallelism-detection.mlir index 91f8f92c1dc5..2d76b2649df0 100644 --- a/mlir/test/Transforms/parallelism-detection.mlir +++ b/mlir/test/Transforms/parallelism-detection.mlir @@ -5,11 +5,11 @@ func @loop_nest_3d_outer_two_parallel(%N : index) { %0 = alloc() : memref<1024 x 1024 x vector<64xf32>> %1 = alloc() : memref<1024 x 1024 x vector<64xf32>> %2 = alloc() : memref<1024 x 1024 x vector<64xf32>> - for %i = 0 to %N { + affine.for %i = 0 to %N { // expected-note@-1 {{parallel loop}} - for %j = 0 to %N { + affine.for %j = 0 to %N { // expected-note@-1 {{parallel loop}} - for %k = 0 to %N { + affine.for %k = 0 to %N { %5 = load %0[%i, %k] : memref<1024x1024xvector<64xf32>> %6 = load %1[%k, %j] : memref<1024x1024xvector<64xf32>> %7 = load %2[%i, %j] : memref<1024x1024xvector<64xf32>> diff --git a/mlir/test/Transforms/pipeline-data-transfer.mlir b/mlir/test/Transforms/pipeline-data-transfer.mlir index 1f9383b68ab2..d7ae69c7bb03 100644 --- a/mlir/test/Transforms/pipeline-data-transfer.mlir +++ b/mlir/test/Transforms/pipeline-data-transfer.mlir @@ -16,13 +16,13 @@ func @loop_nest_dma() { %zero = constant 0 : index %num_elts = constant 128 : index - for %i = 0 to 8 { + affine.for %i = 0 to 8 { dma_start %A[%i], %Ah[%i], %num_elts, %tag[%zero] : memref<256 x f32>, memref<32 x f32, 1>, memref<1 x f32> dma_wait %tag[%zero], %num_elts : memref<1 x f32> %v = load %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1> %r = "compute"(%v) : (f32) -> (f32) store %r, %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1> - for %j = 0 to 128 { + affine.for %j = 0 to 128 { "do_more_compute"(%i, %j) : (index, index) -> () } } @@ -34,7 +34,7 @@ func @loop_nest_dma() { // CHECK-NEXT: %3 = affine.apply [[MOD_2]](%c0) // CHECK-NEXT: %4 = affine.apply [[MOD_2]](%c0) // CHECK-NEXT: dma_start %0[%c0], %1[%3, %c0], %c128, %2[%4, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32> -// CHECK-NEXT: for %i0 = 1 to 8 { +// CHECK-NEXT: affine.for %i0 = 1 to 8 { // CHECK-NEXT: %5 = affine.apply [[MOD_2]](%i0) // CHECK-NEXT: %6 = affine.apply [[MOD_2]](%i0) // CHECK-NEXT: dma_start %0[%i0], %1[%5, %i0], %c128, %2[%6, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32> @@ -45,7 +45,7 @@ func @loop_nest_dma() { // CHECK-NEXT: %10 = load %1[%9, %7] : memref<2x32xf32, 1> // CHECK-NEXT: %11 = "compute"(%10) : (f32) -> f32 // CHECK-NEXT: store %11, %1[%9, %7] : memref<2x32xf32, 1> -// CHECK-NEXT: for %i1 = 0 to 128 { +// CHECK-NEXT: affine.for %i1 = 0 to 128 { // CHECK-NEXT: "do_more_compute"(%7, %i1) : (index, index) -> () // CHECK-NEXT: } // CHECK-NEXT: } @@ -56,7 +56,7 @@ func @loop_nest_dma() { // CHECK-NEXT: %15 = load %1[%14, %12] : memref<2x32xf32, 1> // CHECK-NEXT: %16 = "compute"(%15) : (f32) -> f32 // CHECK-NEXT: store %16, %1[%14, %12] : memref<2x32xf32, 1> -// CHECK-NEXT: for %i2 = 0 to 128 { +// CHECK-NEXT: affine.for %i2 = 0 to 128 { // CHECK-NEXT: "do_more_compute"(%12, %i2) : (index, index) -> () // CHECK-NEXT: } // CHECK-NEXT: dealloc %2 : memref<2x1xf32> @@ -70,7 +70,7 @@ func @loop_step(%arg0: memref<512xf32>, %arg1: memref<512xf32>) { %c0 = constant 0 : index %c4 = constant 4 : index - for %i0 = 0 to 512 step 4 { + affine.for %i0 = 0 to 512 step 4 { %1 = alloc() : memref<4xf32, 1> %2 = alloc() : memref<1xi32> dma_start %arg0[%i0], %1[%c0], %c4, %2[%c0] @@ -84,7 +84,7 @@ func @loop_step(%arg0: 
memref<512xf32>, // CHECK: %2 = affine.apply [[FLOOR_MOD_2]](%c0) // CHECK: %3 = affine.apply [[FLOOR_MOD_2]](%c0) // CHECK-NEXT: dma_start %arg0[%c0], %0[%2, %c0_0], %c4, [[TAG]][%3, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32> -// CHECK-NEXT: for %i0 = 4 to 512 step 4 { +// CHECK-NEXT: affine.for %i0 = 4 to 512 step 4 { // CHECK-NEXT: %4 = affine.apply [[FLOOR_MOD_2]](%i0) // CHECK-NEXT: %5 = affine.apply [[FLOOR_MOD_2]](%i0) // CHECK-NEXT: dma_start %arg0[%i0], %0[%4, %c0_0], %c4, [[TAG]][%5, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32> @@ -117,8 +117,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref< // CHECK-DAG: [[BUF_ARG2:%[0-9]+]] = alloc() : memref<2x64x4xvector<8xf32>, 2> // CHECK-DAG: [[TAG_ARG2:%[0-9]+]] = alloc() : memref<2x2xi32> // CHECK: dma_start %arg2[ - // CHECK: for %i0 = 1 to 8 { - for %i0 = 0 to 8 { + // CHECK: affine.for %i0 = 1 to 8 { + affine.for %i0 = 0 to 8 { %6 = affine.apply #map2(%i0) dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32> dma_wait %5[%c0], %num_elts : memref<2xi32> @@ -132,8 +132,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref< // CHECK: [[TAG_ARG1:%[0-9]+]] = alloc() : memref<2x2xi32> // CHECK: dma_start %arg0[ // CHECK: dma_start %arg1[ - // CHECK-NEXT for %i1 = 1 to 8 { - for %i1 = 0 to 8 { + // CHECK-NEXT affine.for %i1 = 1 to 8 { + affine.for %i1 = 0 to 8 { %7 = affine.apply #map1(%i0, %i1) %8 = affine.apply #map2(%i1) dma_start %arg0[%7, %c0], %0[%c0, %c0], %num_elts, %3[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32> @@ -145,8 +145,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref< // CHECK: dma_start %arg1[ // CHECK: dma_wait [[TAG_ARG0]] // CHECK: dma_wait [[TAG_ARG1]] - // CHECK-NEXT: for %i2 = 0 to 4 { - for %i2 = 0 to 4 { + // CHECK-NEXT: affine.for %i2 = 0 to 4 { + affine.for %i2 = 0 to 4 { "foo"() : () -> () } } @@ -166,16 +166,16 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref< // CHECK: [[TAG_ARG1_NESTED:%[0-9]+]] = alloc() : memref<2x2xi32> // CHECK: dma_start %arg0[ // CHECK: dma_start %arg1[ - // CHECK: for %i4 = 1 to 8 { + // CHECK: affine.for %i4 = 1 to 8 { // CHECK: dma_start %arg0[ // CHECK: dma_start %arg1[ // CHECK: dma_wait [[TAG_ARG0_NESTED]] // CHECK: dma_wait [[TAG_ARG1_NESTED]] - // CHECK: for %i5 = 0 to 4 { + // CHECK: affine.for %i5 = 0 to 4 { // CHECK: "foo"() : () -> () // CHECK: dma_wait [[TAG_ARG0_NESTED]] // CHECK: dma_wait [[TAG_ARG1_NESTED]] - // CHECK: for %i6 = 0 to 4 { + // CHECK: affine.for %i6 = 0 to 4 { } return // CHECK: } @@ -202,8 +202,8 @@ func @loop_dma_dependent(%arg2: memref<512x32xvector<8xf32>>) { // The two DMAs below are dependent (incoming and outgoing on the same // memref) in the same iteration; so no pipelining here. 
// CHECK-NOT: dma_start - // CHECK: for %i0 = 0 to 8 { - for %i0 = 0 to 8 { + // CHECK: affine.for %i0 = 0 to 8 { + affine.for %i0 = 0 to 8 { %6 = affine.apply #map2(%i0) dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>>, memref<64x4xvector<8xf32>, 2>, memref<2xi32> dma_wait %5[%c0], %num_elts : memref<2xi32> @@ -223,8 +223,8 @@ func @escaping_use(%arg0: memref<512 x 32 x f32>) { %tag = alloc() : memref<1 x i32> // CHECK-NOT: dma_start - // CHECK: for %i0 = 0 to 16 { - for %kTT = 0 to 16 { + // CHECK: affine.for %i0 = 0 to 16 { + affine.for %kTT = 0 to 16 { dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] : memref<512 x 32 x f32>, memref<32 x 32 x f32, 2>, memref<1 x i32> @@ -247,14 +247,14 @@ func @live_out_use(%arg0: memref<512 x 32 x f32>) -> f32 { %tag = alloc() : memref<1 x i32> // CHECK-NOT: dma_start - // CHECK: for %i0 = 0 to 16 { - for %kTT = 0 to 16 { + // CHECK: affine.for %i0 = 0 to 16 { + affine.for %kTT = 0 to 16 { dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] : memref<512 x 32 x f32>, memref<32 x 32 x f32, 2>, memref<1 x i32> dma_wait %tag[%zero], %num_elt : memref<1 x i32> } - // Use live out of 'for' inst; no DMA pipelining will be done. + // Use live out of 'affine.for' inst; no DMA pipelining will be done. %v = load %Av[%zero, %zero] : memref<32 x 32 x f32, 2> return %v : f32 // CHECK: %{{[0-9]+}} = load %{{[0-9]+}}[%c0, %c0] : memref<32x32xf32, 2> @@ -278,14 +278,14 @@ func @dynamic_shape_dma_buffer(%arg0: memref<512 x 32 x f32>) { // CHECK: %5 = affine.apply [[MOD_2]](%c0) // CHECK: %6 = affine.apply [[MOD_2]](%c0) // CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%5, %c0_0, %c0_0], %c512, %4[%6, %c0_0] - for %kTT = 0 to 16 { + affine.for %kTT = 0 to 16 { dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] : memref<512 x 32 x f32>, memref<? x ? x f32, 2>, memref<1 x i32> dma_wait %tag[%zero], %num_elt : memref<1 x i32> } return -// CHECK-NEXT: for %i0 = 1 to 16 { +// CHECK-NEXT: affine.for %i0 = 1 to 16 { // CHECK: %7 = affine.apply [[MOD_2]](%i0) // CHECK: %8 = affine.apply [[MOD_2]](%i0) // CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%7, %c0_0, %c0_0], %c512, %4[%8, %c0_0] diff --git a/mlir/test/Transforms/simplify-affine-structures.mlir b/mlir/test/Transforms/simplify-affine-structures.mlir index 2459604f3694..feb3a99b70b9 100644 --- a/mlir/test/Transforms/simplify-affine-structures.mlir +++ b/mlir/test/Transforms/simplify-affine-structures.mlir @@ -73,8 +73,8 @@ // CHECK-LABEL: func @test_gaussian_elimination_empty_set0() { func @test_gaussian_elimination_empty_set0() { - for %i0 = 1 to 10 { - for %i1 = 1 to 100 { + affine.for %i0 = 1 to 10 { + affine.for %i1 = 1 to 100 { // CHECK: [[SET_EMPTY_2D]](%i0, %i1) affine.if (d0, d1) : (2 == 0)(%i0, %i1) { } @@ -85,8 +85,8 @@ func @test_gaussian_elimination_empty_set0() { // CHECK-LABEL: func @test_gaussian_elimination_empty_set1() { func @test_gaussian_elimination_empty_set1() { - for %i0 = 1 to 10 { - for %i1 = 1 to 100 { + affine.for %i0 = 1 to 10 { + affine.for %i1 = 1 to 100 { // CHECK: [[SET_EMPTY_2D]](%i0, %i1) affine.if (d0, d1) : (1 >= 0, -1 >= 0) (%i0, %i1) { } @@ -97,8 +97,8 @@ // CHECK-LABEL: func @test_gaussian_elimination_non_empty_set2() { func @test_gaussian_elimination_non_empty_set2() { - for %i0 = 1 to 10 { - for %i1 = 1 to 100 { + affine.for %i0 = 1 to 10 { + affine.for %i1 = 1 to 100 { // CHECK: #set1(%i0, %i1) affine.if #set2(%i0, %i1) { } @@ -111,8 +111,8 @@ func
@test_gaussian_elimination_non_empty_set2() { func @test_gaussian_elimination_empty_set3() { %c7 = constant 7 : index %c11 = constant 11 : index - for %i0 = 1 to 10 { - for %i1 = 1 to 100 { + affine.for %i0 = 1 to 10 { + affine.for %i1 = 1 to 100 { // CHECK: #set2(%i0, %i1)[%c7, %c11] affine.if #set3(%i0, %i1)[%c7, %c11] { } @@ -125,8 +125,8 @@ func @test_gaussian_elimination_empty_set3() { func @test_gaussian_elimination_non_empty_set4() { %c7 = constant 7 : index %c11 = constant 11 : index - for %i0 = 1 to 10 { - for %i1 = 1 to 100 { + affine.for %i0 = 1 to 10 { + affine.for %i1 = 1 to 100 { // CHECK: #set3(%i0, %i1)[%c7, %c11] affine.if #set4(%i0, %i1)[%c7, %c11] { } @@ -139,8 +139,8 @@ func @test_gaussian_elimination_non_empty_set4() { func @test_gaussian_elimination_empty_set5() { %c7 = constant 7 : index %c11 = constant 11 : index - for %i0 = 1 to 10 { - for %i1 = 1 to 100 { + affine.for %i0 = 1 to 10 { + affine.for %i1 = 1 to 100 { // CHECK: #set2(%i0, %i1)[%c7, %c11] affine.if #set5(%i0, %i1)[%c7, %c11] { } @@ -151,8 +151,8 @@ func @test_gaussian_elimination_empty_set5() { // CHECK-LABEL: func @test_fuzz_explosion func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) { - for %i0 = 1 to 10 { - for %i1 = 1 to 100 { + affine.for %i0 = 1 to 10 { + affine.for %i1 = 1 to 100 { affine.if #set_fuzz_virus(%i0, %i1, %arg0, %arg1, %arg2, %arg3) { } } @@ -163,8 +163,8 @@ func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : i // CHECK-LABEL: func @test_empty_set(%arg0: index) { func @test_empty_set(%N : index) { - for %i = 0 to 10 { - for %j = 0 to 10 { + affine.for %i = 0 to 10 { + affine.for %j = 0 to 10 { // CHECK: affine.if [[SET_EMPTY_2D]](%i0, %i1) affine.if (d0, d1) : (d0 - d1 >= 0, d1 - d0 - 1 >= 0)(%i, %j) { "foo"() : () -> () @@ -198,8 +198,8 @@ func @test_empty_set(%N : index) { } } // The tests below test GCDTightenInequalities(). - for %k = 0 to 10 { - for %l = 0 to 10 { + affine.for %k = 0 to 10 { + affine.for %l = 0 to 10 { // Empty because no multiple of 8 lies between 4 and 7. 
// CHECK: affine.if [[SET_EMPTY_1D]](%i2) affine.if (d0) : (8*d0 - 4 >= 0, -8*d0 + 7 >= 0)(%k) { @@ -226,7 +226,7 @@ func @test_empty_set(%N : index) { } } - for %m = 0 to 10 { + affine.for %m = 0 to 10 { // CHECK: affine.if [[SET_EMPTY_1D]](%i{{[0-9]+}}) affine.if (d0) : (d0 mod 2 - 3 == 0) (%m) { "foo"() : () -> () diff --git a/mlir/test/Transforms/strip-debuginfo.mlir b/mlir/test/Transforms/strip-debuginfo.mlir index fdabd5d12e03..181481279d01 100644 --- a/mlir/test/Transforms/strip-debuginfo.mlir +++ b/mlir/test/Transforms/strip-debuginfo.mlir @@ -10,7 +10,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) { %1 = "foo"() : () -> i32 loc("foo") // CHECK: } loc(unknown) - for %i0 = 0 to 8 { + affine.for %i0 = 0 to 8 { } loc(fused["foo", "mysource.cc":10:8]) // CHECK: } loc(unknown) diff --git a/mlir/test/Transforms/unroll-jam.mlir b/mlir/test/Transforms/unroll-jam.mlir index b872cb687fc7..44feeee8f185 100644 --- a/mlir/test/Transforms/unroll-jam.mlir +++ b/mlir/test/Transforms/unroll-jam.mlir @@ -8,13 +8,13 @@ // CHECK-LABEL: func @unroll_jam_imperfect_nest() { func @unroll_jam_imperfect_nest() { // CHECK: %c100 = constant 100 : index - // CHECK-NEXT: for %i0 = 0 to 100 step 2 { - for %i = 0 to 101 { + // CHECK-NEXT: affine.for %i0 = 0 to 100 step 2 { + affine.for %i = 0 to 101 { // CHECK: %0 = "addi32"(%i0, %i0) : (index, index) -> i32 // CHECK-NEXT: %1 = affine.apply [[MAP_PLUS_1]](%i0) // CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32 %x = "addi32"(%i, %i) : (index, index) -> i32 - for %j = 0 to 17 { + affine.for %j = 0 to 17 { // CHECK: %3 = "addi32"(%i0, %i0) : (index, index) -> i32 // CHECK-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32 // CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_1]](%i0) @@ -30,7 +30,7 @@ func @unroll_jam_imperfect_nest() { } // CHECK } // cleanup loop (single iteration) // CHECK: %11 = "addi32"(%c100, %c100) : (index, index) -> i32 - // CHECK-NEXT: for %i2 = 0 to 17 { + // CHECK-NEXT: affine.for %i2 = 0 to 17 { // CHECK-NEXT: %12 = "addi32"(%c100, %c100) : (index, index) -> i32 // CHECK-NEXT: %13 = "addi32"(%12, %12) : (i32, i32) -> i32 // CHECK-NEXT: } @@ -40,20 +40,20 @@ func @unroll_jam_imperfect_nest() { // CHECK-LABEL: func @loop_nest_unknown_count_1(%arg0: index) { func @loop_nest_unknown_count_1(%N : index) { - // CHECK-NEXT: for %i0 = 1 to [[MAP_DIV_OFFSET]]()[%arg0] step 2 { - // CHECK-NEXT: for %i1 = 1 to 100 { + // CHECK-NEXT: affine.for %i0 = 1 to [[MAP_DIV_OFFSET]]()[%arg0] step 2 { + // CHECK-NEXT: affine.for %i1 = 1 to 100 { // CHECK-NEXT: %0 = "foo"() : () -> i32 // CHECK-NEXT: %1 = "foo"() : () -> i32 // CHECK-NEXT: } // CHECK-NEXT: } // A cleanup loop should be generated here. 
- // CHECK-NEXT: for %i2 = [[MAP_DIV_OFFSET]]()[%arg0] to %arg0 { - // CHECK-NEXT: for %i3 = 1 to 100 { + // CHECK-NEXT: affine.for %i2 = [[MAP_DIV_OFFSET]]()[%arg0] to %arg0 { + // CHECK-NEXT: affine.for %i3 = 1 to 100 { // CHECK-NEXT: %2 = "foo"() : () -> i32 // CHECK_NEXT: } // CHECK_NEXT: } - for %i = 1 to %N { - for %j = 1 to 100 { + affine.for %i = 1 to %N { + affine.for %j = 1 to 100 { %x = "foo"() : () -> i32 } } @@ -62,8 +62,8 @@ func @loop_nest_unknown_count_1(%N : index) { // CHECK-LABEL: func @loop_nest_unknown_count_2(%arg0: index) { func @loop_nest_unknown_count_2(%arg : index) { - // CHECK-NEXT: for %i0 = %arg0 to [[M1]]()[%arg0] step 2 { - // CHECK-NEXT: for %i1 = 1 to 100 { + // CHECK-NEXT: affine.for %i0 = %arg0 to [[M1]]()[%arg0] step 2 { + // CHECK-NEXT: affine.for %i1 = 1 to 100 { // CHECK-NEXT: %0 = "foo"(%i0) : (index) -> i32 // CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0) // CHECK-NEXT: %2 = "foo"(%1) : (index) -> i32 @@ -71,11 +71,11 @@ func @loop_nest_unknown_count_2(%arg : index) { // CHECK-NEXT: } // The cleanup loop is a single iteration one and is promoted. // CHECK-NEXT: %3 = affine.apply [[M1]]()[%arg0] - // CHECK-NEXT: for %i2 = 1 to 100 { + // CHECK-NEXT: affine.for %i2 = 1 to 100 { // CHECK-NEXT: %4 = "foo"(%3) : (index) -> i32 // CHECK_NEXT: } - for %i = %arg to ()[s0] -> (s0+9) ()[%arg] { - for %j = 1 to 100 { + affine.for %i = %arg to ()[s0] -> (s0+9) ()[%arg] { + affine.for %j = 1 to 100 { %x = "foo"(%i) : (index) -> i32 } } @@ -84,22 +84,22 @@ func @loop_nest_unknown_count_2(%arg : index) { // CHECK-LABEL: func @loop_nest_symbolic_and_min_upper_bound func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) { - for %i = 0 to min ()[s0, s1] -> (s0, s1, 1024)()[%M, %N] { - for %j = 0 to %K { + affine.for %i = 0 to min ()[s0, s1] -> (s0, s1, 1024)()[%M, %N] { + affine.for %j = 0 to %K { "foo"(%i, %j) : (index, index) -> () } } return } -// CHECK-NEXT: for %i0 = 0 to min [[MAP_MULTI_RES]]()[%arg0, %arg1] step 2 { -// CHECK-NEXT: for %i1 = 0 to %arg2 { +// CHECK-NEXT: affine.for %i0 = 0 to min [[MAP_MULTI_RES]]()[%arg0, %arg1] step 2 { +// CHECK-NEXT: affine.for %i1 = 0 to %arg2 { // CHECK-NEXT: "foo"(%i0, %i1) : (index, index) -> () // CHECK-NEXT: %0 = affine.apply #map2(%i0) // CHECK-NEXT: "foo"(%0, %i1) : (index, index) -> () // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: for %i2 = max [[MAP_MULTI_RES]]()[%arg0, %arg1] to min #map9()[%arg0, %arg1] { -// CHECK-NEXT: for %i3 = 0 to %arg2 { +// CHECK-NEXT: affine.for %i2 = max [[MAP_MULTI_RES]]()[%arg0, %arg1] to min #map9()[%arg0, %arg1] { +// CHECK-NEXT: affine.for %i3 = 0 to %arg2 { // CHECK-NEXT: "foo"(%i2, %i3) : (index, index) -> () // CHECK-NEXT: } // CHECK-NEXT: } diff --git a/mlir/test/Transforms/unroll.mlir b/mlir/test/Transforms/unroll.mlir index f676023ee1ca..5bbf3b8ce1e2 100644 --- a/mlir/test/Transforms/unroll.mlir +++ b/mlir/test/Transforms/unroll.mlir @@ -25,13 +25,13 @@ // UNROLL-FULL-LABEL: func @loop_nest_simplest() { func @loop_nest_simplest() { - // UNROLL-FULL: for %i0 = 0 to 100 step 2 { - for %i = 0 to 100 step 2 { + // UNROLL-FULL: affine.for %i0 = 0 to 100 step 2 { + affine.for %i = 0 to 100 step 2 { // UNROLL-FULL: %c1_i32 = constant 1 : i32 // UNROLL-FULL-NEXT: %c1_i32_0 = constant 1 : i32 // UNROLL-FULL-NEXT: %c1_i32_1 = constant 1 : i32 // UNROLL-FULL-NEXT: %c1_i32_2 = constant 1 : i32 - for %j = 0 to 4 { + affine.for %j = 0 to 4 { %x = constant 1 : i32 } } // UNROLL-FULL: } @@ -41,8 +41,8 @@ func @loop_nest_simplest() { // UNROLL-FULL-LABEL: 
func @loop_nest_simple_iv_use() { func @loop_nest_simple_iv_use() { // UNROLL-FULL: %c0 = constant 0 : index - // UNROLL-FULL-NEXT: for %i0 = 0 to 100 step 2 { - for %i = 0 to 100 step 2 { + // UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 step 2 { + affine.for %i = 0 to 100 step 2 { // UNROLL-FULL: %0 = "addi32"(%c0, %c0) : (index, index) -> i32 // UNROLL-FULL: %1 = affine.apply [[MAP0]](%c0) // UNROLL-FULL-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32 @@ -50,7 +50,7 @@ func @loop_nest_simple_iv_use() { // UNROLL-FULL-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> i32 // UNROLL-FULL: %5 = affine.apply [[MAP2]](%c0) // UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32 - for %j = 0 to 4 { + affine.for %j = 0 to 4 { %x = "addi32"(%j, %j) : (index, index) -> i32 } } // UNROLL-FULL: } @@ -61,8 +61,8 @@ func @loop_nest_simple_iv_use() { // UNROLL-FULL-LABEL: func @loop_nest_body_def_use() { func @loop_nest_body_def_use() { // UNROLL-FULL: %c0 = constant 0 : index - // UNROLL-FULL-NEXT: for %i0 = 0 to 100 step 2 { - for %i = 0 to 100 step 2 { + // UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 step 2 { + affine.for %i = 0 to 100 step 2 { // UNROLL-FULL: %c0_0 = constant 0 : index %c0 = constant 0 : index // UNROLL-FULL: %0 = affine.apply [[MAP0]](%c0) @@ -76,7 +76,7 @@ func @loop_nest_body_def_use() { // UNROLL-FULL-NEXT: %8 = affine.apply [[MAP2]](%c0) // UNROLL-FULL-NEXT: %9 = affine.apply [[MAP0]](%8) // UNROLL-FULL-NEXT: %10 = "addi32"(%9, %c0_0) : (index, index) -> index - for %j = 0 to 4 { + affine.for %j = 0 to 4 { %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } : (index) -> (index) %y = "addi32"(%x, %c0) : (index, index) -> index @@ -89,14 +89,14 @@ func @loop_nest_body_def_use() { func @loop_nest_strided() { // UNROLL-FULL: %c2 = constant 2 : index // UNROLL-FULL-NEXT: %c2_0 = constant 2 : index - // UNROLL-FULL-NEXT: for %i0 = 0 to 100 { - for %i = 0 to 100 { + // UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 { + affine.for %i = 0 to 100 { // UNROLL-FULL: %0 = affine.apply [[MAP0]](%c2_0) // UNROLL-FULL-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index // UNROLL-FULL-NEXT: %2 = affine.apply [[MAP1]](%c2_0) // UNROLL-FULL-NEXT: %3 = affine.apply [[MAP0]](%2) // UNROLL-FULL-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> index - for %j = 2 to 6 step 2 { + affine.for %j = 2 to 6 step 2 { %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } : (index) -> (index) %y = "addi32"(%x, %x) : (index, index) -> index @@ -109,7 +109,7 @@ func @loop_nest_strided() { // UNROLL-FULL-NEXT: %10 = affine.apply [[MAP3]](%c2) // UNROLL-FULL-NEXT: %11 = affine.apply [[MAP0]](%10) // UNROLL-FULL-NEXT: %12 = "addi32"(%11, %11) : (index, index) -> index - for %k = 2 to 7 step 2 { + affine.for %k = 2 to 7 step 2 { %z = "affine.apply" (%k) { map: (d0) -> (d0 + 1) } : (index) -> (index) %w = "addi32"(%z, %z) : (index, index) -> index @@ -121,8 +121,8 @@ func @loop_nest_strided() { // UNROLL-FULL-LABEL: func @loop_nest_multiple_results() { func @loop_nest_multiple_results() { // UNROLL-FULL: %c0 = constant 0 : index - // UNROLL-FULL-NEXT: for %i0 = 0 to 100 { - for %i = 0 to 100 { + // UNROLL-FULL-NEXT: affine.for %i0 = 0 to 100 { + affine.for %i = 0 to 100 { // UNROLL-FULL: %0 = affine.apply [[MAP4]](%i0, %c0) // UNROLL-FULL-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index // UNROLL-FULL-NEXT: %2 = affine.apply #map{{.*}}(%i0, %c0) @@ -132,7 +132,7 @@ func @loop_nest_multiple_results() { // UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index // UNROLL-FULL-NEXT: %7 = 
affine.apply #map{{.*}}(%i0, %4) // UNROLL-FULL-NEXT: %8 = "fma"(%7, %5, %5) : (index, index, index) -> (index, index) - for %j = 0 to 2 step 1 { + affine.for %j = 0 to 2 step 1 { %x = affine.apply (d0, d1) -> (d0 + 1) (%i, %j) %y = "addi32"(%x, %x) : (index, index) -> index %z = affine.apply (d0, d1) -> (d0 + 3) (%i, %j) @@ -149,8 +149,8 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) { // UNROLL-FULL: %c0 = constant 0 : index // UNROLL-FULL-NEXT: %c128 = constant 128 : index %c128 = constant 128 : index - // UNROLL-FULL: for %i0 = 0 to 100 { - for %i = 0 to 100 { + // UNROLL-FULL: affine.for %i0 = 0 to 100 { + affine.for %i = 0 to 100 { // UNROLL-FULL: %0 = "vld"(%i0) : (index) -> i32 %ld = "vld"(%i) : (index) -> i32 // UNROLL-FULL: %1 = affine.apply [[MAP0]](%c0) @@ -168,7 +168,7 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) { // UNROLL-FULL-NEXT: %13 = affine.apply [[MAP0]](%12) // UNROLL-FULL-NEXT: %14 = "vmulf"(%12, %13) : (index, index) -> index // UNROLL-FULL-NEXT: %15 = "vaddf"(%14, %14) : (index, index) -> index - for %j = 0 to 4 { + affine.for %j = 0 to 4 { %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } : (index) -> (index) %y = "vmulf"(%j, %x) : (index, index) -> index @@ -197,7 +197,7 @@ func @loop_nest_seq_multiple() { // UNROLL-FULL-NEXT: %5 = affine.apply [[MAP2]](%c0_0) // UNROLL-FULL-NEXT: %6 = affine.apply [[MAP0]](%5) // UNROLL-FULL-NEXT: "mul"(%6, %6) : (index, index) -> () - for %j = 0 to 4 { + affine.for %j = 0 to 4 { %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } : (index) -> (index) "mul"(%x, %x) : (index, index) -> () @@ -205,8 +205,8 @@ func @loop_nest_seq_multiple() { // UNROLL-FULL: %c99 = constant 99 : index %k = constant 99 : index - // UNROLL-FULL: for %i0 = 0 to 100 step 2 { - for %m = 0 to 100 step 2 { + // UNROLL-FULL: affine.for %i0 = 0 to 100 step 2 { + affine.for %m = 0 to 100 step 2 { // UNROLL-FULL: %7 = affine.apply [[MAP0]](%c0) // UNROLL-FULL-NEXT: %8 = affine.apply [[MAP6]](%c0)[%c99] // UNROLL-FULL-NEXT: %9 = affine.apply [[MAP0]](%c0) @@ -218,7 +218,7 @@ func @loop_nest_seq_multiple() { // UNROLL-FULL-NEXT: %15 = affine.apply [[MAP2]](%c0) // UNROLL-FULL-NEXT: %16 = affine.apply [[MAP0]](%15) // UNROLL-FULL-NEXT: %17 = affine.apply [[MAP6]](%15)[%c99] - for %n = 0 to 4 { + affine.for %n = 0 to 4 { %y = "affine.apply" (%n) { map: (d0) -> (d0 + 1) } : (index) -> (index) %z = "affine.apply" (%n, %k) { map: (d0) [s0] -> (d0 + s0 + 1) } : @@ -233,7 +233,7 @@ func @loop_nest_unroll_full() { // UNROLL-FULL-NEXT: %0 = "foo"() : () -> i32 // UNROLL-FULL-NEXT: %1 = "bar"() : () -> i32 // UNROLL-FULL-NEXT: return - for %i = 0 to 1 { + affine.for %i = 0 to 1 { %x = "foo"() : () -> i32 %y = "bar"() : () -> i32 } @@ -242,16 +242,16 @@ func @loop_nest_unroll_full() { // SHORT-LABEL: func @loop_nest_outer_unroll() { func @loop_nest_outer_unroll() { - // SHORT: for %i0 = 0 to 4 { + // SHORT: affine.for %i0 = 0 to 4 { // SHORT-NEXT: %0 = affine.apply [[MAP0]](%i0) // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index // SHORT-NEXT: } - // SHORT-NEXT: for %i1 = 0 to 4 { + // SHORT-NEXT: affine.for %i1 = 0 to 4 { // SHORT-NEXT: %2 = affine.apply [[MAP0]](%i1) // SHORT-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> index // SHORT-NEXT: } - for %i = 0 to 2 { - for %j = 0 to 4 { + affine.for %i = 0 to 2 { + affine.for %j = 0 to 4 { %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } : (index) -> (index) %y = "addi32"(%x, %x) : (index, index) -> index @@ -275,33 +275,33 @@ func @loop_nest_seq_long() -> i32 { %zero_idx = 
constant 0 : index - // CHECK: for %i0 = 0 to 512 - for %n0 = 0 to 512 { - // CHECK: for %i1 = 0 to 8 - for %n1 = 0 to 8 { + // CHECK: affine.for %i0 = 0 to 512 + affine.for %n0 = 0 to 512 { + // CHECK: affine.for %i1 = 0 to 8 + affine.for %n1 = 0 to 8 { store %one, %A[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2> store %two, %B[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2> store %zero, %C[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2> } } - for %x = 0 to 2 { - for %y = 0 to 2 { - // CHECK: for %i2 - for %i2 = 0 to 8 { - // CHECK-NOT: for %i3 + affine.for %x = 0 to 2 { + affine.for %y = 0 to 2 { + // CHECK: affine.for %i2 + affine.for %i2 = 0 to 8 { + // CHECK-NOT: affine.for %i3 // CHECK: %{{[0-9]+}} = affine.apply %b2 = "affine.apply" (%y, %i2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index %z = load %B[%x, %b2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2> "op1"(%z) : (i32) -> () } - for %j1 = 0 to 8 { - for %j2 = 0 to 8 { + affine.for %j1 = 0 to 8 { + affine.for %j2 = 0 to 8 { %a2 = "affine.apply" (%y, %j2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index %v203 = load %A[%j1, %a2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2> "op2"(%v203) : (i32) -> () } - for %k2 = 0 to 8 { + affine.for %k2 = 0 to 8 { %s0 = "op3"() : () -> i32 %c2 = "affine.apply" (%x, %k2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index %s1 = load %C[%j1, %c2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2> @@ -318,8 +318,8 @@ func @loop_nest_seq_long() -> i32 { // UNROLL-BY-4-LABEL: func @unroll_unit_stride_no_cleanup() { func @unroll_unit_stride_no_cleanup() { - // UNROLL-BY-4: for %i0 = 0 to 100 { - for %i = 0 to 100 { + // UNROLL-BY-4: affine.for %i0 = 0 to 100 { + affine.for %i = 0 to 100 { // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 { // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32 @@ -333,13 +333,13 @@ func @unroll_unit_stride_no_cleanup() { // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32 // UNROLL-BY-4-NEXT: } - for %j = 0 to 8 { + affine.for %j = 0 to 8 { %x = "addi32"(%j, %j) : (index, index) -> i32 %y = "addi32"(%x, %x) : (i32, i32) -> i32 } // empty loop - // UNROLL-BY-4: for %i2 = 0 to 8 { - for %k = 0 to 8 { + // UNROLL-BY-4: affine.for %i2 = 0 to 8 { + affine.for %k = 0 to 8 { } } return @@ -347,8 +347,8 @@ func @unroll_unit_stride_no_cleanup() { // UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() { func @unroll_unit_stride_cleanup() { - // UNROLL-BY-4: for %i0 = 0 to 100 { - for %i = 0 to 100 { + // UNROLL-BY-4: affine.for %i0 = 0 to 100 { + affine.for %i = 0 to 100 { // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 { // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32 @@ -366,7 +366,7 @@ func @unroll_unit_stride_cleanup() { // UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32 // UNROLL-BY-4-NEXT: } - for %j = 0 to 10 { + affine.for %j = 0 to 10 { %x = "addi32"(%j, %j) : (index, index) -> i32 %y = "addi32"(%x, %x) : (i32, i32) -> i32 } @@ -376,8 +376,8 @@ func @unroll_unit_stride_cleanup() { // UNROLL-BY-4-LABEL: func @unroll_non_unit_stride_cleanup() { func @unroll_non_unit_stride_cleanup() { - // UNROLL-BY-4: for %i0 = 0 to 100 { - for %i = 0 to 100 { + // 
UNROLL-BY-4: affine.for %i0 = 0 to 100 { + affine.for %i = 0 to 100 { // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 2 to 42 step 20 { // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32 @@ -395,7 +395,7 @@ func @unroll_non_unit_stride_cleanup() { // UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32 // UNROLL-BY-4-NEXT: } - for %j = 2 to 48 step 5 { + affine.for %j = 2 to 48 step 5 { %x = "addi32"(%j, %j) : (index, index) -> i32 %y = "addi32"(%x, %x) : (i32, i32) -> i32 } @@ -408,8 +408,8 @@ func @unroll_non_unit_stride_cleanup() { func @loop_nest_single_iteration_after_unroll(%N: index) { // UNROLL-BY-4: %c0 = constant 0 : index // UNROLL-BY-4: %c4 = constant 4 : index - // UNROLL-BY-4: for %i0 = 0 to %arg0 { - for %i = 0 to %N { + // UNROLL-BY-4: affine.for %i0 = 0 to %arg0 { + affine.for %i = 0 to %N { // UNROLL-BY-4: %0 = "addi32"(%c0, %c0) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %1 = affine.apply [[MAP0]](%c0) // UNROLL-BY-4-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32 @@ -419,7 +419,7 @@ func @loop_nest_single_iteration_after_unroll(%N: index) { // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32 // UNROLL-BY-4-NEXT: %7 = "addi32"(%c4, %c4) : (index, index) -> i32 // UNROLL-BY-4-NOT: for - for %j = 0 to 5 { + affine.for %j = 0 to 5 { %x = "addi32"(%j, %j) : (index, index) -> i32 } // UNROLL-BY-4-NOT: } } // UNROLL-BY-4: } @@ -431,8 +431,8 @@ func @loop_nest_single_iteration_after_unroll(%N: index) { // No cleanup will be generated here. // UNROLL-BY-4-LABEL: func @loop_nest_operand1() { func @loop_nest_operand1() { -// UNROLL-BY-4: for %i0 = 0 to 100 step 2 { -// UNROLL-BY-4-NEXT: for %i1 = 0 to #map{{[0-9]+}}(%i0) step 4 +// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 { +// UNROLL-BY-4-NEXT: affine.for %i1 = 0 to #map{{[0-9]+}}(%i0) step 4 // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32 @@ -440,8 +440,8 @@ func @loop_nest_operand1() { // UNROLL-BY-4-NEXT: } // UNROLL-BY-4-NEXT: } // UNROLL-BY-4-NEXT: return - for %i = 0 to 100 step 2 { - for %j = 0 to (d0) -> (d0 - d0 mod 4) (%i) { + affine.for %i = 0 to 100 step 2 { + affine.for %j = 0 to (d0) -> (d0 - d0 mod 4) (%i) { %x = "foo"() : () -> i32 } } @@ -451,8 +451,8 @@ func @loop_nest_operand1() { // No cleanup will be generated here. // UNROLL-BY-4-LABEL: func @loop_nest_operand2() { func @loop_nest_operand2() { -// UNROLL-BY-4: for %i0 = 0 to 100 step 2 { -// UNROLL-BY-4-NEXT: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 { +// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 { +// UNROLL-BY-4-NEXT: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 { // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32 @@ -460,8 +460,8 @@ func @loop_nest_operand2() { // UNROLL-BY-4-NEXT: } // UNROLL-BY-4-NEXT: } // UNROLL-BY-4-NEXT: return - for %i = 0 to 100 step 2 { - for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) { + affine.for %i = 0 to 100 step 2 { + affine.for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) { %x = "foo"() : () -> i32 } } @@ -472,16 +472,16 @@ func @loop_nest_operand2() { // factor. The cleanup loop happens to be a single iteration one and is promoted. 
// UNROLL-BY-4-LABEL: func @loop_nest_operand3() { func @loop_nest_operand3() { - // UNROLL-BY-4: for %i0 = 0 to 100 step 2 { - for %i = 0 to 100 step 2 { - // UNROLL-BY-4: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 { + // UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 { + affine.for %i = 0 to 100 step 2 { + // UNROLL-BY-4: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 { // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: } // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32 - for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) { + affine.for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) { %x = "foo"() : () -> i32 } } // UNROLL-BY-4: } @@ -490,19 +490,19 @@ func @loop_nest_operand3() { // UNROLL-BY-4-LABEL: func @loop_nest_symbolic_bound(%arg0: index) { func @loop_nest_symbolic_bound(%N : index) { - // UNROLL-BY-4: for %i0 = 0 to 100 { - for %i = 0 to 100 { - // UNROLL-BY-4: for %i1 = 0 to #map{{[0-9]+}}()[%arg0] step 4 { + // UNROLL-BY-4: affine.for %i0 = 0 to 100 { + affine.for %i = 0 to 100 { + // UNROLL-BY-4: affine.for %i1 = 0 to #map{{[0-9]+}}()[%arg0] step 4 { // UNROLL-BY-4: %0 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32 // UNROLL-BY-4-NEXT: } // A cleanup loop will be be generated here. - // UNROLL-BY-4-NEXT: for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 { + // UNROLL-BY-4-NEXT: affine.for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 { // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32 // UNROLL-BY-4_NEXT: } - for %j = 0 to %N { + affine.for %j = 0 to %N { %x = "foo"() : () -> i32 } } @@ -511,18 +511,18 @@ func @loop_nest_symbolic_bound(%N : index) { // UNROLL-BY-4-LABEL: func @loop_nest_symbolic_and_min_upper_bound func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) { - for %i = %M to min ()[s0, s1] -> (s0, s1, 1024)()[%N, %K] { + affine.for %i = %M to min ()[s0, s1] -> (s0, s1, 1024)()[%N, %K] { "foo"() : () -> () } return } -// CHECK-NEXT: for %i0 = %arg0 to min [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] step 4 { +// CHECK-NEXT: affine.for %i0 = %arg0 to min [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] step 4 { // CHECK-NEXT: "foo"() : () -> () // CHECK-NEXT: "foo"() : () -> () // CHECK-NEXT: "foo"() : () -> () // CHECK-NEXT: "foo"() : () -> () // CHECK-NEXT: } -// CHECK-NEXT: for %i1 = max [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] to min #map28()[%arg1, %arg2] { +// CHECK-NEXT: affine.for %i1 = max [[MAP_TRIP_COUNT_MULTIPLE_FOUR]]()[%arg0, %arg1, %arg2] to min #map28()[%arg1, %arg2] { // CHECK-NEXT: "foo"() : () -> () // CHECK-NEXT: } // CHECK-NEXT: return @@ -533,22 +533,22 @@ func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) func @loop_nest_non_trivial_multiple_unroll_factor(%M : index, %N : index) { %T = affine.apply (d0) -> (4*d0 + 1)(%M) %K = affine.apply (d0) -> (d0 - 1) (%T) - for %i = 0 to min (d0, d1) -> (4 * d0, d1, 1024)(%N, %K) { + affine.for %i = 0 to min (d0, d1) -> (4 * d0, d1, 1024)(%N, %K) { "foo"() : () -> () } return } -// UNROLL-BY-4: for %i0 = 0 to min +// UNROLL-BY-4: affine.for %i0 = 0 to min // UNROLL-BY-4-NOT: for // UNROLL-BY-4: return // UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_unroll_factor_2 func @loop_nest_non_trivial_multiple_unroll_factor_2(%M : index, %N : 
index) { %K = affine.apply (d0) -> (4*d0) (%M) - for %i = 0 to min ()[s0, s1] -> (4 * s0, s1, 1024)()[%N, %K] { + affine.for %i = 0 to min ()[s0, s1] -> (4 * s0, s1, 1024)()[%N, %K] { "foo"() : () -> () } - // UNROLL-BY-4: for %i0 = 0 to min + // UNROLL-BY-4: affine.for %i0 = 0 to min // UNROLL-BY-4-NEXT: "foo" // UNROLL-BY-4-NEXT: "foo" // UNROLL-BY-4-NEXT: "foo" @@ -560,7 +560,7 @@ func @loop_nest_non_trivial_multiple_unroll_factor_2(%M : index, %N : index) { // UNROLL-BY-1-LABEL: func @unroll_by_one_should_promote_single_iteration_loop() func @unroll_by_one_should_promote_single_iteration_loop() { - for %i = 0 to 1 { + affine.for %i = 0 to 1 { %x = "foo"(%i) : (index) -> i32 } return