[mlir][OpenMP] Added assembly format for omp.wsloop and remove parseClauses

This patch
 - adds assembly format for `omp.wsloop` operation
 - removes the `parseClauses` function as it is no longer required

This is expected to be the final patch in a series of patches for replacing
parsers for clauses with `oilist`.

Reviewed By: Mogball

Differential Revision: https://reviews.llvm.org/D121367
This commit is contained in:
Shraiysh Vaishay 2022-03-23 09:37:55 +05:30
parent 32103608fc
commit b244bba582
11 changed files with 267 additions and 475 deletions

View File

@ -7,7 +7,8 @@ func @_QPsb1(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!fir.a
omp.parallel {
%1 = fir.alloca i32 {adapt.valuebyref, pinned}
%2 = fir.load %arg0 : !fir.ref<i32>
omp.wsloop (%arg2) : i32 = (%c1_i32) to (%2) inclusive step (%c1_i32) nowait {
omp.wsloop nowait
for (%arg2) : i32 = (%c1_i32) to (%2) inclusive step (%c1_i32) {
fir.store %arg2 to %1 : !fir.ref<i32>
%3 = fir.load %1 : !fir.ref<i32>
%4 = fir.convert %3 : (i32) -> i64
@ -29,7 +30,8 @@ func @_QPsb1(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<!fir.a
// CHECK: %[[ONE_3:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[I_VAR:.*]] = llvm.alloca %[[ONE_3]] x i32 {adapt.valuebyref, in_type = i32, operand_segment_sizes = dense<0> : vector<2xi32>, pinned} : (i64) -> !llvm.ptr<i32>
// CHECK: %[[N:.*]] = llvm.load %[[N_REF]] : !llvm.ptr<i32>
// CHECK: omp.wsloop (%[[I:.*]]) : i32 = (%[[ONE_2]]) to (%[[N]]) inclusive step (%[[ONE_2]]) nowait {
// CHECK: omp.wsloop nowait
// CHECK-SAME: for (%[[I:.*]]) : i32 = (%[[ONE_2]]) to (%[[N]]) inclusive step (%[[ONE_2]]) {
// CHECK: llvm.store %[[I]], %[[I_VAR]] : !llvm.ptr<i32>
// CHECK: %[[I1:.*]] = llvm.load %[[I_VAR]] : !llvm.ptr<i32>
// CHECK: %[[I1_EXT:.*]] = llvm.sext %[[I1]] : i32 to i64

View File

@ -127,6 +127,8 @@ def TerminatorOp : OpenMP_Op<"terminator", [Terminator]> {
def OMP_ScheduleModNone : I32EnumAttrCase<"none", 0>;
def OMP_ScheduleModMonotonic : I32EnumAttrCase<"monotonic", 1>;
def OMP_ScheduleModNonmonotonic : I32EnumAttrCase<"nonmonotonic", 2>;
// FIXME: remove this value for the modifier because this is handled using a
// separate attribute
def OMP_ScheduleModSIMD : I32EnumAttrCase<"simd", 3>;
def ScheduleModifier
@ -227,7 +229,8 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
by "omp.yield" instruction without operands.
```
omp.wsloop (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
omp.wsloop <clauses>
for (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
%a = load %arrA[%i1, %i2] : memref<?x?xf32>
%b = load %arrB[%i1, %i2] : memref<?x?xf32>
%sum = arith.addf %a, %b : f32
@ -276,7 +279,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
Variadic<IntLikeType>:$upperBound,
Variadic<IntLikeType>:$step,
Variadic<AnyType>:$linear_vars,
Variadic<AnyType>:$linear_step_vars,
Variadic<I32>:$linear_step_vars,
Variadic<OpenMP_PointerLikeType>:$reduction_vars,
OptionalAttr<SymbolRefArrayAttr>:$reductions,
OptionalAttr<ScheduleKindAttr>:$schedule_val,
@ -305,6 +308,25 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
unsigned getNumReductionVars() { return reduction_vars().size(); }
}];
let hasCustomAssemblyFormat = 1;
let assemblyFormat = [{
oilist(`linear` `(`
custom<LinearClause>($linear_vars, type($linear_vars),
$linear_step_vars) `)`
|`schedule` `(`
custom<ScheduleClause>(
$schedule_val, $schedule_modifier, $simd_modifier,
$schedule_chunk_var, type($schedule_chunk_var)) `)`
|`collapse` `(` $collapse_val `)`
|`nowait` $nowait
|`ordered` `(` $ordered_val `)`
|`order` `(` custom<ClauseAttr>($order_val) `)`
|`reduction` `(`
custom<ReductionVarList>(
$reduction_vars, type($reduction_vars), $reductions
) `)`
) `for` custom<WsLoopControl>($region, $lowerBound, $upperBound, $step,
type($step), $inclusive) attr-dict
}];
let hasVerifier = 1;
}

View File

@ -119,7 +119,10 @@ static void printAllocateAndAllocator(OpAsmPrinter &p, Operation *op,
}
}
/// Parse a clause attribute (StringEnumAttr)
//===----------------------------------------------------------------------===//
// Parser and printer for a clause attribute (StringEnumAttr)
//===----------------------------------------------------------------------===//
template <typename ClauseAttr>
static ParseResult parseClauseAttr(AsmParser &parser, ClauseAttr &attr) {
using ClauseT = decltype(std::declval<ClauseAttr>().getValue());
@ -139,28 +142,6 @@ void printClauseAttr(OpAsmPrinter &p, Operation *op, ClauseAttr attr) {
p << stringifyEnum(attr.getValue());
}
//===----------------------------------------------------------------------===//
// Parser and printer for Procbind Clause
//===----------------------------------------------------------------------===//
ParseResult parseProcBindKind(OpAsmParser &parser,
omp::ClauseProcBindKindAttr &procBindAttr) {
StringRef procBindStr;
if (parser.parseKeyword(&procBindStr))
return failure();
if (auto procBindVal = symbolizeClauseProcBindKind(procBindStr)) {
procBindAttr =
ClauseProcBindKindAttr::get(parser.getContext(), *procBindVal);
return success();
}
return failure();
}
void printProcBindKind(OpAsmPrinter &p, Operation *op,
omp::ClauseProcBindKindAttr procBindAttr) {
p << stringifyClauseProcBindKind(procBindAttr.getValue());
}
LogicalResult ParallelOp::verify() {
if (allocate_vars().size() != allocators_vars().size())
return emitError(
@ -180,9 +161,6 @@ parseLinearClause(OpAsmParser &parser,
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &vars,
SmallVectorImpl<Type> &types,
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &stepVars) {
if (parser.parseLParen())
return failure();
do {
OpAsmParser::UnresolvedOperand var;
Type type;
@ -195,20 +173,16 @@ parseLinearClause(OpAsmParser &parser,
types.push_back(type);
stepVars.push_back(stepVar);
} while (succeeded(parser.parseOptionalComma()));
if (parser.parseRParen())
return failure();
return success();
}
/// Print Linear Clause
static void printLinearClause(OpAsmPrinter &p, OperandRange linearVars,
OperandRange linearStepVars) {
static void printLinearClause(OpAsmPrinter &p, Operation *op,
ValueRange linearVars, TypeRange linearVarTypes,
ValueRange linearStepVars) {
size_t linearVarsSize = linearVars.size();
p << "linear(";
for (unsigned i = 0; i < linearVarsSize; ++i) {
std::string separator = i == linearVarsSize - 1 ? ") " : ", ";
std::string separator = i == linearVarsSize - 1 ? "" : ", ";
p << linearVars[i];
if (linearStepVars.size() > i)
p << " = " << linearStepVars[i];
@ -261,20 +235,23 @@ verifyScheduleModifiers(OpAsmParser &parser,
/// sched-wo-chunk ::= `auto` | `runtime`
/// sched-modifier ::= sched-mod-val | sched-mod-val `,` sched-mod-val
/// sched-mod-val ::= `monotonic` | `nonmonotonic` | `simd` | `none`
static ParseResult
parseScheduleClause(OpAsmParser &parser, SmallString<8> &schedule,
SmallVectorImpl<SmallString<12>> &modifiers,
Optional<OpAsmParser::UnresolvedOperand> &chunkSize,
Type &chunkType) {
if (parser.parseLParen())
return failure();
static ParseResult parseScheduleClause(
OpAsmParser &parser, ClauseScheduleKindAttr &scheduleAttr,
ScheduleModifierAttr &schedule_modifier, UnitAttr &simdModifier,
Optional<OpAsmParser::UnresolvedOperand> &chunkSize, Type &chunkType) {
StringRef keyword;
if (parser.parseKeyword(&keyword))
return failure();
llvm::Optional<mlir::omp::ClauseScheduleKind> schedule =
symbolizeClauseScheduleKind(keyword);
if (!schedule)
return parser.emitError(parser.getNameLoc()) << " expected schedule kind";
schedule = keyword;
if (keyword == "static" || keyword == "dynamic" || keyword == "guided") {
scheduleAttr = ClauseScheduleKindAttr::get(parser.getContext(), *schedule);
switch (*schedule) {
case ClauseScheduleKind::Static:
case ClauseScheduleKind::Dynamic:
case ClauseScheduleKind::Guided:
if (succeeded(parser.parseOptionalEqual())) {
chunkSize = OpAsmParser::UnresolvedOperand{};
if (parser.parseOperand(*chunkSize) || parser.parseColonType(chunkType))
@ -282,13 +259,14 @@ parseScheduleClause(OpAsmParser &parser, SmallString<8> &schedule,
} else {
chunkSize = llvm::NoneType::None;
}
} else if (keyword == "auto" || keyword == "runtime") {
break;
case ClauseScheduleKind::Auto:
case ClauseScheduleKind::Runtime:
chunkSize = llvm::NoneType::None;
} else {
return parser.emitError(parser.getNameLoc()) << " expected schedule kind";
}
// If there is a comma, we have one or more modifiers..
SmallVector<SmallString<12>> modifiers;
while (succeeded(parser.parseOptionalComma())) {
StringRef mod;
if (parser.parseKeyword(&mod))
@ -296,27 +274,40 @@ parseScheduleClause(OpAsmParser &parser, SmallString<8> &schedule,
modifiers.push_back(mod);
}
if (parser.parseRParen())
return failure();
if (verifyScheduleModifiers(parser, modifiers))
return failure();
if (!modifiers.empty()) {
SMLoc loc = parser.getCurrentLocation();
if (Optional<ScheduleModifier> mod =
symbolizeScheduleModifier(modifiers[0])) {
schedule_modifier = ScheduleModifierAttr::get(parser.getContext(), *mod);
} else {
return parser.emitError(loc, "invalid schedule modifier");
}
// Only SIMD attribute is allowed here!
if (modifiers.size() > 1) {
assert(symbolizeScheduleModifier(modifiers[1]) == ScheduleModifier::simd);
simdModifier = UnitAttr::get(parser.getBuilder().getContext());
}
}
return success();
}
/// Print schedule clause
static void printScheduleClause(OpAsmPrinter &p, ClauseScheduleKind sched,
Optional<ScheduleModifier> modifier, bool simd,
Value scheduleChunkVar) {
p << "schedule(" << stringifyClauseScheduleKind(sched).lower();
static void printScheduleClause(OpAsmPrinter &p, Operation *op,
ClauseScheduleKindAttr schedAttr,
ScheduleModifierAttr modifier, UnitAttr simd,
Value scheduleChunkVar,
Type scheduleChunkType) {
p << stringifyClauseScheduleKind(schedAttr.getValue());
if (scheduleChunkVar)
p << " = " << scheduleChunkVar << " : " << scheduleChunkVar.getType();
if (modifier)
p << ", " << stringifyScheduleModifier(*modifier);
p << ", " << stringifyScheduleModifier(modifier.getValue());
if (simd)
p << ", simd";
p << ") ";
}
//===----------------------------------------------------------------------===//
@ -478,242 +469,6 @@ static LogicalResult verifySynchronizationHint(Operation *op, uint64_t hint) {
return success();
}
enum ClauseType {
allocateClause,
reductionClause,
nowaitClause,
linearClause,
scheduleClause,
collapseClause,
orderClause,
orderedClause,
COUNT
};
//===----------------------------------------------------------------------===//
// Parser for Clause List
//===----------------------------------------------------------------------===//
/// Parse a list of clauses. The clauses can appear in any order, but their
/// operand segment indices are in the same order that they are passed in the
/// `clauses` list. The operand segments are added over the prevSegments
/// clause-list ::= clause clause-list | empty
/// clause ::= allocate | reduction | nowait | linear | schedule | collapse
/// | order | ordered
/// allocate ::= `allocate` `(` allocate-operand-list `)`
/// reduction ::= `reduction` `(` reduction-entry-list `)`
/// nowait ::= `nowait`
/// linear ::= `linear` `(` linear-list `)`
/// schedule ::= `schedule` `(` sched-list `)`
/// collapse ::= `collapse` `(` ssa-id-and-type `)`
/// order ::= `order` `(` `concurrent` `)`
/// ordered ::= `ordered` `(` ssa-id-and-type `)`
///
/// Note that each clause can only appear once in the clase-list.
static ParseResult parseClauses(OpAsmParser &parser, OperationState &result,
SmallVectorImpl<ClauseType> &clauses,
SmallVectorImpl<int> &segments) {
// Check done[clause] to see if it has been parsed already
BitVector done(ClauseType::COUNT, false);
// See pos[clause] to get position of clause in operand segments
SmallVector<int> pos(ClauseType::COUNT, -1);
// Stores the last parsed clause keyword
StringRef clauseKeyword;
StringRef opName = result.name.getStringRef();
// Containers for storing operands, types and attributes for various clauses
SmallVector<OpAsmParser::UnresolvedOperand> allocates, allocators;
SmallVector<Type> allocateTypes, allocatorTypes;
ArrayAttr reductions;
SmallVector<OpAsmParser::UnresolvedOperand> reductionVars;
SmallVector<Type> reductionVarTypes;
SmallVector<OpAsmParser::UnresolvedOperand> linears;
SmallVector<Type> linearTypes;
SmallVector<OpAsmParser::UnresolvedOperand> linearSteps;
SmallString<8> schedule;
SmallVector<SmallString<12>> modifiers;
Optional<OpAsmParser::UnresolvedOperand> scheduleChunkSize;
Type scheduleChunkType;
// Compute the position of clauses in operand segments
int currPos = 0;
for (ClauseType clause : clauses) {
// Skip the following clauses - they do not take any position in operand
// segments
if (clause == nowaitClause || clause == collapseClause ||
clause == orderClause || clause == orderedClause)
continue;
pos[clause] = currPos++;
// For the following clauses, two positions are reserved in the operand
// segments
if (clause == allocateClause || clause == linearClause)
currPos++;
}
SmallVector<int> clauseSegments(currPos);
// Helper function to check if a clause is allowed/repeated or not
auto checkAllowed = [&](ClauseType clause) -> ParseResult {
if (!llvm::is_contained(clauses, clause))
return parser.emitError(parser.getCurrentLocation())
<< clauseKeyword << " is not a valid clause for the " << opName
<< " operation";
if (done[clause])
return parser.emitError(parser.getCurrentLocation())
<< "at most one " << clauseKeyword << " clause can appear on the "
<< opName << " operation";
done[clause] = true;
return success();
};
while (succeeded(parser.parseOptionalKeyword(&clauseKeyword))) {
if (clauseKeyword == "allocate") {
if (checkAllowed(allocateClause) || parser.parseLParen() ||
parseAllocateAndAllocator(parser, allocates, allocateTypes,
allocators, allocatorTypes) ||
parser.parseRParen())
return failure();
clauseSegments[pos[allocateClause]] = allocates.size();
clauseSegments[pos[allocateClause] + 1] = allocators.size();
} else if (clauseKeyword == "reduction") {
if (checkAllowed(reductionClause) || parser.parseLParen() ||
parseReductionVarList(parser, reductionVars, reductionVarTypes,
reductions) ||
parser.parseRParen())
return failure();
clauseSegments[pos[reductionClause]] = reductionVars.size();
} else if (clauseKeyword == "nowait") {
if (checkAllowed(nowaitClause))
return failure();
auto attr = UnitAttr::get(parser.getBuilder().getContext());
result.addAttribute("nowait", attr);
} else if (clauseKeyword == "linear") {
if (checkAllowed(linearClause) ||
parseLinearClause(parser, linears, linearTypes, linearSteps))
return failure();
clauseSegments[pos[linearClause]] = linears.size();
clauseSegments[pos[linearClause] + 1] = linearSteps.size();
} else if (clauseKeyword == "schedule") {
if (checkAllowed(scheduleClause) ||
parseScheduleClause(parser, schedule, modifiers, scheduleChunkSize,
scheduleChunkType))
return failure();
if (scheduleChunkSize) {
clauseSegments[pos[scheduleClause]] = 1;
}
} else if (clauseKeyword == "collapse") {
auto type = parser.getBuilder().getI64Type();
mlir::IntegerAttr attr;
if (checkAllowed(collapseClause) || parser.parseLParen() ||
parser.parseAttribute(attr, type) || parser.parseRParen())
return failure();
result.addAttribute("collapse_val", attr);
} else if (clauseKeyword == "ordered") {
mlir::IntegerAttr attr;
if (checkAllowed(orderedClause))
return failure();
if (succeeded(parser.parseOptionalLParen())) {
auto type = parser.getBuilder().getI64Type();
if (parser.parseAttribute(attr, type) || parser.parseRParen())
return failure();
} else {
// Use 0 to represent no ordered parameter was specified
attr = parser.getBuilder().getI64IntegerAttr(0);
}
result.addAttribute("ordered_val", attr);
} else if (clauseKeyword == "order") {
ClauseOrderKindAttr order;
if (checkAllowed(orderClause) || parser.parseLParen() ||
parseClauseAttr<ClauseOrderKindAttr>(parser, order) ||
parser.parseRParen())
return failure();
result.addAttribute("order_val", order);
} else {
return parser.emitError(parser.getNameLoc())
<< clauseKeyword << " is not a valid clause";
}
}
// Add allocate parameters.
if (done[allocateClause] && clauseSegments[pos[allocateClause]] &&
failed(parser.resolveOperands(allocates, allocateTypes,
allocates[0].location, result.operands)))
return failure();
// Add allocator parameters.
if (done[allocateClause] && clauseSegments[pos[allocateClause] + 1] &&
failed(parser.resolveOperands(allocators, allocatorTypes,
allocators[0].location, result.operands)))
return failure();
// Add reduction parameters and symbols
if (done[reductionClause] && clauseSegments[pos[reductionClause]]) {
if (failed(parser.resolveOperands(reductionVars, reductionVarTypes,
parser.getNameLoc(), result.operands)))
return failure();
result.addAttribute("reductions", reductions);
}
// Add linear parameters
if (done[linearClause] && clauseSegments[pos[linearClause]]) {
auto linearStepType = parser.getBuilder().getI32Type();
SmallVector<Type> linearStepTypes(linearSteps.size(), linearStepType);
if (failed(parser.resolveOperands(linears, linearTypes, linears[0].location,
result.operands)) ||
failed(parser.resolveOperands(linearSteps, linearStepTypes,
linearSteps[0].location,
result.operands)))
return failure();
}
// Add schedule parameters
if (done[scheduleClause] && !schedule.empty()) {
if (Optional<ClauseScheduleKind> sched =
symbolizeClauseScheduleKind(schedule)) {
auto attr = ClauseScheduleKindAttr::get(parser.getContext(), *sched);
result.addAttribute("schedule_val", attr);
} else {
return parser.emitError(parser.getCurrentLocation(),
"invalid schedule kind");
}
if (!modifiers.empty()) {
SMLoc loc = parser.getCurrentLocation();
if (Optional<ScheduleModifier> mod =
symbolizeScheduleModifier(modifiers[0])) {
result.addAttribute(
"schedule_modifier",
ScheduleModifierAttr::get(parser.getContext(), *mod));
} else {
return parser.emitError(loc, "invalid schedule modifier");
}
// Only SIMD attribute is allowed here!
if (modifiers.size() > 1) {
assert(symbolizeScheduleModifier(modifiers[1]) ==
ScheduleModifier::simd);
auto attr = UnitAttr::get(parser.getBuilder().getContext());
result.addAttribute("simd_modifier", attr);
}
}
if (scheduleChunkSize)
parser.resolveOperand(*scheduleChunkSize, scheduleChunkType,
result.operands);
}
segments.insert(segments.end(), clauseSegments.begin(), clauseSegments.end());
return success();
}
//===----------------------------------------------------------------------===//
// Verifier for SectionsOp
//===----------------------------------------------------------------------===//
@ -737,107 +492,68 @@ LogicalResult SectionsOp::verifyRegions() {
return success();
}
/// Parses an OpenMP Workshare Loop operation
///
/// wsloop ::= `omp.wsloop` loop-control clause-list
//===----------------------------------------------------------------------===//
// WsLoopOp
//===----------------------------------------------------------------------===//
/// loop-control ::= `(` ssa-id-list `)` `:` type `=` loop-bounds
/// loop-bounds := `(` ssa-id-list `)` to `(` ssa-id-list `)` inclusive? steps
/// steps := `step` `(`ssa-id-list`)`
/// clause-list ::= clause clause-list | empty
/// clause ::= linear | schedule | collapse | nowait | ordered | order
/// | reduction
ParseResult WsLoopOp::parse(OpAsmParser &parser, OperationState &result) {
ParseResult
parseWsLoopControl(OpAsmParser &parser, Region &region,
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &lowerBound,
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &upperBound,
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &steps,
SmallVectorImpl<Type> &loopVarTypes, UnitAttr &inclusive) {
// Parse an opening `(` followed by induction variables followed by `)`
SmallVector<OpAsmParser::UnresolvedOperand> ivs;
if (parser.parseRegionArgumentList(ivs, /*requiredOperandCount=*/-1,
OpAsmParser::Delimiter::Paren))
return failure();
int numIVs = static_cast<int>(ivs.size());
size_t numIVs = ivs.size();
Type loopVarType;
if (parser.parseColonType(loopVarType))
return failure();
// Parse loop bounds.
SmallVector<OpAsmParser::UnresolvedOperand> lower;
if (parser.parseEqual() ||
parser.parseOperandList(lower, numIVs, OpAsmParser::Delimiter::Paren) ||
parser.resolveOperands(lower, loopVarType, result.operands))
parser.parseOperandList(lowerBound, numIVs,
OpAsmParser::Delimiter::Paren))
return failure();
SmallVector<OpAsmParser::UnresolvedOperand> upper;
if (parser.parseKeyword("to") ||
parser.parseOperandList(upper, numIVs, OpAsmParser::Delimiter::Paren) ||
parser.resolveOperands(upper, loopVarType, result.operands))
parser.parseOperandList(upperBound, numIVs,
OpAsmParser::Delimiter::Paren))
return failure();
if (succeeded(parser.parseOptionalKeyword("inclusive"))) {
auto attr = UnitAttr::get(parser.getBuilder().getContext());
result.addAttribute("inclusive", attr);
inclusive = UnitAttr::get(parser.getBuilder().getContext());
}
// Parse step values.
SmallVector<OpAsmParser::UnresolvedOperand> steps;
if (parser.parseKeyword("step") ||
parser.parseOperandList(steps, numIVs, OpAsmParser::Delimiter::Paren) ||
parser.resolveOperands(steps, loopVarType, result.operands))
parser.parseOperandList(steps, numIVs, OpAsmParser::Delimiter::Paren))
return failure();
SmallVector<ClauseType> clauses = {
linearClause, reductionClause, collapseClause, orderClause,
orderedClause, nowaitClause, scheduleClause};
SmallVector<int> segments{numIVs, numIVs, numIVs};
if (failed(parseClauses(parser, result, clauses, segments)))
return failure();
result.addAttribute("operand_segment_sizes",
parser.getBuilder().getI32VectorAttr(segments));
// Now parse the body.
Region *body = result.addRegion();
SmallVector<Type> ivTypes(numIVs, loopVarType);
loopVarTypes = SmallVector<Type>(numIVs, loopVarType);
SmallVector<OpAsmParser::UnresolvedOperand> blockArgs(ivs);
if (parser.parseRegion(*body, blockArgs, ivTypes))
if (parser.parseRegion(region, blockArgs, loopVarTypes))
return failure();
return success();
}
void WsLoopOp::print(OpAsmPrinter &p) {
auto args = getRegion().front().getArguments();
p << " (" << args << ") : " << args[0].getType() << " = (" << lowerBound()
<< ") to (" << upperBound() << ") ";
if (inclusive()) {
void printWsLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
ValueRange lowerBound, ValueRange upperBound,
ValueRange steps, TypeRange loopVarTypes,
UnitAttr inclusive) {
auto args = region.front().getArguments();
p << " (" << args << ") : " << args[0].getType() << " = (" << lowerBound
<< ") to (" << upperBound << ") ";
if (inclusive)
p << "inclusive ";
}
p << "step (" << step() << ") ";
if (!linear_vars().empty())
printLinearClause(p, linear_vars(), linear_step_vars());
if (auto sched = schedule_val())
printScheduleClause(p, sched.getValue(), schedule_modifier(),
simd_modifier(), schedule_chunk_var());
if (auto collapse = collapse_val())
p << "collapse(" << collapse << ") ";
if (nowait())
p << "nowait ";
if (auto ordered = ordered_val())
p << "ordered(" << ordered << ") ";
if (auto order = order_val())
p << "order(" << stringifyClauseOrderKind(*order) << ") ";
if (!reduction_vars().empty()) {
printReductionVarList(p << "reduction(", *this, reduction_vars(),
reduction_vars().getTypes(), reductions());
p << ")";
}
p << ' ';
p.printRegion(region(), /*printEntryBlockArgs=*/false);
p << "step (" << steps << ") ";
p.printRegion(region, /*printEntryBlockArgs=*/false);
}
//===----------------------------------------------------------------------===//

View File

@ -49,7 +49,7 @@ func @branch_loop() {
func @wsloop(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index) {
// CHECK: omp.parallel
omp.parallel {
// CHECK: omp.wsloop (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) {
// CHECK: omp.wsloop for (%[[ARG6:.*]], %[[ARG7:.*]]) : i64 = (%[[ARG0]], %[[ARG1]]) to (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[ARG5]]) {
"omp.wsloop"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) ({
^bb0(%arg6: index, %arg7: index):
// CHECK-DAG: %[[CAST_ARG6:.*]] = builtin.unrealized_conversion_cast %[[ARG6]] : i64 to index

View File

@ -4,7 +4,7 @@
func @parallel(%arg0: index, %arg1: index, %arg2: index,
%arg3: index, %arg4: index, %arg5: index) {
// CHECK: omp.parallel {
// CHECK: omp.wsloop (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
// CHECK: omp.wsloop for (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
// CHECK: memref.alloca_scope
scf.parallel (%i, %j) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) {
// CHECK: "test.payload"(%[[LVAR1]], %[[LVAR2]]) : (index, index) -> ()
@ -21,11 +21,11 @@ func @parallel(%arg0: index, %arg1: index, %arg2: index,
func @nested_loops(%arg0: index, %arg1: index, %arg2: index,
%arg3: index, %arg4: index, %arg5: index) {
// CHECK: omp.parallel {
// CHECK: omp.wsloop (%[[LVAR_OUT1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) {
// CHECK: omp.wsloop for (%[[LVAR_OUT1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) {
// CHECK: memref.alloca_scope
scf.parallel (%i) = (%arg0) to (%arg2) step (%arg4) {
// CHECK: omp.parallel
// CHECK: omp.wsloop (%[[LVAR_IN1:.*]]) : index = (%arg1) to (%arg3) step (%arg5) {
// CHECK: omp.wsloop for (%[[LVAR_IN1:.*]]) : index = (%arg1) to (%arg3) step (%arg5) {
// CHECK: memref.alloca_scope
scf.parallel (%j) = (%arg1) to (%arg3) step (%arg5) {
// CHECK: "test.payload"(%[[LVAR_OUT1]], %[[LVAR_IN1]]) : (index, index) -> ()
@ -44,7 +44,7 @@ func @nested_loops(%arg0: index, %arg1: index, %arg2: index,
func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index,
%arg3: index, %arg4: index, %arg5: index) {
// CHECK: omp.parallel {
// CHECK: omp.wsloop (%[[LVAR_AL1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) {
// CHECK: omp.wsloop for (%[[LVAR_AL1:.*]]) : index = (%arg0) to (%arg2) step (%arg4) {
// CHECK: memref.alloca_scope
scf.parallel (%i) = (%arg0) to (%arg2) step (%arg4) {
// CHECK: "test.payload1"(%[[LVAR_AL1]]) : (index) -> ()
@ -56,7 +56,7 @@ func @adjacent_loops(%arg0: index, %arg1: index, %arg2: index,
// CHECK: }
// CHECK: omp.parallel {
// CHECK: omp.wsloop (%[[LVAR_AL2:.*]]) : index = (%arg1) to (%arg3) step (%arg5) {
// CHECK: omp.wsloop for (%[[LVAR_AL2:.*]]) : index = (%arg1) to (%arg3) step (%arg5) {
// CHECK: memref.alloca_scope
scf.parallel (%j) = (%arg1) to (%arg3) step (%arg5) {
// CHECK: "test.payload2"(%[[LVAR_AL2]]) : (index) -> ()

View File

@ -32,7 +32,7 @@ llvm.func @repeated_successor_no_args(%arg0: i1) {
// CHECK: @repeated_successor_openmp
llvm.func @repeated_successor_openmp(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: i1) {
omp.wsloop (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
omp.wsloop for (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
// CHECK: llvm.cond_br %{{.*}}, ^[[BB1:.*]]({{.*}}), ^[[BB2:.*]]({{.*}})
llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64)
// CHECK: ^[[BB1]]

View File

@ -88,8 +88,9 @@ func @proc_bind_once() {
// -----
func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) {
// expected-error @below {{inclusive is not a valid clause}}
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) nowait inclusive {
// expected-error @below {{expected 'for'}}
omp.wsloop nowait inclusive
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
}
@ -98,7 +99,8 @@ func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) {
func @order_value(%lb : index, %ub : index, %step : index) {
// expected-error @below {{invalid clause value: 'default'}}
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) order(default) {
omp.wsloop order(default)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
}
@ -106,8 +108,9 @@ func @order_value(%lb : index, %ub : index, %step : index) {
// -----
func @if_not_allowed(%lb : index, %ub : index, %step : index, %bool_var : i1) {
// expected-error @below {{if is not a valid clause}}
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) if(%bool_var: i1) {
// expected-error @below {{expected 'for'}}
omp.wsloop if(%bool_var: i1)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
}
@ -115,8 +118,9 @@ func @if_not_allowed(%lb : index, %ub : index, %step : index, %bool_var : i1) {
// -----
func @num_threads_not_allowed(%lb : index, %ub : index, %step : index, %int_var : i32) {
// expected-error @below {{num_threads is not a valid clause}}
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) num_threads(%int_var: i32) {
// expected-error @below {{expected 'for'}}
omp.wsloop num_threads(%int_var: i32)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
}
@ -124,8 +128,9 @@ func @num_threads_not_allowed(%lb : index, %ub : index, %step : index, %int_var
// -----
func @proc_bind_not_allowed(%lb : index, %ub : index, %step : index) {
// expected-error @below {{proc_bind is not a valid clause}}
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) proc_bind(close) {
// expected-error @below {{expected 'for'}}
omp.wsloop proc_bind(close)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
}
@ -134,7 +139,8 @@ func @proc_bind_not_allowed(%lb : index, %ub : index, %step : index) {
llvm.func @test_omp_wsloop_dynamic_bad_modifier(%lb : i64, %ub : i64, %step : i64) -> () {
// expected-error @+1 {{unknown modifier type: ginandtonic}}
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, ginandtonic) {
omp.wsloop schedule(dynamic, ginandtonic)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
omp.yield
}
llvm.return
@ -144,7 +150,8 @@ llvm.func @test_omp_wsloop_dynamic_bad_modifier(%lb : i64, %ub : i64, %step : i6
llvm.func @test_omp_wsloop_dynamic_many_modifier(%lb : i64, %ub : i64, %step : i64) -> () {
// expected-error @+1 {{unexpected modifier(s)}}
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, monotonic, monotonic, monotonic) {
omp.wsloop schedule(dynamic, monotonic, monotonic, monotonic)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
omp.yield
}
llvm.return
@ -154,7 +161,8 @@ llvm.func @test_omp_wsloop_dynamic_many_modifier(%lb : i64, %ub : i64, %step : i
llvm.func @test_omp_wsloop_dynamic_wrong_modifier(%lb : i64, %ub : i64, %step : i64) -> () {
// expected-error @+1 {{incorrect modifier order}}
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, simd, monotonic) {
omp.wsloop schedule(dynamic, simd, monotonic)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
omp.yield
}
llvm.return
@ -164,7 +172,8 @@ llvm.func @test_omp_wsloop_dynamic_wrong_modifier(%lb : i64, %ub : i64, %step :
llvm.func @test_omp_wsloop_dynamic_wrong_modifier2(%lb : i64, %ub : i64, %step : i64) -> () {
// expected-error @+1 {{incorrect modifier order}}
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, monotonic, monotonic) {
omp.wsloop schedule(dynamic, monotonic, monotonic)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
omp.yield
}
llvm.return
@ -174,7 +183,8 @@ llvm.func @test_omp_wsloop_dynamic_wrong_modifier2(%lb : i64, %ub : i64, %step :
llvm.func @test_omp_wsloop_dynamic_wrong_modifier3(%lb : i64, %ub : i64, %step : i64) -> () {
// expected-error @+1 {{incorrect modifier order}}
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, simd, simd) {
omp.wsloop schedule(dynamic, simd, simd)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
omp.yield
}
llvm.return
@ -311,8 +321,8 @@ func @foo(%lb : index, %ub : index, %step : index) {
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
%1 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step)
reduction(@add_f32 -> %0 : !llvm.ptr<f32>) {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>)
for (%iv) : index = (%lb) to (%ub) step (%step) {
%2 = arith.constant 2.0 : f32
// expected-error @below {{accumulator is not used by the parent}}
omp.reduction %2, %1 : !llvm.ptr<f32>
@ -329,8 +339,8 @@ func @foo(%lb : index, %ub : index, %step : index) {
%1 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
// expected-error @below {{expected symbol reference @foo to point to a reduction declaration}}
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step)
reduction(@foo -> %0 : !llvm.ptr<f32>) {
omp.wsloop reduction(@foo -> %0 : !llvm.ptr<f32>)
for (%iv) : index = (%lb) to (%ub) step (%step) {
%2 = arith.constant 2.0 : f32
omp.reduction %2, %1 : !llvm.ptr<f32>
omp.yield
@ -357,8 +367,8 @@ func @foo(%lb : index, %ub : index, %step : index) {
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
// expected-error @below {{accumulator variable used more than once}}
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step)
reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %0 : !llvm.ptr<f32>) {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %0 : !llvm.ptr<f32>)
for (%iv) : index = (%lb) to (%ub) step (%step) {
%2 = arith.constant 2.0 : f32
omp.reduction %2, %0 : !llvm.ptr<f32>
omp.yield
@ -390,8 +400,8 @@ func @foo(%lb : index, %ub : index, %step : index, %mem : memref<1xf32>) {
%c1 = arith.constant 1 : i32
// expected-error @below {{expected accumulator ('memref<1xf32>') to be the same type as reduction declaration ('!llvm.ptr<f32>')}}
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step)
reduction(@add_f32 -> %mem : memref<1xf32>) {
omp.wsloop reduction(@add_f32 -> %mem : memref<1xf32>)
for (%iv) : index = (%lb) to (%ub) step (%step) {
%2 = arith.constant 2.0 : f32
omp.reduction %2, %mem : memref<1xf32>
omp.yield
@ -427,7 +437,8 @@ omp.critical.declare @mutex hint(invalid_hint)
// -----
func @omp_ordered1(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(1) {
omp.wsloop ordered(1)
for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
// expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
omp.ordered_region {
omp.terminator
@ -440,7 +451,7 @@ func @omp_ordered1(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
// -----
func @omp_ordered2(%arg1 : i32, %arg2 : i32, %arg3 : i32) -> () {
omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
omp.wsloop for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
// expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
omp.ordered_region {
omp.terminator
@ -461,7 +472,8 @@ func @omp_ordered3(%vec0 : i64) -> () {
// -----
func @omp_ordered4(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64) -> () {
omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(0) {
omp.wsloop ordered(0)
for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
// expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}}
omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
@ -472,7 +484,8 @@ func @omp_ordered4(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64) -> () {
// -----
func @omp_ordered5(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64, %vec1 : i64) -> () {
omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(1) {
omp.wsloop ordered(1)
for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
// expected-error @below {{number of variables in depend clause does not match number of iteration variables in the doacross loop}}
omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}

View File

@ -124,35 +124,40 @@ func @omp_parallel_pretty(%data_var : memref<i32>, %if_cond : i1, %num_threads :
// CHECK-LABEL: omp_wsloop
func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32) -> () {
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) collapse(2) ordered(1)
// CHECK: omp.wsloop collapse(2) ordered(1)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
"omp.wsloop" (%lb, %ub, %step) ({
^bb0(%iv: index):
omp.yield
}) {operand_segment_sizes = dense<[1,1,1,0,0,0,0]> : vector<7xi32>, collapse_val = 2, ordered_val = 1} :
(index, index, index) -> ()
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static)
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
"omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var) ({
^bb0(%iv: index):
omp.yield
}) {operand_segment_sizes = dense<[1,1,1,1,1,0,0]> : vector<7xi32>, schedule_val = #omp<"schedulekind static">} :
(index, index, index, memref<i32>, i32) -> ()
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref<i32>, %{{.*}} = %{{.*}} : memref<i32>) schedule(static)
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>, %{{.*}} = %{{.*}} : memref<i32>) schedule(static)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
"omp.wsloop" (%lb, %ub, %step, %data_var, %data_var, %linear_var, %linear_var) ({
^bb0(%iv: index):
omp.yield
}) {operand_segment_sizes = dense<[1,1,1,2,2,0,0]> : vector<7xi32>, schedule_val = #omp<"schedulekind static">} :
(index, index, index, memref<i32>, memref<i32>, i32, i32) -> ()
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}}) collapse(3) ordered(2)
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}}) collapse(3) ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
"omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var, %chunk_var) ({
^bb0(%iv: index):
omp.yield
}) {operand_segment_sizes = dense<[1,1,1,1,1,0,1]> : vector<7xi32>, schedule_val = #omp<"schedulekind dynamic">, collapse_val = 3, ordered_val = 2} :
(index, index, index, memref<i32>, i32, i32) -> ()
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) schedule(auto) nowait
// CHECK: omp.wsloop schedule(auto) nowait
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
"omp.wsloop" (%lb, %ub, %step) ({
^bb0(%iv: index):
omp.yield
@ -165,51 +170,62 @@ func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref<i32
// CHECK-LABEL: omp_wsloop_pretty
func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, %data_var : memref<i32>, %linear_var : i32, %chunk_var : i32, %chunk_var2 : i16) -> () {
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) collapse(2) ordered(2) {
// CHECK: omp.wsloop collapse(2) ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop collapse(2) ordered(2)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static)
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) schedule(static) linear(%data_var = %linear_var : memref<i32>) {
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop schedule(static) linear(%data_var = %linear_var : memref<i32>)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static = %{{.*}} : i32) collapse(3) ordered(2)
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) linear(%data_var = %linear_var : memref<i32>)
schedule(static = %chunk_var : i32) collapse(3) {
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static = %{{.*}} : i32) collapse(3) ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(static = %chunk_var : i32) collapse(3)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i32, nonmonotonic) collapse(3) ordered(2)
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) linear(%data_var = %linear_var : memref<i32>)
schedule(dynamic = %chunk_var : i32, nonmonotonic) collapse(3) {
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i32, nonmonotonic) collapse(3) ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var : i32, nonmonotonic) collapse(3)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i16, monotonic) collapse(3) ordered(2)
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) linear(%data_var = %linear_var : memref<i32>)
schedule(dynamic = %chunk_var2 : i16, monotonic) collapse(3) {
// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}} : i16, monotonic) collapse(3) ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop ordered(2) linear(%data_var = %linear_var : memref<i32>) schedule(dynamic = %chunk_var2 : i16, monotonic) collapse(3)
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) {
// CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}})
omp.wsloop (%iv) : index = (%lb) to (%ub) inclusive step (%step) {
// CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}})
omp.wsloop for (%iv) : index = (%lb) to (%ub) inclusive step (%step) {
omp.yield
}
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) nowait
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) nowait {
// CHECK: omp.wsloop nowait
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop nowait
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) nowait order(concurrent)
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) order(concurrent) nowait {
// CHECK: omp.wsloop nowait order(concurrent)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop order(concurrent) nowait
for (%iv) : index = (%lb) to (%ub) step (%step) {
omp.yield
}
@ -219,8 +235,8 @@ func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, %data_var : mem
// CHECK-LABEL: omp_wsloop_pretty_multi_block
func @omp_wsloop_pretty_multi_block(%lb : index, %ub : index, %step : index, %data1 : memref<?xi32>, %data2 : memref<?xi32>) -> () {
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) {
// CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
%1 = "test.payload"(%iv) : (index) -> (i32)
cf.br ^bb1(%1: i32)
^bb1(%arg: i32):
@ -228,8 +244,8 @@ func @omp_wsloop_pretty_multi_block(%lb : index, %ub : index, %step : index, %da
omp.yield
}
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) {
// CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
%c = "test.condition"(%iv) : (index) -> (i1)
%v1 = "test.payload"(%iv) : (index) -> (i32)
cf.cond_br %c, ^bb1(%v1: i32), ^bb2(%v1: i32)
@ -243,8 +259,8 @@ func @omp_wsloop_pretty_multi_block(%lb : index, %ub : index, %step : index, %da
omp.yield
}
// CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) {
// CHECK: omp.wsloop for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) {
%c = "test.condition"(%iv) : (index) -> (i1)
%v1 = "test.payload"(%iv) : (index) -> (i32)
cf.cond_br %c, ^bb1(%v1: i32), ^bb2(%v1: i32)
@ -263,8 +279,8 @@ func @omp_wsloop_pretty_multi_block(%lb : index, %ub : index, %step : index, %da
func @omp_wsloop_pretty_non_index(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i64, %ub2 : i64, %step2 : i64,
%data1 : memref<?xi32>, %data2 : memref<?xi64>) -> () {
// CHECK: omp.wsloop (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop (%iv1) : i32 = (%lb1) to (%ub1) step (%step1) {
// CHECK: omp.wsloop for (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop for (%iv1) : i32 = (%lb1) to (%ub1) step (%step1) {
%1 = "test.payload"(%iv1) : (i32) -> (index)
cf.br ^bb1(%1: index)
^bb1(%arg1: index):
@ -272,8 +288,8 @@ func @omp_wsloop_pretty_non_index(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i
omp.yield
}
// CHECK: omp.wsloop (%{{.*}}) : i64 = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop (%iv2) : i64 = (%lb2) to (%ub2) step (%step2) {
// CHECK: omp.wsloop for (%{{.*}}) : i64 = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
omp.wsloop for (%iv2) : i64 = (%lb2) to (%ub2) step (%step2) {
%2 = "test.payload"(%iv2) : (i64) -> (index)
cf.br ^bb1(%2: index)
^bb1(%arg2: index):
@ -287,8 +303,8 @@ func @omp_wsloop_pretty_non_index(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i
// CHECK-LABEL: omp_wsloop_pretty_multiple
func @omp_wsloop_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref<?xi32>) -> () {
// CHECK: omp.wsloop (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}})
omp.wsloop (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
// CHECK: omp.wsloop for (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}})
omp.wsloop for (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) {
%1 = "test.payload"(%iv1) : (i32) -> (index)
%2 = "test.payload"(%iv2) : (i32) -> (index)
memref.store %iv1, %data1[%1] : memref<?xi32>
@ -395,8 +411,8 @@ func @reduction(%lb : index, %ub : index, %step : index) {
%c1 = arith.constant 1 : i32
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
// CHECK: reduction(@add_f32 -> %{{.+}} : !llvm.ptr<f32>)
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step)
reduction(@add_f32 -> %0 : !llvm.ptr<f32>) {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>)
for (%iv) : index = (%lb) to (%ub) step (%step) {
%1 = arith.constant 2.0 : f32
// CHECK: omp.reduction %{{.+}}, %{{.+}}
omp.reduction %1, %0 : !llvm.ptr<f32>
@ -425,8 +441,8 @@ combiner {
func @reduction2(%lb : index, %ub : index, %step : index) {
%0 = memref.alloca() : memref<1xf32>
// CHECK: reduction
omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step)
reduction(@add2_f32 -> %0 : memref<1xf32>) {
omp.wsloop reduction(@add2_f32 -> %0 : memref<1xf32>)
for (%iv) : index = (%lb) to (%ub) step (%step) {
%1 = arith.constant 2.0 : f32
// CHECK: omp.reduction
omp.reduction %1, %0 : memref<1xf32>
@ -475,14 +491,16 @@ func @omp_ordered(%arg1 : i32, %arg2 : i32, %arg3 : i32,
omp.terminator
}
omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(0) {
omp.wsloop ordered(0)
for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
omp.ordered_region {
omp.terminator
}
omp.yield
}
omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(1) {
omp.wsloop ordered(1)
for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
// Only one DEPEND(SINK: vec) clause
// CHECK: omp.ordered depend_type(dependsink) depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64}
omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
@ -493,7 +511,8 @@ func @omp_ordered(%arg1 : i32, %arg2 : i32, %arg3 : i32,
omp.yield
}
omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(2) {
omp.wsloop ordered(2)
for (%0) : i32 = (%arg1) to (%arg2) step (%arg3) {
// Multiple DEPEND(SINK: vec) clauses
// CHECK: omp.ordered depend_type(dependsink) depend_vec(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, i64, i64) {num_loops_val = 2 : i64}
omp.ordered depend_type(dependsink) depend_vec(%vec0, %vec1, %vec2, %vec3 : i64, i64, i64, i64) {num_loops_val = 2 : i64}

View File

@ -427,7 +427,8 @@ llvm.func @body(i32)
// CHECK-LABEL: @test_omp_wsloop_static_defchunk
llvm.func @test_omp_wsloop_static_defchunk(%lb : i32, %ub : i32, %step : i32) -> () {
omp.wsloop (%iv) : i32 = (%lb) to (%ub) step (%step) schedule(static) {
omp.wsloop schedule(static)
for (%iv) : i32 = (%lb) to (%ub) step (%step) {
// CHECK: call void @__kmpc_for_static_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 34, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32 1, i32 0)
// CHECK: call void @__kmpc_for_static_fini
llvm.call @body(%iv) : (i32) -> ()
@ -443,7 +444,8 @@ llvm.func @body(i32)
// CHECK-LABEL: @test_omp_wsloop_static_1
llvm.func @test_omp_wsloop_static_1(%lb : i32, %ub : i32, %step : i32) -> () {
%static_chunk_size = llvm.mlir.constant(1 : i32) : i32
omp.wsloop (%iv) : i32 = (%lb) to (%ub) step (%step) schedule(static = %static_chunk_size : i32) {
omp.wsloop schedule(static = %static_chunk_size : i32)
for (%iv) : i32 = (%lb) to (%ub) step (%step) {
// CHECK: call void @__kmpc_for_static_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 33, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32 1, i32 1)
// CHECK: call void @__kmpc_for_static_fini
llvm.call @body(%iv) : (i32) -> ()
@ -459,7 +461,8 @@ llvm.func @body(i32)
// CHECK-LABEL: @test_omp_wsloop_static_2
llvm.func @test_omp_wsloop_static_2(%lb : i32, %ub : i32, %step : i32) -> () {
%static_chunk_size = llvm.mlir.constant(2 : i32) : i32
omp.wsloop (%iv) : i32 = (%lb) to (%ub) step (%step) schedule(static = %static_chunk_size : i32) {
omp.wsloop schedule(static = %static_chunk_size : i32)
for (%iv) : i32 = (%lb) to (%ub) step (%step) {
// CHECK: call void @__kmpc_for_static_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 33, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32* %{{.*}}, i32 1, i32 2)
// CHECK: call void @__kmpc_for_static_fini
llvm.call @body(%iv) : (i32) -> ()
@ -473,7 +476,8 @@ llvm.func @test_omp_wsloop_static_2(%lb : i32, %ub : i32, %step : i32) -> () {
llvm.func @body(i64)
llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () {
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic) {
omp.wsloop schedule(dynamic)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
// CHECK: call void @__kmpc_dispatch_init_8u
// CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
// CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@ -490,7 +494,8 @@ llvm.func @body(i64)
llvm.func @test_omp_wsloop_dynamic_chunk_const(%lb : i64, %ub : i64, %step : i64) -> () {
%chunk_size_const = llvm.mlir.constant(2 : i16) : i16
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic = %chunk_size_const : i16) {
omp.wsloop schedule(dynamic = %chunk_size_const : i16)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
// CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 35, i64 {{.*}}, i64 %{{.*}}, i64 {{.*}}, i64 2)
// CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
// CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@ -509,7 +514,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var(%lb : i32, %ub : i32, %step : i32)
%1 = llvm.mlir.constant(1 : i64) : i64
%chunk_size_alloca = llvm.alloca %1 x i16 {bindc_name = "chunk_size", in_type = i16, uniq_name = "_QFsub1Echunk_size"} : (i64) -> !llvm.ptr<i16>
%chunk_size_var = llvm.load %chunk_size_alloca : !llvm.ptr<i16>
omp.wsloop (%iv) : i32 = (%lb) to (%ub) step (%step) schedule(dynamic = %chunk_size_var : i16) {
omp.wsloop schedule(dynamic = %chunk_size_var : i16)
for (%iv) : i32 = (%lb) to (%ub) step (%step) {
// CHECK: %[[CHUNK_SIZE:.*]] = sext i16 %{{.*}} to i32
// CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 35, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
// CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
@ -529,7 +535,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
%1 = llvm.mlir.constant(1 : i64) : i64
%chunk_size_alloca = llvm.alloca %1 x i64 {bindc_name = "chunk_size", in_type = i64, uniq_name = "_QFsub1Echunk_size"} : (i64) -> !llvm.ptr<i64>
%chunk_size_var = llvm.load %chunk_size_alloca : !llvm.ptr<i64>
omp.wsloop (%iv) : i32 = (%lb) to (%ub) step (%step) schedule(dynamic = %chunk_size_var : i64) {
omp.wsloop schedule(dynamic = %chunk_size_var : i64)
for (%iv) : i32 = (%lb) to (%ub) step (%step) {
// CHECK: %[[CHUNK_SIZE:.*]] = trunc i64 %{{.*}} to i32
// CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 35, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
// CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
@ -546,7 +553,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var2(%lb : i32, %ub : i32, %step : i32)
llvm.func @body(i32)
llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32, %chunk_size : i32) -> () {
omp.wsloop (%iv) : i32 = (%lb) to (%ub) step (%step) schedule(dynamic = %chunk_size : i32) {
omp.wsloop schedule(dynamic = %chunk_size : i32)
for (%iv) : i32 = (%lb) to (%ub) step (%step) {
// CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 35, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %{{.*}})
// CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
// CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@ -562,7 +570,8 @@ llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32,
llvm.func @body(i64)
llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(auto) {
omp.wsloop schedule(auto)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
// CHECK: call void @__kmpc_dispatch_init_8u
// CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
// CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@ -578,7 +587,8 @@ llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
llvm.func @body(i64)
llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(runtime) {
omp.wsloop schedule(runtime)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
// CHECK: call void @__kmpc_dispatch_init_8u
// CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
// CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@ -594,7 +604,8 @@ llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
llvm.func @body(i64)
llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(guided) {
omp.wsloop schedule(guided)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
// CHECK: call void @__kmpc_dispatch_init_8u
// CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
// CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@ -610,7 +621,8 @@ llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
llvm.func @body(i64)
llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i64) -> () {
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, nonmonotonic) {
omp.wsloop schedule(dynamic, nonmonotonic)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
// CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859
// CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
// CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@ -626,7 +638,8 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i6
llvm.func @body(i64)
llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64) -> () {
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, monotonic) {
omp.wsloop schedule(dynamic, monotonic)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
// CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 536870947
// CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
// CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@ -638,7 +651,8 @@ llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64)
}
llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> () {
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(runtime, simd) {
omp.wsloop schedule(runtime, simd)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
// CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 47
// CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
// CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@ -650,7 +664,8 @@ llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> ()
}
llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> () {
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(guided, simd) {
omp.wsloop schedule(guided, simd)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
// CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 46
// CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
// CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
@ -778,7 +793,8 @@ llvm.func @collapse_wsloop(
// CHECK: %[[TOTAL_SUB_1:.*]] = sub i32 %[[TOTAL]], 1
// CHECK: store i32 %[[TOTAL_SUB_1]], i32*
// CHECK: call void @__kmpc_for_static_init_4u
omp.wsloop (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) {
omp.wsloop collapse(3)
for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
%31 = llvm.load %20 : !llvm.ptr<i32>
%32 = llvm.add %31, %arg0 : i32
%33 = llvm.add %32, %arg1 : i32
@ -838,7 +854,8 @@ llvm.func @collapse_wsloop_dynamic(
// CHECK: store i32 1, i32*
// CHECK: store i32 %[[TOTAL]], i32*
// CHECK: call void @__kmpc_dispatch_init_4u
omp.wsloop (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) schedule(dynamic) {
omp.wsloop collapse(3) schedule(dynamic)
for (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) {
%31 = llvm.load %20 : !llvm.ptr<i32>
%32 = llvm.add %31, %arg0 : i32
%33 = llvm.add %32, %arg1 : i32
@ -869,7 +886,8 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
// CHECK: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]])
}
omp.wsloop (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) ordered(0) {
omp.wsloop ordered(0)
for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
// CHECK: [[OMP_THREAD:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]])
omp.ordered_region {
@ -879,7 +897,8 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
omp.yield
}
omp.wsloop (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) ordered(1) {
omp.wsloop ordered(1)
for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
// CHECK: [[TMP:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR]], i64 0, i64 0
// CHECK: store i64 [[ARG0:%.*]], i64* [[TMP]], align 8
// CHECK: [[TMP2:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR]], i64 0, i64 0
@ -897,7 +916,8 @@ llvm.func @omp_ordered(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i64,
omp.yield
}
omp.wsloop (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) ordered(2) {
omp.wsloop ordered(2)
for (%arg7) : i32 = (%arg0) to (%arg1) step (%arg2) {
// CHECK: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR5]], i64 0, i64 0
// CHECK: store i64 [[ARG0]], i64* [[TMP5]], align 8
// CHECK: [[TMP6:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR5]], i64 0, i64 1
@ -1795,7 +1815,7 @@ llvm.func @omp_sections_with_clauses() -> () {
// introduction mechanism itself is tested elsewhere.
// CHECK-LABEL: @repeated_successor
llvm.func @repeated_successor(%arg0: i64, %arg1: i64, %arg2: i64, %arg3: i1) {
omp.wsloop (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
omp.wsloop for (%arg4) : i64 = (%arg0) to (%arg1) step (%arg2) {
llvm.cond_br %arg3, ^bb1(%arg0 : i64), ^bb1(%arg1 : i64)
^bb1(%0: i64): // 2 preds: ^bb0, ^bb0
omp.yield

View File

@ -11,9 +11,9 @@ module {
%2 = llvm.mlir.constant(0 : index) : i64
%4 = llvm.mlir.constant(0 : i32) : i32
%12 = llvm.alloca %0 x i64 : (i64) -> !llvm.ptr<i64>
omp.wsloop (%arg2) : i64 = (%2) to (%1) step (%0) {
omp.wsloop for (%arg2) : i64 = (%2) to (%1) step (%0) {
omp.parallel {
omp.wsloop (%arg3) : i64 = (%2) to (%0) step (%0) {
omp.wsloop for (%arg3) : i64 = (%2) to (%0) step (%0) {
llvm.store %2, %12 : !llvm.ptr<i64>
omp.yield
}

View File

@ -26,8 +26,8 @@ llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
%c1 = llvm.mlir.constant(1 : i32) : i32
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
omp.parallel {
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
reduction(@add_f32 -> %0 : !llvm.ptr<f32>) {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
%1 = llvm.mlir.constant(2.0 : f32) : f32
omp.reduction %1, %0 : !llvm.ptr<f32>
omp.yield
@ -103,8 +103,8 @@ llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
%2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
omp.parallel {
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %2 : !llvm.ptr<f32>) {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %2 : !llvm.ptr<f32>)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
%1 = llvm.mlir.constant(2.0 : f32) : f32
omp.reduction %1, %0 : !llvm.ptr<f32>
omp.reduction %1, %2 : !llvm.ptr<f32>
@ -189,8 +189,8 @@ llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
%2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
omp.parallel {
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %2 : !llvm.ptr<f32>) {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @add_f32 -> %2 : !llvm.ptr<f32>)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
%1 = llvm.mlir.constant(2.0 : f32) : f32
omp.reduction %1, %0 : !llvm.ptr<f32>
omp.yield
@ -272,8 +272,8 @@ llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
%c1 = llvm.mlir.constant(1 : i32) : i32
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
omp.parallel {
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
reduction(@add_f32 -> %0 : !llvm.ptr<f32>) {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
%1 = llvm.mlir.constant(2.0 : f32) : f32
omp.reduction %1, %0 : !llvm.ptr<f32>
omp.reduction %1, %0 : !llvm.ptr<f32>
@ -362,8 +362,8 @@ llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
%2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr<f32>
omp.parallel {
omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step)
reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @mul_f32 -> %2 : !llvm.ptr<f32>) {
omp.wsloop reduction(@add_f32 -> %0 : !llvm.ptr<f32>, @mul_f32 -> %2 : !llvm.ptr<f32>)
for (%iv) : i64 = (%lb) to (%ub) step (%step) {
%1 = llvm.mlir.constant(2.0 : f32) : f32
omp.reduction %1, %0 : !llvm.ptr<f32>
omp.reduction %1, %2 : !llvm.ptr<f32>