[mlir] Move the Builtin FuncOp to the Func dialect

This commit moves FuncOp out of the builtin dialect, and into the Func
dialect. This move has been planned in some capacity from the moment
we made FuncOp an operation (years ago). This commit handles the
functional aspects of the move, but various aspects are left untouched
to ease migration: func::FuncOp is re-exported into mlir to reduce
the actual API churn, the assembly format still accepts the unqualified
`func`. These temporary measures will remain for a little while to
simplify migration before being removed.

Differential Revision: https://reviews.llvm.org/D121266
This commit is contained in:
River Riddle 2022-03-07 19:16:03 -08:00
parent f4548ed7fc
commit 3655069234
246 changed files with 1137 additions and 1019 deletions

View File

@ -29,6 +29,7 @@
#include "flang/Common/reference.h"
#include "flang/Evaluate/characteristics.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/BuiltinOps.h"
#include <memory>
#include <optional>

View File

@ -19,6 +19,7 @@
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Support/KindMapping.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"

View File

@ -14,8 +14,10 @@
#define FLANG_OPTIMIZER_BUILDER_LOWLEVELINTRINSICS_H
namespace mlir {
namespace func {
class FuncOp;
}
} // namespace func
} // namespace mlir
namespace fir {
class FirOpBuilder;
}
@ -23,10 +25,10 @@ class FirOpBuilder;
namespace fir::factory {
/// Get the `llvm.stacksave` intrinsic.
mlir::FuncOp getLlvmStackSave(FirOpBuilder &builder);
mlir::func::FuncOp getLlvmStackSave(FirOpBuilder &builder);
/// Get the `llvm.stackrestore` intrinsic.
mlir::FuncOp getLlvmStackRestore(FirOpBuilder &builder);
mlir::func::FuncOp getLlvmStackRestore(FirOpBuilder &builder);
} // namespace fir::factory

View File

@ -3,7 +3,7 @@
// Test that `fir.boxproc_host` fails conversion to llvm.
// At the moment this test fails since `fir.boxproc` type does not have a conversion.
// CHECK: failed to legalize operation 'builtin.func'
// CHECK: failed to legalize operation 'func.func'
func @test(%bproc: !fir.boxproc<(i32) -> ()>) {
%tuple = fir.boxproc_host %bproc : (!fir.boxproc<(i32) -> ()>) -> (!fir.ref<tuple<i32,f64>>)
return

View File

@ -4,7 +4,7 @@
// Not implemented yet.
// Currently fails since conversion for boxproc type is not implemented.
// CHECK: failed to legalize operation 'builtin.func'
// CHECK: failed to legalize operation 'func.func'
func @boxing_match(%bproc: !fir.boxproc<(i32) -> ()>) {
%ubproc:2 = fir.unboxproc %bproc : (!fir.boxproc<(i32) -> ()>) -> ((i32) -> (), !fir.ref<tuple<i32,f64>>)
return

View File

@ -16,6 +16,8 @@
struct CharacterTest : public testing::Test {
public:
void SetUp() override {
fir::support::loadDialects(context);
kindMap = std::make_unique<fir::KindMapping>(&context,
"i10:80,l3:24,a1:8,r54:Double,c20:X86_FP80,r11:PPC_FP128,"
"r12:FP128,r13:X86_FP80,r14:Double,r15:Float,r16:Half,r23:BFloat");
@ -31,7 +33,6 @@ public:
mod.push_back(mod);
builder.setInsertionPointToStart(entryBlock);
fir::support::loadDialects(context);
firBuilder = std::make_unique<fir::FirOpBuilder>(mod, *kindMap);
}

View File

@ -15,6 +15,8 @@
struct ComplexTest : public testing::Test {
public:
void SetUp() override {
fir::support::loadDialects(context);
mlir::OpBuilder builder(&context);
auto loc = builder.getUnknownLoc();
@ -27,7 +29,6 @@ public:
mod.push_back(mod);
builder.setInsertionPointToStart(entryBlock);
fir::support::loadDialects(context);
kindMap = std::make_unique<fir::KindMapping>(&context);
firBuilder = std::make_unique<fir::FirOpBuilder>(mod, *kindMap);
helper = std::make_unique<fir::factory::Complex>(*firBuilder, loc);

View File

@ -17,6 +17,8 @@ using namespace mlir;
struct FIRBuilderTest : public testing::Test {
public:
void SetUp() override {
fir::support::loadDialects(context);
llvm::ArrayRef<fir::KindTy> defs;
fir::KindMapping kindMap(&context, defs);
mlir::OpBuilder builder(&context);
@ -31,7 +33,6 @@ public:
mod.push_back(mod);
builder.setInsertionPointToStart(entryBlock);
fir::support::loadDialects(context);
firBuilder = std::make_unique<fir::FirOpBuilder>(mod, kindMap);
}

View File

@ -17,6 +17,8 @@
struct RuntimeCallTest : public testing::Test {
public:
void SetUp() override {
fir::support::loadDialects(context);
mlir::OpBuilder builder(&context);
auto loc = builder.getUnknownLoc();
@ -29,7 +31,6 @@ public:
mod.push_back(mod);
builder.setInsertionPointToStart(entryBlock);
fir::support::loadDialects(context);
kindMap = std::make_unique<fir::KindMapping>(&context);
firBuilder = std::make_unique<fir::FirOpBuilder>(mod, *kindMap);

View File

@ -43,7 +43,7 @@ def benchmark_sparse_mlir_multiplication():
param2_type = ir.RankedTensorType.get([1500, 2000], f64)
result_type = ir.RankedTensorType.get([1000, 2000], f64)
with ir.InsertionPoint(module.body):
@builtin.FuncOp.from_py_func(param1_type, param2_type, result_type)
@func.FuncOp.from_py_func(param1_type, param2_type, result_type)
def sparse_kernel(x, y, z):
return matmul_dsl(x, y, outs=[z])

View File

@ -41,7 +41,7 @@ def create_sparse_np_tensor(dimensions, number_of_elements):
return tensor
def get_kernel_func_from_module(module: ir.Module) -> builtin.FuncOp:
def get_kernel_func_from_module(module: ir.Module) -> func.FuncOp:
"""Takes an mlir module object and extracts the function object out of it.
This function only works for a module with one region, one block, and one
operation.
@ -55,12 +55,12 @@ def get_kernel_func_from_module(module: ir.Module) -> builtin.FuncOp:
return module.operation.regions[0].blocks[0].operations[0]
def emit_timer_func() -> builtin.FuncOp:
def emit_timer_func() -> func.FuncOp:
"""Returns the declaration of nano_time function. If nano_time function is
used, the `MLIR_RUNNER_UTILS` and `MLIR_C_RUNNER_UTILS` must be included.
"""
i64_type = ir.IntegerType.get_signless(64)
nano_time = builtin.FuncOp(
nano_time = func.FuncOp(
"nano_time", ([], [i64_type]), visibility="private")
nano_time.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
return nano_time
@ -76,7 +76,7 @@ def emit_benchmark_wrapped_main_func(func, timer_func):
"""
i64_type = ir.IntegerType.get_signless(64)
memref_of_i64_type = ir.MemRefType.get([-1], i64_type)
wrapped_func = builtin.FuncOp(
wrapped_func = func.FuncOp(
# Same signature and an extra buffer of indices to save timings.
"main",
(func.arguments.types + [memref_of_i64_type], func.type.results),

View File

@ -639,7 +639,7 @@ from mlir.dialects import builtin
with Context():
module = Module.create()
with InsertionPoint(module.body), Location.unknown():
func = builtin.FuncOp("main", ([], []))
func = func.FuncOp("main", ([], []))
```
Also see below for constructors generated from ODS.
@ -660,12 +660,12 @@ with Context():
with InsertionPoint(module.body), Location.unknown():
# Operations can be created in a generic way.
func = Operation.create(
"builtin.func", results=[], operands=[],
"func.func", results=[], operands=[],
attributes={"type":TypeAttr.get(FunctionType.get([], []))},
successors=None, regions=1)
# The result will be downcasted to the concrete `OpView` subclass if
# available.
assert isinstance(func, builtin.FuncOp)
assert isinstance(func, func.FuncOp)
```
Regions are created for an operation when constructing it on the C++ side. They

View File

@ -34,7 +34,7 @@ below[^wip_form1]:
```mlir
shape.function_library @shplib {
builtin.func @matmul(%lhs: !shape.value_shape, %rhs: !shape.value_shape) -> !shape.shape {
func.func @matmul(%lhs: !shape.value_shape, %rhs: !shape.value_shape) -> !shape.shape {
%c1 = shape.const_size 1
%c2 = shape.const_size 2
// We could also allow rank etc operations directly on value_shape too, that

View File

@ -532,12 +532,12 @@ A pipeline view that models the structure of the pass manager, this is the
default view:
```shell
$ mlir-opt -pass-pipeline='builtin.func(my-pass,my-pass)' foo.mlir -pass-statistics
$ mlir-opt -pass-pipeline='func.func(my-pass,my-pass)' foo.mlir -pass-statistics
===-------------------------------------------------------------------------===
... Pass statistics report ...
===-------------------------------------------------------------------------===
'builtin.func' Pipeline
'func.func' Pipeline
MyPass
(S) 15 exampleStat - An example statistic
VerifierPass
@ -551,7 +551,7 @@ A list view that aggregates the statistics of all instances of a specific pass
together:
```shell
$ mlir-opt -pass-pipeline='builtin.func(my-pass, my-pass)' foo.mlir -pass-statistics -pass-statistics-display=list
$ mlir-opt -pass-pipeline='func.func(my-pass, my-pass)' foo.mlir -pass-statistics -pass-statistics-display=list
===-------------------------------------------------------------------------===
... Pass statistics report ...
@ -657,7 +657,7 @@ options ::= '{' (key ('=' value)?)+ '}'
* `op-name`
* This corresponds to the mnemonic name of an operation to run passes on,
e.g. `builtin.func` or `builtin.module`.
e.g. `func.func` or `builtin.module`.
* `pass-name` | `pass-pipeline-name`
* This corresponds to the argument of a registered pass or pass pipeline,
e.g. `cse` or `canonicalize`.
@ -676,7 +676,7 @@ $ mlir-opt foo.mlir -cse -canonicalize -convert-func-to-llvm='use-bare-ptr-memre
Can also be specified as (via the `-pass-pipeline` flag):
```shell
$ mlir-opt foo.mlir -pass-pipeline='builtin.func(cse,canonicalize),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}'
$ mlir-opt foo.mlir -pass-pipeline='func.func(cse,canonicalize),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}'
```
In order to support round-tripping a pass to the textual representation using
@ -997,7 +997,7 @@ pipeline. This display mode is available in mlir-opt via
`-mlir-timing-display=list`.
```shell
$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing -mlir-timing-display=list
$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing -mlir-timing-display=list
===-------------------------------------------------------------------------===
... Pass execution timing report ...
@ -1022,7 +1022,7 @@ the most time, and can also be used to identify when analyses are being
invalidated and recomputed. This is the default display mode.
```shell
$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing
$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing
===-------------------------------------------------------------------------===
... Pass execution timing report ...
@ -1030,7 +1030,7 @@ $ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.func(cse,can
Total Execution Time: 0.0249 seconds
---Wall Time--- --- Name ---
0.0058 ( 70.8%) 'builtin.func' Pipeline
0.0058 ( 70.8%) 'func.func' Pipeline
0.0004 ( 4.3%) CSE
0.0002 ( 2.6%) (A) DominanceInfo
0.0004 ( 4.8%) VerifierPass
@ -1053,7 +1053,7 @@ perceived time, or clock time, whereas the `User Time` will display the total
cpu time.
```shell
$ mlir-opt foo.mlir -pass-pipeline='builtin.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing
$ mlir-opt foo.mlir -pass-pipeline='func.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing
===-------------------------------------------------------------------------===
... Pass execution timing report ...
@ -1061,7 +1061,7 @@ $ mlir-opt foo.mlir -pass-pipeline='builtin.func(cse,canonicalize)' -convert-fun
Total Execution Time: 0.0078 seconds
---User Time--- ---Wall Time--- --- Name ---
0.0177 ( 88.5%) 0.0057 ( 71.3%) 'builtin.func' Pipeline
0.0177 ( 88.5%) 0.0057 ( 71.3%) 'func.func' Pipeline
0.0044 ( 22.0%) 0.0015 ( 18.9%) CSE
0.0029 ( 14.5%) 0.0012 ( 15.2%) (A) DominanceInfo
0.0038 ( 18.9%) 0.0015 ( 18.7%) VerifierPass
@ -1089,7 +1089,7 @@ this instrumentation:
* Print the IR before every pass in the pipeline.
```shell
$ mlir-opt foo.mlir -pass-pipeline='builtin.func(cse)' -print-ir-before=cse
$ mlir-opt foo.mlir -pass-pipeline='func.func(cse)' -print-ir-before=cse
*** IR Dump Before CSE ***
func @simple_constant() -> (i32, i32) {
@ -1105,7 +1105,7 @@ func @simple_constant() -> (i32, i32) {
* Print the IR after every pass in the pipeline.
```shell
$ mlir-opt foo.mlir -pass-pipeline='builtin.func(cse)' -print-ir-after=cse
$ mlir-opt foo.mlir -pass-pipeline='func.func(cse)' -print-ir-after=cse
*** IR Dump After CSE ***
func @simple_constant() -> (i32, i32) {
@ -1126,7 +1126,7 @@ func @simple_constant() -> (i32, i32) {
printing.
```shell
$ mlir-opt foo.mlir -pass-pipeline='builtin.func(cse,cse)' -print-ir-after=cse -print-ir-after-change
$ mlir-opt foo.mlir -pass-pipeline='func.func(cse,cse)' -print-ir-after=cse -print-ir-after-change
*** IR Dump After CSE ***
func @simple_constant() -> (i32, i32) {
@ -1141,7 +1141,7 @@ func @simple_constant() -> (i32, i32) {
above.
```shell
$ mlir-opt foo.mlir -pass-pipeline='builtin.func(cse,bad-pass)' -print-ir-failure
$ mlir-opt foo.mlir -pass-pipeline='func.func(cse,bad-pass)' -print-ir-failure
*** IR Dump After BadPass Failed ***
func @simple_constant() -> (i32, i32) {
@ -1157,9 +1157,9 @@ func @simple_constant() -> (i32, i32) {
is disabled(`-mlir-disable-threading`)
```shell
$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.func(cse)' -print-ir-after=cse -print-ir-module-scope
$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func.func(cse)' -print-ir-after=cse -print-ir-module-scope
*** IR Dump After CSE *** ('builtin.func' operation: @bar)
*** IR Dump After CSE *** ('func.func' operation: @bar)
func @bar(%arg0: f32, %arg1: f32) -> f32 {
...
}
@ -1170,7 +1170,7 @@ func @simple_constant() -> (i32, i32) {
return %c1_i32, %c1_i32_0 : i32, i32
}
*** IR Dump After CSE *** ('builtin.func' operation: @simple_constant)
*** IR Dump After CSE *** ('func.func' operation: @simple_constant)
func @bar(%arg0: f32, %arg1: f32) -> f32 {
...
}
@ -1194,7 +1194,7 @@ was executing, as well as the initial IR before any passes were run. A potential
reproducible may have the form:
```mlir
// configuration: -pass-pipeline='builtin.func(cse,canonicalize),inline' -verify-each
// configuration: -pass-pipeline='func.func(cse,canonicalize),inline' -verify-each
module {
func @foo() {
@ -1229,7 +1229,7 @@ For example, if the failure in the previous example came from `canonicalize`,
the following reproducer will be generated:
```mlir
// configuration: -pass-pipeline='builtin.func(canonicalize)' -verify-each -mlir-disable-threading
// configuration: -pass-pipeline='func.func(canonicalize)' -verify-each -mlir-disable-threading
module {
func @foo() {

View File

@ -31,7 +31,7 @@ defines a [`SymbolTable`](#symbol-table). The name of a symbol *must* be unique
within the parent `SymbolTable`. This name is semantically similar to an SSA
result value, and may be referred to by other operations to provide a symbolic
link, or use, to the symbol. An example of a `Symbol` operation is
[`builtin.func`](Dialects/Builtin.md/#func-mlirfuncop). `builtin.func` defines a
[`func.func`](Dialects/Func.md/#funcfunc-funcfuncop). `func.func` defines a
symbol name, which is [referred to](#referencing-a-symbol) by operations like
[`func.call`](Dialects/Func.md/#funccall-callop).
@ -77,7 +77,7 @@ operation that is also a [symbol table](#symbol-table).
Below is an example of how an operation can reference a symbol operation:
```mlir
// This `builtin.func` operation defines a symbol named `symbol`.
// This `func.func` operation defines a symbol named `symbol`.
func @symbol()
// Our `foo.user` operation contains a SymbolRefAttr with the name of the
@ -106,7 +106,7 @@ module {
// Here we define another nested symbol table, except this time it also defines
// a symbol.
module @module_symbol {
// This `builtin.func` operation defines a symbol named `nested_symbol`.
// This `func.func` operation defines a symbol named `nested_symbol`.
func @nested_symbol()
}

View File

@ -348,7 +348,7 @@ individual scalar arguments.
Examples:
This convention is implemented in the conversion of `builtin.func` and `func.call` to
This convention is implemented in the conversion of `func.func` and `func.call` to
the LLVM dialect, with the former unpacking the descriptor into a set of
individual values and the latter packing those values back into a descriptor so
as to make it transparently usable by other operations. Conversions from other

View File

@ -22,13 +22,16 @@
namespace mlir {
class AffineForOp;
class AffineMap;
class FuncOp;
class LoopLikeOpInterface;
struct MemRefRegion;
class OpBuilder;
class Value;
class ValueRange;
namespace func {
class FuncOp;
} // namespace func
namespace scf {
class ForOp;
class ParallelOp;
@ -79,7 +82,7 @@ LogicalResult promoteIfSingleIteration(AffineForOp forOp);
/// Promotes all single iteration AffineForOp's in the Function, i.e., moves
/// their body into the containing Block.
void promoteSingleIterationLoops(FuncOp f);
void promoteSingleIterationLoops(func::FuncOp f);
/// Skew the operations in an affine.for's body with the specified
/// operation-wise shifts. The shifts are with respect to the original execution
@ -92,7 +95,7 @@ LogicalResult affineForOpBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
/// Identify valid and profitable bands of loops to tile. This is currently just
/// a temporary placeholder to test the mechanics of tiled code generation.
/// Returns all maximal outermost perfect loop nests to tile.
void getTileableBands(FuncOp f,
void getTileableBands(func::FuncOp f,
std::vector<SmallVector<AffineForOp, 6>> *bands);
/// Tiles the specified band of perfectly nested loops creating tile-space loops
@ -259,8 +262,8 @@ LogicalResult coalesceLoops(MutableArrayRef<AffineForOp> loops);
void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef<Value> processorId,
ArrayRef<Value> numProcessors);
/// Gathers all AffineForOps in 'builtin.func' grouped by loop depth.
void gatherLoops(FuncOp func,
/// Gathers all AffineForOps in 'func.func' grouped by loop depth.
void gatherLoops(func::FuncOp func,
std::vector<SmallVector<AffineForOp, 2>> &depthToLoops);
/// Creates an AffineForOp while ensuring that the lower and upper bounds are

View File

@ -18,6 +18,9 @@
#include <limits>
namespace mlir {
namespace func {
class FuncOp;
} // namespace func
class AffineForOp;
@ -28,53 +31,56 @@ enum FusionMode { Greedy, ProducerConsumer, Sibling };
/// Creates a simplification pass for affine structures (maps and sets). In
/// addition, this pass also normalizes memrefs to have the trivial (identity)
/// layout map.
std::unique_ptr<OperationPass<FuncOp>> createSimplifyAffineStructuresPass();
std::unique_ptr<OperationPass<func::FuncOp>>
createSimplifyAffineStructuresPass();
/// Creates a loop invariant code motion pass that hoists loop invariant
/// operations out of affine loops.
std::unique_ptr<OperationPass<FuncOp>>
std::unique_ptr<OperationPass<func::FuncOp>>
createAffineLoopInvariantCodeMotionPass();
/// Creates a pass to convert all parallel affine.for's into 1-d affine.parallel
/// ops.
std::unique_ptr<OperationPass<FuncOp>> createAffineParallelizePass();
std::unique_ptr<OperationPass<func::FuncOp>> createAffineParallelizePass();
/// Apply normalization transformations to affine loop-like ops.
std::unique_ptr<OperationPass<FuncOp>> createAffineLoopNormalizePass();
std::unique_ptr<OperationPass<func::FuncOp>> createAffineLoopNormalizePass();
/// Performs packing (or explicit copying) of accessed memref regions into
/// buffers in the specified faster memory space through either pointwise copies
/// or DMA operations.
std::unique_ptr<OperationPass<FuncOp>> createAffineDataCopyGenerationPass(
std::unique_ptr<OperationPass<func::FuncOp>> createAffineDataCopyGenerationPass(
unsigned slowMemorySpace, unsigned fastMemorySpace,
unsigned tagMemorySpace = 0, int minDmaTransferSize = 1024,
uint64_t fastMemCapacityBytes = std::numeric_limits<uint64_t>::max());
/// Overload relying on pass options for initialization.
std::unique_ptr<OperationPass<FuncOp>> createAffineDataCopyGenerationPass();
std::unique_ptr<OperationPass<func::FuncOp>>
createAffineDataCopyGenerationPass();
/// Creates a pass to replace affine memref accesses by scalars using store to
/// load forwarding and redundant load elimination; consequently also eliminate
/// dead allocs.
std::unique_ptr<OperationPass<FuncOp>> createAffineScalarReplacementPass();
std::unique_ptr<OperationPass<func::FuncOp>>
createAffineScalarReplacementPass();
/// Creates a pass that transforms perfectly nested loops with independent
/// bounds into a single loop.
std::unique_ptr<OperationPass<FuncOp>> createLoopCoalescingPass();
std::unique_ptr<OperationPass<func::FuncOp>> createLoopCoalescingPass();
/// Creates a loop fusion pass which fuses loops according to type of fusion
/// specified in `fusionMode`. Buffers of size less than or equal to
/// `localBufSizeThreshold` are promoted to memory space `fastMemorySpace`.
std::unique_ptr<OperationPass<FuncOp>>
std::unique_ptr<OperationPass<func::FuncOp>>
createLoopFusionPass(unsigned fastMemorySpace = 0,
uint64_t localBufSizeThreshold = 0,
bool maximalFusion = false,
enum FusionMode fusionMode = FusionMode::Greedy);
/// Creates a pass to perform tiling on loop nests.
std::unique_ptr<OperationPass<FuncOp>>
std::unique_ptr<OperationPass<func::FuncOp>>
createLoopTilingPass(uint64_t cacheSizeBytes);
/// Overload relying on pass options for initialization.
std::unique_ptr<OperationPass<FuncOp>> createLoopTilingPass();
std::unique_ptr<OperationPass<func::FuncOp>> createLoopTilingPass();
/// Creates a loop unrolling pass with the provided parameters.
/// 'getUnrollFactor' is a function callback for clients to supply a function
@ -82,7 +88,7 @@ std::unique_ptr<OperationPass<FuncOp>> createLoopTilingPass();
/// factors supplied through other means. If -1 is passed as the unrollFactor
/// and no callback is provided, anything passed from the command-line (if at
/// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor).
std::unique_ptr<OperationPass<FuncOp>> createLoopUnrollPass(
std::unique_ptr<OperationPass<func::FuncOp>> createLoopUnrollPass(
int unrollFactor = -1, bool unrollUpToFactor = false,
bool unrollFull = false,
const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr);
@ -90,19 +96,19 @@ std::unique_ptr<OperationPass<FuncOp>> createLoopUnrollPass(
/// Creates a loop unroll jam pass to unroll jam by the specified factor. A
/// factor of -1 lets the pass use the default factor or the one on the command
/// line if provided.
std::unique_ptr<OperationPass<FuncOp>>
std::unique_ptr<OperationPass<func::FuncOp>>
createLoopUnrollAndJamPass(int unrollJamFactor = -1);
/// Creates a pass to pipeline explicit movement of data across levels of the
/// memory hierarchy.
std::unique_ptr<OperationPass<FuncOp>> createPipelineDataTransferPass();
std::unique_ptr<OperationPass<func::FuncOp>> createPipelineDataTransferPass();
/// Creates a pass to vectorize loops, operations and data types using a
/// target-independent, n-D super-vector abstraction.
std::unique_ptr<OperationPass<FuncOp>>
std::unique_ptr<OperationPass<func::FuncOp>>
createSuperVectorizePass(ArrayRef<int64_t> virtualVectorSize);
/// Overload relying on pass options for initialization.
std::unique_ptr<OperationPass<FuncOp>> createSuperVectorizePass();
std::unique_ptr<OperationPass<func::FuncOp>> createSuperVectorizePass();
//===----------------------------------------------------------------------===//
// Registration

View File

@ -21,10 +21,13 @@ class AffineForOp;
class AffineIfOp;
class AffineParallelOp;
class DominanceInfo;
class FuncOp;
class Operation;
class PostDominanceInfo;
namespace func {
class FuncOp;
} // namespace func
namespace memref {
class AllocOp;
} // namespace memref
@ -96,7 +99,7 @@ struct VectorizationStrategy {
/// Replace affine store and load accesses by scalars by forwarding stores to
/// loads and eliminate invariant affine loads; consequently, eliminate dead
/// allocs.
void affineScalarReplace(FuncOp f, DominanceInfo &domInfo,
void affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
PostDominanceInfo &postDomInfo);
/// Vectorizes affine loops in 'loops' using the n-D vectorization factors in

View File

@ -4,6 +4,10 @@
#include "mlir/Pass/Pass.h"
namespace mlir {
namespace func {
class FuncOp;
} // namespace func
namespace bufferization {
struct OneShotBufferizationOptions;
@ -31,7 +35,7 @@ std::unique_ptr<Pass> createBufferResultsToOutParamsPass();
/// Creates a pass that finalizes a partial bufferization by removing remaining
/// bufferization.to_tensor and bufferization.to_memref operations.
std::unique_ptr<OperationPass<FuncOp>> createFinalizingBufferizePass();
std::unique_ptr<OperationPass<func::FuncOp>> createFinalizingBufferizePass();
/// Create a pass that bufferizes all ops that implement BufferizableOpInterface
/// with One-Shot Bufferize.

View File

@ -11,7 +11,7 @@
include "mlir/Pass/PassBase.td"
def BufferDeallocation : Pass<"buffer-deallocation", "FuncOp"> {
def BufferDeallocation : Pass<"buffer-deallocation", "func::FuncOp"> {
let summary = "Adds all required dealloc operations for all allocations in "
"the input program";
let description = [{
@ -88,7 +88,7 @@ def BufferDeallocation : Pass<"buffer-deallocation", "FuncOp"> {
let constructor = "mlir::bufferization::createBufferDeallocationPass()";
}
def BufferHoisting : Pass<"buffer-hoisting", "FuncOp"> {
def BufferHoisting : Pass<"buffer-hoisting", "func::FuncOp"> {
let summary = "Optimizes placement of allocation operations by moving them "
"into common dominators and out of nested regions";
let description = [{
@ -98,7 +98,7 @@ def BufferHoisting : Pass<"buffer-hoisting", "FuncOp"> {
let constructor = "mlir::bufferization::createBufferHoistingPass()";
}
def BufferLoopHoisting : Pass<"buffer-loop-hoisting", "FuncOp"> {
def BufferLoopHoisting : Pass<"buffer-loop-hoisting", "func::FuncOp"> {
let summary = "Optimizes placement of allocation operations by moving them "
"out of loop nests";
let description = [{
@ -133,7 +133,7 @@ def BufferResultsToOutParams : Pass<"buffer-results-to-out-params", "ModuleOp">
let dependentDialects = ["memref::MemRefDialect"];
}
def FinalizingBufferize : Pass<"finalizing-bufferize", "FuncOp"> {
def FinalizingBufferize : Pass<"finalizing-bufferize", "func::FuncOp"> {
let summary = "Finalize a partial bufferization";
let description = [{
A bufferize pass that finalizes a partial bufferization by removing
@ -231,7 +231,7 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
let constructor = "mlir::bufferization::createOneShotBufferizePass()";
}
def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "FuncOp"> {
def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "func::FuncOp"> {
let summary = "Promotes heap-based allocations to automatically managed "
"stack-based allocations";
let description = [{

View File

@ -11,10 +11,11 @@
#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/FunctionInterfaces.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
@ -29,4 +30,25 @@ class PatternRewriter;
#include "mlir/Dialect/Func/IR/FuncOpsDialect.h.inc"
namespace mlir {
/// FIXME: This is a temporary using directive to ease the transition of FuncOp
/// to the Func dialect. This will be removed after all uses are updated.
using FuncOp = func::FuncOp;
} // namespace mlir
namespace llvm {
/// Allow stealing the low bits of FuncOp.
///
/// This trait lets mlir::func::FuncOp be stored in llvm::PointerUnion and
/// llvm::PointerIntPair by round-tripping the op through its opaque pointer
/// representation (the underlying Operation*).
template <>
struct PointerLikeTypeTraits<mlir::func::FuncOp> {
  // Convert the op to its opaque Operation* representation.
  static inline void *getAsVoidPointer(mlir::func::FuncOp val) {
    return const_cast<void *>(val.getAsOpaquePointer());
  }
  // Rebuild the op from an opaque pointer previously produced above.
  static inline mlir::func::FuncOp getFromVoidPointer(void *p) {
    return mlir::func::FuncOp::getFromOpaquePointer(p);
  }
  // NOTE(review): 3 low bits assumes the underlying Operation allocation is
  // at least 8-byte aligned — presumably guaranteed by MLIR's operation
  // allocator; confirm if alignment assumptions change.
  static constexpr int numLowBitsAvailable = 3;
};
} // namespace llvm
#endif // MLIR_DIALECT_FUNC_IR_OPS_H

View File

@ -13,6 +13,7 @@ include "mlir/IR/OpAsmInterface.td"
include "mlir/IR/SymbolInterfaces.td"
include "mlir/Interfaces/CallInterfaces.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/IR/FunctionInterfaces.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
@ -201,6 +202,120 @@ def ConstantOp : Func_Op<"constant",
let hasVerifier = 1;
}
//===----------------------------------------------------------------------===//
// FuncOp
//===----------------------------------------------------------------------===//
// ODS definition of `func.func`, the function operation that replaces the
// old `builtin.func`. Traits: AffineScope/AutomaticAllocationScope scope the
// body, IsolatedFromAbove forbids implicit captures, and the op is both a
// Symbol and a callable.
def FuncOp : Func_Op<"func", [
AffineScope, AutomaticAllocationScope, CallableOpInterface,
FunctionOpInterface, IsolatedFromAbove, Symbol
]> {
let summary = "An operation with a name containing a single `SSACFG` region";
let description = [{
Operations within the function cannot implicitly capture values defined
outside of the function, i.e. Functions are `IsolatedFromAbove`. All
external references must use function arguments or attributes that establish
a symbolic connection (e.g. symbols referenced by name via a string
attribute like SymbolRefAttr). An external function declaration (used when
referring to a function declared in some other module) has no body. While
the MLIR textual form provides a nice inline syntax for function arguments,
they are internally represented as block arguments to the first block in
the region.
Only dialect attribute names may be specified in the attribute dictionaries
for function arguments, results, or the function itself.
Example:
```mlir
// External function definitions.
func.func @abort()
func.func @scribble(i32, i64, memref<? x 128 x f32, #layout_map0>) -> f64
// A function that returns its argument twice:
func.func @count(%x: i64) -> (i64, i64)
attributes {fruit: "banana"} {
return %x, %x: i64, i64
}
// A function with an argument attribute
func.func @example_fn_arg(%x: i32 {swift.self = unit})
// A function with a result attribute
func.func @example_fn_result() -> (f64 {dialectName.attrName = 0 : i64})
// A function with an attribute
func.func @example_fn_attr() attributes {dialectName.attrName = false}
```
}];
// Symbol name, function signature, and optional symbol visibility
// ("private"/"nested"); visibility is absent for public functions.
let arguments = (ins SymbolNameAttr:$sym_name,
TypeAttrOf<FunctionType>:$type,
OptionalAttr<StrAttr>:$sym_visibility);
// A single region holding the (possibly empty, for declarations) body.
let regions = (region AnyRegion:$body);
let builders = [OpBuilder<(ins
"StringRef":$name, "FunctionType":$type,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs,
CArg<"ArrayRef<DictionaryAttr>", "{}">:$argAttrs)
>];
let extraClassDeclaration = [{
// Convenience factories that build an unattached FuncOp.
static FuncOp create(Location location, StringRef name, FunctionType type,
ArrayRef<NamedAttribute> attrs = {});
static FuncOp create(Location location, StringRef name, FunctionType type,
Operation::dialect_attr_range attrs);
static FuncOp create(Location location, StringRef name, FunctionType type,
ArrayRef<NamedAttribute> attrs,
ArrayRef<DictionaryAttr> argAttrs);
/// Create a deep copy of this function and all of its blocks, remapping any
/// operands that use values outside of the function using the map that is
/// provided (leaving them alone if no entry is present). If the mapper
/// contains entries for function arguments, these arguments are not
/// included in the new function. Replaces references to cloned sub-values
/// with the corresponding value that is copied, and adds those mappings to
/// the mapper.
FuncOp clone(BlockAndValueMapping &mapper);
FuncOp clone();
/// Clone the internal blocks and attributes from this function into dest.
/// Any cloned blocks are appended to the back of dest. This function
/// asserts that the attributes of the current function and dest are
/// compatible.
void cloneInto(FuncOp dest, BlockAndValueMapping &mapper);
//===------------------------------------------------------------------===//
// CallableOpInterface
//===------------------------------------------------------------------===//
/// Returns the region on the current operation that is callable. This may
/// return null in the case of an external callable object, e.g. an external
/// function.
::mlir::Region *getCallableRegion() { return isExternal() ? nullptr : &getBody(); }
/// Returns the results types that the callable region produces when
/// executed.
ArrayRef<Type> getCallableResults() { return getType().getResults(); }
//===------------------------------------------------------------------===//
// FunctionOpInterface Methods
//===------------------------------------------------------------------===//
/// Returns the argument types of this function.
ArrayRef<Type> getArgumentTypes() { return getType().getInputs(); }
/// Returns the result types of this function.
ArrayRef<Type> getResultTypes() { return getType().getResults(); }
//===------------------------------------------------------------------===//
// SymbolOpInterface Methods
//===------------------------------------------------------------------===//
/// A function symbol is a declaration iff it has no body.
bool isDeclaration() { return isExternal(); }
}];
// Parser/printer are hand-written (they also accept the legacy unqualified
// `func` form during the migration).
let hasCustomAssemblyFormat = 1;
}
//===----------------------------------------------------------------------===//
// ReturnOp
//===----------------------------------------------------------------------===//

View File

@ -14,9 +14,9 @@ include "mlir/Pass/PassBase.td"
def FuncBufferize : Pass<"func-bufferize", "ModuleOp"> {
let summary = "Bufferize func/call/return ops";
let description = [{
A bufferize pass that bufferizes builtin.func and func.call ops.
A bufferize pass that bufferizes func.func and func.call ops.
Because this pass updates builtin.func ops, it must be a module pass. It is
Because this pass updates func.func ops, it must be a module pass. It is
useful to keep this pass separate from other bufferizations so that the
other ones can be run at function-level in parallel.

View File

@ -23,6 +23,10 @@ class Module;
} // namespace llvm
namespace mlir {
namespace func {
class FuncOp;
} // namespace func
/// Pass that moves ops which are likely an index computation into gpu.launch
/// body.
std::unique_ptr<Pass> createGpuLauchSinkIndexComputationsPass();
@ -33,7 +37,7 @@ std::unique_ptr<OperationPass<ModuleOp>>
createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef());
/// Rewrites a function region so that GPU ops execute asynchronously.
std::unique_ptr<OperationPass<FuncOp>> createGpuAsyncRegionPass();
std::unique_ptr<OperationPass<func::FuncOp>> createGpuAsyncRegionPass();
/// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
void populateGpuAllReducePatterns(RewritePatternSet &patterns);

View File

@ -14,7 +14,9 @@
#include "mlir/IR/OpDefinition.h"
namespace mlir {
namespace func {
class FuncOp;
} // namespace func
namespace linalg {
@ -155,7 +157,8 @@ public:
static StringRef getDependenceTypeStr(DependenceType depType);
// Builds a linalg dependence graph for the ops of type LinalgOp under `f`.
static LinalgDependenceGraph buildDependenceGraph(Aliases &aliases, FuncOp f);
static LinalgDependenceGraph buildDependenceGraph(Aliases &aliases,
func::FuncOp f);
LinalgDependenceGraph(Aliases &aliases, ArrayRef<LinalgOp> ops);
/// Returns the X such that op -> X is a dependence of type dt.

View File

@ -18,6 +18,13 @@
#include "mlir/Pass/Pass.h"
namespace mlir {
namespace func {
class FuncOp;
} // namespace func
// TODO: Remove when all references have been updated.
using FuncOp = func::FuncOp;
namespace bufferization {
struct OneShotBufferizationOptions;
} // namespace bufferization
@ -31,29 +38,32 @@ std::unique_ptr<Pass> createFoldReshapeOpsByLinearizationPass();
std::unique_ptr<Pass> createLinalgNamedOpConversionPass();
std::unique_ptr<OperationPass<FuncOp>>
std::unique_ptr<OperationPass<func::FuncOp>>
createLinalgTilingPass(ArrayRef<int64_t> tileSizes = {},
linalg::LinalgTilingLoopType loopType =
linalg::LinalgTilingLoopType::Loops);
std::unique_ptr<OperationPass<FuncOp>>
std::unique_ptr<OperationPass<func::FuncOp>>
createLinalgPromotionPass(bool dynamicBuffers, bool useAlloca);
std::unique_ptr<OperationPass<FuncOp>> createLinalgPromotionPass();
std::unique_ptr<OperationPass<func::FuncOp>> createLinalgPromotionPass();
std::unique_ptr<OperationPass<FuncOp>> createLinalgInlineScalarOperandsPass();
std::unique_ptr<OperationPass<func::FuncOp>>
createLinalgInlineScalarOperandsPass();
/// Create a pass to convert Linalg operations to scf.for loops and
/// memref.load/memref.store accesses.
std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToLoopsPass();
std::unique_ptr<OperationPass<func::FuncOp>> createConvertLinalgToLoopsPass();
/// Create a pass to convert Linalg operations to scf.parallel loops and
/// memref.load/memref.store accesses.
std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToParallelLoopsPass();
std::unique_ptr<OperationPass<func::FuncOp>>
createConvertLinalgToParallelLoopsPass();
/// Create a pass to convert Linalg operations to affine.for loops and
/// affine_load/affine_store accesses.
/// Placeholder for now, this is NYI.
std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToAffineLoopsPass();
std::unique_ptr<OperationPass<func::FuncOp>>
createConvertLinalgToAffineLoopsPass();
/// This pass implements a cross-dialect bufferization approach and performs an
/// analysis to determine which op operands and results may be bufferized in the
@ -68,11 +78,11 @@ std::unique_ptr<Pass> createLinalgComprehensiveModuleBufferizePass(
/// Create a pass to convert Linalg operations which work on tensors to use
/// buffers instead.
std::unique_ptr<OperationPass<FuncOp>> createLinalgBufferizePass();
std::unique_ptr<OperationPass<func::FuncOp>> createLinalgBufferizePass();
/// Create a pass to convert named Linalg operations to Linalg generic
/// operations.
std::unique_ptr<OperationPass<FuncOp>> createLinalgGeneralizationPass();
std::unique_ptr<OperationPass<func::FuncOp>> createLinalgGeneralizationPass();
/// Create a pass to convert Linalg operations to equivalent operations that
/// work on primitive types, if possible.
@ -82,27 +92,28 @@ std::unique_ptr<Pass> createLinalgDetensorizePass();
/// Linalg strategy passes.
//===----------------------------------------------------------------------===//
/// Create a LinalgStrategyTileAndFusePass.
std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyTileAndFusePass(
std::unique_ptr<OperationPass<func::FuncOp>>
createLinalgStrategyTileAndFusePass(
StringRef opName = "", const linalg::LinalgTilingAndFusionOptions &opt = {},
const linalg::LinalgTransformationFilter &filter =
linalg::LinalgTransformationFilter());
/// Create a LinalgStrategyTilePass.
std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyTilePass(
std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyTilePass(
StringRef opName = "",
const linalg::LinalgTilingOptions &opt = linalg::LinalgTilingOptions(),
const linalg::LinalgTransformationFilter &filter =
linalg::LinalgTransformationFilter());
/// Create a LinalgStrategyPadPass.
std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyPadPass(
std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyPadPass(
StringRef opName = "",
const linalg::LinalgPaddingOptions &opt = linalg::LinalgPaddingOptions(),
const linalg::LinalgTransformationFilter &filter =
linalg::LinalgTransformationFilter());
/// Create a LinalgStrategyPromotePass.
std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyPromotePass(
std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyPromotePass(
StringRef opName = "",
const linalg::LinalgPromotionOptions &opt =
linalg::LinalgPromotionOptions(),
@ -110,24 +121,25 @@ std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyPromotePass(
linalg::LinalgTransformationFilter());
/// Create a LinalgStrategyGeneralizePass.
std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyGeneralizePass(
std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyGeneralizePass(
StringRef opName = "", const linalg::LinalgTransformationFilter &filter =
linalg::LinalgTransformationFilter());
/// Create a LinalgStrategyDecomposePass.
// TODO: if/when we need finer control add an `opName` parameter.
std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyDecomposePass(
std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyDecomposePass(
const linalg::LinalgTransformationFilter &filter =
linalg::LinalgTransformationFilter());
/// Create a LinalgStrategyInterchangePass.
std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyInterchangePass(
std::unique_ptr<OperationPass<func::FuncOp>>
createLinalgStrategyInterchangePass(
ArrayRef<int64_t> iteratorInterchange = {},
const linalg::LinalgTransformationFilter &filter =
linalg::LinalgTransformationFilter());
/// Create a LinalgStrategyVectorizePass.
std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyVectorizePass(
std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyVectorizePass(
StringRef opName = "",
linalg::LinalgVectorizationOptions opt =
linalg::LinalgVectorizationOptions(),
@ -136,20 +148,22 @@ std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyVectorizePass(
bool padVectorize = false);
/// Create a LinalgStrategyEnablePass.
std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyEnablePass(
std::unique_ptr<OperationPass<func::FuncOp>> createLinalgStrategyEnablePass(
linalg::LinalgEnablingOptions opt = linalg::LinalgEnablingOptions(),
const linalg::LinalgTransformationFilter &filter =
linalg::LinalgTransformationFilter());
/// Create a LinalgStrategyLowerVectorsPass.
std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyLowerVectorsPass(
std::unique_ptr<OperationPass<func::FuncOp>>
createLinalgStrategyLowerVectorsPass(
linalg::LinalgVectorLoweringOptions opt =
linalg::LinalgVectorLoweringOptions(),
const linalg::LinalgTransformationFilter &filter =
linalg::LinalgTransformationFilter());
/// Create a LinalgStrategyRemoveMarkersPass.
std::unique_ptr<OperationPass<FuncOp>> createLinalgStrategyRemoveMarkersPass();
std::unique_ptr<OperationPass<func::FuncOp>>
createLinalgStrategyRemoveMarkersPass();
//===----------------------------------------------------------------------===//
// Registration

View File

@ -10,7 +10,9 @@
#define MLIR_DIALECT_LINALG_TRANSFORMS_HOISTING_H_
namespace mlir {
namespace func {
class FuncOp;
} // namespace func
namespace linalg {
@ -27,11 +29,11 @@ namespace linalg {
/// results in scf::ForOp yielding the value that originally transited through
/// memory.
// TODO: generalize on a per-need basis.
void hoistRedundantVectorTransfers(FuncOp func);
void hoistRedundantVectorTransfers(func::FuncOp func);
/// Same behavior as `hoistRedundantVectorTransfers` but works on tensors
/// instead of buffers.
void hoistRedundantVectorTransfersOnTensor(FuncOp func);
void hoistRedundantVectorTransfersOnTensor(func::FuncOp func);
} // namespace linalg
} // namespace mlir

View File

@ -19,18 +19,22 @@
#include "mlir/Pass/Pass.h"
namespace mlir {
namespace func {
class FuncOp;
} // namespace func
namespace quant {
/// Creates a pass that converts quantization simulation operations (i.e.
/// FakeQuant and those like it) to casts into/out of supported QuantizedTypes.
std::unique_ptr<OperationPass<FuncOp>> createConvertSimulatedQuantPass();
std::unique_ptr<OperationPass<func::FuncOp>> createConvertSimulatedQuantPass();
/// Creates a pass that converts constants followed by a qbarrier to a
/// constant whose value is quantized. This is typically one of the last
/// passes done when lowering to express actual quantized arithmetic in a
/// low level representation. Because it modifies the constant, it is
/// destructive and cannot be undone.
std::unique_ptr<OperationPass<FuncOp>> createConvertConstPass();
std::unique_ptr<OperationPass<func::FuncOp>> createConvertConstPass();
//===----------------------------------------------------------------------===//
// Registration

View File

@ -19,7 +19,6 @@
#include "llvm/ADT/STLExtras.h"
namespace mlir {
class FuncOp;
class Location;
class Operation;
class OpBuilder;
@ -28,6 +27,10 @@ class RewriterBase;
class ValueRange;
class Value;
namespace func {
class FuncOp;
} // namespace func
namespace scf {
class IfOp;
class ForOp;
@ -68,8 +71,9 @@ scf::ForOp cloneWithNewYields(OpBuilder &b, scf::ForOp loop,
/// collide with another FuncOp name.
// TODO: support more than single-block regions.
// TODO: more flexible constant handling.
FailureOr<FuncOp> outlineSingleBlockRegion(RewriterBase &rewriter, Location loc,
Region &region, StringRef funcName);
FailureOr<func::FuncOp> outlineSingleBlockRegion(RewriterBase &rewriter,
Location loc, Region &region,
StringRef funcName);
/// Outline the then and/or else regions of `ifOp` as follows:
/// - if `thenFn` is not null, `thenFnName` must be specified and the `then`
@ -79,8 +83,8 @@ FailureOr<FuncOp> outlineSingleBlockRegion(RewriterBase &rewriter, Location loc,
/// Creates new FuncOps and thus cannot be used in a FuncOp pass.
/// The client is responsible for providing a unique `thenFnName`/`elseFnName`
/// that will not collide with another FuncOp name.
LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, FuncOp *thenFn,
StringRef thenFnName, FuncOp *elseFn,
LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, func::FuncOp *thenFn,
StringRef thenFnName, func::FuncOp *elseFn,
StringRef elseFnName);
/// Get a list of innermost parallel loops contained in `rootOp`. Innermost

View File

@ -15,6 +15,7 @@
#define MLIR_DIALECT_SHAPE_IR_SHAPE_H
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Dialect.h"

View File

@ -13,12 +13,10 @@
#ifndef MLIR_IR_BUILTINOPS_H_
#define MLIR_IR_BUILTINOPS_H_
#include "mlir/IR/FunctionInterfaces.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/OwningOpRef.h"
#include "mlir/IR/RegionKindInterface.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "mlir/Interfaces/CastInterfaces.h"
#include "mlir/Interfaces/DataLayoutInterfaces.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
@ -32,18 +30,6 @@
#include "mlir/IR/BuiltinOps.h.inc"
namespace llvm {
/// Allow stealing the low bits of FuncOp.
template <>
struct PointerLikeTypeTraits<mlir::FuncOp> {
static inline void *getAsVoidPointer(mlir::FuncOp val) {
return const_cast<void *>(val.getAsOpaquePointer());
}
static inline mlir::FuncOp getFromVoidPointer(void *p) {
return mlir::FuncOp::getFromOpaquePointer(p);
}
static constexpr int numLowBitsAvailable = 3;
};
/// Allow stealing the low bits of ModuleOp.
template <>
struct PointerLikeTypeTraits<mlir::ModuleOp> {

View File

@ -15,11 +15,9 @@
#define BUILTIN_OPS
include "mlir/IR/BuiltinDialect.td"
include "mlir/IR/FunctionInterfaces.td"
include "mlir/IR/OpAsmInterface.td"
include "mlir/IR/RegionKindInterface.td"
include "mlir/IR/SymbolInterfaces.td"
include "mlir/Interfaces/CallInterfaces.td"
include "mlir/Interfaces/CastInterfaces.td"
include "mlir/Interfaces/DataLayoutInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
@ -28,120 +26,6 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
class Builtin_Op<string mnemonic, list<Trait> traits = []> :
Op<Builtin_Dialect, mnemonic, traits>;
//===----------------------------------------------------------------------===//
// FuncOp
//===----------------------------------------------------------------------===//
def FuncOp : Builtin_Op<"func", [
AffineScope, AutomaticAllocationScope, CallableOpInterface,
FunctionOpInterface, IsolatedFromAbove, Symbol
]> {
let summary = "An operation with a name containing a single `SSACFG` region";
let description = [{
Operations within the function cannot implicitly capture values defined
outside of the function, i.e. Functions are `IsolatedFromAbove`. All
external references must use function arguments or attributes that establish
a symbolic connection (e.g. symbols referenced by name via a string
attribute like SymbolRefAttr). An external function declaration (used when
referring to a function declared in some other module) has no body. While
the MLIR textual form provides a nice inline syntax for function arguments,
they are internally represented as block arguments to the first block in
the region.
Only dialect attribute names may be specified in the attribute dictionaries
for function arguments, results, or the function itself.
Example:
```mlir
// External function definitions.
func @abort()
func @scribble(i32, i64, memref<? x 128 x f32, #layout_map0>) -> f64
// A function that returns its argument twice:
func @count(%x: i64) -> (i64, i64)
attributes {fruit: "banana"} {
return %x, %x: i64, i64
}
// A function with an argument attribute
func @example_fn_arg(%x: i32 {swift.self = unit})
// A function with a result attribute
func @example_fn_result() -> (f64 {dialectName.attrName = 0 : i64})
// A function with an attribute
func @example_fn_attr() attributes {dialectName.attrName = false}
```
}];
let arguments = (ins SymbolNameAttr:$sym_name,
TypeAttrOf<FunctionType>:$type,
OptionalAttr<StrAttr>:$sym_visibility);
let regions = (region AnyRegion:$body);
let builders = [OpBuilder<(ins
"StringRef":$name, "FunctionType":$type,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs,
CArg<"ArrayRef<DictionaryAttr>", "{}">:$argAttrs)
>];
let extraClassDeclaration = [{
static FuncOp create(Location location, StringRef name, FunctionType type,
ArrayRef<NamedAttribute> attrs = {});
static FuncOp create(Location location, StringRef name, FunctionType type,
Operation::dialect_attr_range attrs);
static FuncOp create(Location location, StringRef name, FunctionType type,
ArrayRef<NamedAttribute> attrs,
ArrayRef<DictionaryAttr> argAttrs);
/// Create a deep copy of this function and all of its blocks, remapping any
/// operands that use values outside of the function using the map that is
/// provided (leaving them alone if no entry is present). If the mapper
/// contains entries for function arguments, these arguments are not
/// included in the new function. Replaces references to cloned sub-values
/// with the corresponding value that is copied, and adds those mappings to
/// the mapper.
FuncOp clone(BlockAndValueMapping &mapper);
FuncOp clone();
/// Clone the internal blocks and attributes from this function into dest.
/// Any cloned blocks are appended to the back of dest. This function
/// asserts that the attributes of the current function and dest are
/// compatible.
void cloneInto(FuncOp dest, BlockAndValueMapping &mapper);
//===------------------------------------------------------------------===//
// CallableOpInterface
//===------------------------------------------------------------------===//
/// Returns the region on the current operation that is callable. This may
/// return null in the case of an external callable object, e.g. an external
/// function.
::mlir::Region *getCallableRegion() { return isExternal() ? nullptr : &getBody(); }
/// Returns the results types that the callable region produces when
/// executed.
ArrayRef<Type> getCallableResults() { return getType().getResults(); }
//===------------------------------------------------------------------===//
// FunctionOpInterface Methods
//===------------------------------------------------------------------===//
/// Returns the argument types of this function.
ArrayRef<Type> getArgumentTypes() { return getType().getInputs(); }
/// Returns the result types of this function.
ArrayRef<Type> getResultTypes() { return getType().getResults(); }
//===------------------------------------------------------------------===//
// SymbolOpInterface Methods
//===------------------------------------------------------------------===//
bool isDeclaration() { return isExternal(); }
}];
let hasCustomAssemblyFormat = 1;
}
//===----------------------------------------------------------------------===//
// ModuleOp
//===----------------------------------------------------------------------===//

View File

@ -158,7 +158,7 @@ def FunctionOpInterface : OpInterface<"FunctionOpInterface"> {
/// Block argument iterator types.
using BlockArgListType = Region::BlockArgListType;
using args_iterator = BlockArgListType::iterator;
//===------------------------------------------------------------------===//
// Body Handling
//===------------------------------------------------------------------===//

View File

@ -22,6 +22,7 @@
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"

View File

@ -22,6 +22,7 @@
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
#include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"

View File

@ -9,6 +9,7 @@
#include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h"
#include "../PassDetail.h"
#include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRV.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
#include "mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h"

View File

@ -13,6 +13,7 @@
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Conversion/MemRefToLLVM/AllocLikeConversion.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"

View File

@ -10,6 +10,7 @@
#include "../PassDetail.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/Shape/IR/Shape.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"

View File

@ -17,6 +17,7 @@
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/IntegerSet.h"

View File

@ -10,6 +10,7 @@
#define DIALECT_AFFINE_TRANSFORMS_PASSDETAIL_H_
#include "mlir/Dialect/Affine/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
namespace mlir {

View File

@ -18,6 +18,7 @@
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/LoopUtils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BlockAndValueMapping.h"

View File

@ -18,6 +18,7 @@
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/BlockAndValueMapping.h"
@ -2611,7 +2612,7 @@ gatherLoopsInBlock(Block *block, unsigned currLoopDepth,
}
}
/// Gathers all AffineForOps in 'builtin.func' grouped by loop depth.
/// Gathers all AffineForOps in 'func.func' grouped by loop depth.
void mlir::gatherLoops(FuncOp func,
std::vector<SmallVector<AffineForOp, 2>> &depthToLoops) {
for (auto &block : func)

View File

@ -17,6 +17,7 @@
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Affine/LoopUtils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/BlockAndValueMapping.h"

View File

@ -8,6 +8,7 @@
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/AsmState.h"
#include "mlir/IR/BlockAndValueMapping.h"

View File

@ -13,6 +13,7 @@ add_mlir_dialect_library(MLIRBufferization
LINK_LIBS PUBLIC
MLIRDialect
MLIRFunc
MLIRIR
MLIRTensor
MLIRMemRef

View File

@ -9,6 +9,7 @@
#ifndef DIALECT_BUFFERIZATION_TRANSFORMS_PASSDETAIL_H_
#define DIALECT_BUFFERIZATION_TRANSFORMS_PASSDETAIL_H_
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
namespace mlir {

View File

@ -13,6 +13,7 @@
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/FunctionImplementation.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/PatternMatch.h"
@ -21,6 +22,7 @@
#include "mlir/Support/MathExtras.h"
#include "mlir/Transforms/InliningUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/FormatVariadic.h"
@ -56,6 +58,12 @@ struct FuncInlinerInterface : public DialectInlinerInterface {
return true;
}
/// Region-to-region inlining hook: any region belonging to a func operation
/// may be inlined into any other region, irrespective of the value mapping.
bool isLegalToInline(Region *, Region *, bool,
BlockAndValueMapping &) const final {
return true;
}
//===--------------------------------------------------------------------===//
// Transformation Hooks
//===--------------------------------------------------------------------===//
@ -208,6 +216,129 @@ bool ConstantOp::isBuildableWith(Attribute value, Type type) {
return value.isa<FlatSymbolRefAttr>() && type.isa<FunctionType>();
}
//===----------------------------------------------------------------------===//
// FuncOp
//===----------------------------------------------------------------------===//
/// Build a detached FuncOp (not yet inserted into any block) with the given
/// name, function signature, and discardable attributes.
FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
                      ArrayRef<NamedAttribute> attrs) {
  OpBuilder opBuilder(location->getContext());
  OperationState opState(location, getOperationName());
  FuncOp::build(opBuilder, opState, name, type, attrs);
  return cast<FuncOp>(Operation::create(opState));
}
/// Convenience overload taking a dialect-attribute range; the attributes are
/// materialized into a temporary vector and forwarded to the ArrayRef form.
FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
                      Operation::dialect_attr_range attrs) {
  SmallVector<NamedAttribute, 8> attrList(attrs.begin(), attrs.end());
  return create(location, name, type, llvm::makeArrayRef(attrList));
}
/// Overload that additionally attaches a per-argument attribute dictionary to
/// each function argument of the newly created op.
FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
                      ArrayRef<NamedAttribute> attrs,
                      ArrayRef<DictionaryAttr> argAttrs) {
  FuncOp newFunc = create(location, name, type, attrs);
  newFunc.setAllArgAttrs(argAttrs);
  return newFunc;
}
/// Populate `state` for a FuncOp: sets the symbol name and function-type
/// attributes, appends any extra discardable attributes, creates the single
/// body region, and optionally attaches per-argument attribute dictionaries.
void FuncOp::build(OpBuilder &builder, OperationState &state, StringRef name,
FunctionType type, ArrayRef<NamedAttribute> attrs,
ArrayRef<DictionaryAttr> argAttrs) {
state.addAttribute(SymbolTable::getSymbolAttrName(),
builder.getStringAttr(name));
state.addAttribute(FunctionOpInterface::getTypeAttrName(),
TypeAttr::get(type));
state.attributes.append(attrs.begin(), attrs.end());
// The (possibly empty) body region; external functions leave it empty.
state.addRegion();
if (argAttrs.empty())
return;
// One attribute dictionary per function argument is required.
assert(type.getNumInputs() == argAttrs.size());
function_interface_impl::addArgAndResultAttrs(builder, state, argAttrs,
/*resultAttrs=*/llvm::None);
}
/// Parse a FuncOp via the shared function-op parser; func.func does not
/// support variadic signatures.
ParseResult FuncOp::parse(OpAsmParser &parser, OperationState &result) {
  auto funcTypeBuilder = [](Builder &builder, ArrayRef<Type> argTypes,
                            ArrayRef<Type> results,
                            function_interface_impl::VariadicFlag,
                            std::string &) {
    return builder.getFunctionType(argTypes, results);
  };
  return function_interface_impl::parseFunctionOp(
      parser, result, /*allowVariadic=*/false, funcTypeBuilder);
}
void FuncOp::print(OpAsmPrinter &p) {
function_interface_impl::printFunctionOp(p, *this, /*isVariadic=*/false);
}
/// Clone the internal blocks from this function into dest and all attributes
/// from this function to dest.
void FuncOp::cloneInto(FuncOp dest, BlockAndValueMapping &mapper) {
// Add the attributes of this function to dest. Insertion order matters:
// MapVector::insert does not overwrite an existing key, so attributes
// already present on `dest` take precedence over same-named attributes
// from this function.
llvm::MapVector<StringAttr, Attribute> newAttrMap;
for (const auto &attr : dest->getAttrs())
newAttrMap.insert({attr.getName(), attr.getValue()});
for (const auto &attr : (*this)->getAttrs())
newAttrMap.insert({attr.getName(), attr.getValue()});
// Rebuild a NamedAttribute list from the merged map and install it on dest.
auto newAttrs = llvm::to_vector(llvm::map_range(
newAttrMap, [](std::pair<StringAttr, Attribute> attrPair) {
return NamedAttribute(attrPair.first, attrPair.second);
}));
dest->setAttrs(DictionaryAttr::get(getContext(), newAttrs));
// Clone the body; cloned blocks are appended to dest's region.
getBody().cloneInto(&dest.getBody(), mapper);
}
/// Create a deep copy of this function and all of its blocks, remapping
/// any operands that use values outside of the function using the map that is
/// provided (leaving them alone if no entry is present). Replaces references
/// to cloned sub-values with the corresponding value that is copied, and adds
/// those mappings to the mapper. Arguments that have an entry in `mapper`
/// are dropped from the cloned function's signature.
FuncOp FuncOp::clone(BlockAndValueMapping &mapper) {
// Create the new function.
FuncOp newFunc = cast<FuncOp>(getOperation()->cloneWithoutRegions());
// If the function has a body, then the user might be deleting arguments to
// the function by specifying them in the mapper. If so, we don't add the
// argument to the input type vector.
if (!isExternal()) {
FunctionType oldType = getType();
unsigned oldNumArgs = oldType.getNumInputs();
SmallVector<Type, 4> newInputs;
newInputs.reserve(oldNumArgs);
// Keep only the input types whose arguments are not being remapped away.
for (unsigned i = 0; i != oldNumArgs; ++i)
if (!mapper.contains(getArgument(i)))
newInputs.push_back(oldType.getInput(i));
/// If any of the arguments were dropped, update the type and drop any
/// necessary argument attributes.
if (newInputs.size() != oldNumArgs) {
newFunc.setType(FunctionType::get(oldType.getContext(), newInputs,
oldType.getResults()));
// Filter the per-argument attribute dictionaries with the same
// predicate so they stay aligned with the surviving arguments.
if (ArrayAttr argAttrs = getAllArgAttrs()) {
SmallVector<Attribute> newArgAttrs;
newArgAttrs.reserve(newInputs.size());
for (unsigned i = 0; i != oldNumArgs; ++i)
if (!mapper.contains(getArgument(i)))
newArgAttrs.push_back(argAttrs[i]);
newFunc.setAllArgAttrs(newArgAttrs);
}
}
}
/// Clone the current function into the new one and return it.
cloneInto(newFunc, mapper);
return newFunc;
}
/// Deep-copy the function with an empty value mapping, i.e. clone everything
/// without dropping any arguments.
FuncOp FuncOp::clone() {
  BlockAndValueMapping emptyMapper;
  return clone(emptyMapper);
}
//===----------------------------------------------------------------------===//
// ReturnOp
//===----------------------------------------------------------------------===//

View File

@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
// This file implements bufferization of builtin.func's and func.call's.
// This file implements bufferization of func.func's and func.call's.
//
//===----------------------------------------------------------------------===//

View File

@ -14,6 +14,7 @@
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/GPU/Utils.h"

View File

@ -11,6 +11,7 @@
#include "mlir/Dialect/Async/IR/Async.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Pass/Pass.h"

View File

@ -12,6 +12,7 @@
#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/IR/BuiltinOps.h"

View File

@ -12,6 +12,7 @@
#include "mlir/Dialect/Linalg/Transforms/HoistPadding.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/SCF/SCF.h"

View File

@ -15,6 +15,7 @@
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/SCF/SCF.h"

View File

@ -362,7 +362,7 @@ struct LinalgStrategyEnablePass
hoistRedundantVectorTransfersOnTensor(funcOp);
// Run CSE to cleanup after canonicalization.
OpPassManager dynamicPM("builtin.func");
OpPassManager dynamicPM("func.func");
dynamicPM.addPass(createCSEPass());
if (failed(runPipeline(dynamicPM, funcOp)))
return signalPassFailure();

View File

@ -10,6 +10,7 @@
#define DIALECT_LINALG_TRANSFORMS_PASSDETAIL_H_
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Dialect.h"
#include "mlir/Pass/Pass.h"

View File

@ -13,6 +13,7 @@
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Transforms/HoistPadding.h"

View File

@ -10,6 +10,7 @@ add_mlir_dialect_library(MLIRQuantTransforms
LINK_LIBS PUBLIC
MLIRArithmetic
MLIRFunc
MLIRIR
MLIRQuant
MLIRQuantUtils

View File

@ -9,6 +9,7 @@
#ifndef DIALECT_QUANT_TRANSFORMS_PASSDETAIL_H_
#define DIALECT_QUANT_TRANSFORMS_PASSDETAIL_H_
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
namespace mlir {

View File

@ -6,9 +6,10 @@
//
//===----------------------------------------------------------------------===//
#ifndef DIALECT_LOOPOPS_TRANSFORMS_PASSDETAIL_H_
#define DIALECT_LOOPOPS_TRANSFORMS_PASSDETAIL_H_
#ifndef DIALECT_SCF_TRANSFORMS_PASSDETAIL_H_
#define DIALECT_SCF_TRANSFORMS_PASSDETAIL_H_
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
namespace mlir {
@ -39,4 +40,4 @@ class TensorDialect;
} // namespace mlir
#endif // DIALECT_LOOPOPS_TRANSFORMS_PASSDETAIL_H_
#endif // DIALECT_SCF_TRANSFORMS_PASSDETAIL_H_

View File

@ -15,6 +15,7 @@ add_mlir_dialect_library(MLIRSPIRVConversion
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/SPIRV
LINK_LIBS PUBLIC
MLIRFunc
MLIRSPIRV
MLIRTransformUtils
)

View File

@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "PassDetail.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
#include "mlir/Dialect/SPIRV/Transforms/Passes.h"

View File

@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
#include "mlir/Transforms/DialectConversion.h"

View File

@ -16,6 +16,7 @@ add_mlir_dialect_library(MLIRShape
MLIRCastInterfaces
MLIRControlFlowInterfaces
MLIRDialect
MLIRFunc
MLIRInferTypeOpInterface
MLIRIR
MLIRSideEffectInterfaces

View File

@ -9,6 +9,7 @@
#ifndef DIALECT_SHAPE_TRANSFORMS_PASSDETAIL_H_
#define DIALECT_SHAPE_TRANSFORMS_PASSDETAIL_H_
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
namespace mlir {

View File

@ -11,6 +11,7 @@
#include "mlir/Conversion/Passes.h"
#include "mlir/Dialect/Arithmetic/Transforms/Passes.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Func/Transforms/Passes.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"

View File

@ -16,6 +16,7 @@
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"

View File

@ -9,6 +9,7 @@
#ifndef DIALECT_TENSOR_TRANSFORMS_PASSDETAIL_H_
#define DIALECT_TENSOR_TRANSFORMS_PASSDETAIL_H_
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
namespace mlir {

View File

@ -9,6 +9,7 @@
#ifndef DIALECT_VECTOR_TRANSFORMS_PASSDETAIL_H_
#define DIALECT_VECTOR_TRANSFORMS_PASSDETAIL_H_
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
namespace mlir {

View File

@ -16,10 +16,8 @@
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/FunctionImplementation.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/PatternMatch.h"
#include "llvm/ADT/MapVector.h"
using namespace mlir;
@ -72,129 +70,6 @@ void BuiltinDialect::initialize() {
addInterfaces<BuiltinOpAsmDialectInterface>();
}
//===----------------------------------------------------------------------===//
// FuncOp
//===----------------------------------------------------------------------===//
/// Build an unattached FuncOp with the given name, function type, and extra
/// attributes. The caller owns the returned op until it is inserted.
FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
                      ArrayRef<NamedAttribute> attrs) {
  OpBuilder opBuilder(location->getContext());
  OperationState opState(location, getOperationName());
  build(opBuilder, opState, name, type, attrs);
  return cast<FuncOp>(Operation::create(opState));
}
/// Build an unattached FuncOp from a dialect-attribute range, materializing
/// the range into local storage before delegating to the ArrayRef overload.
FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
                      Operation::dialect_attr_range attrs) {
  SmallVector<NamedAttribute, 8> attrStorage(attrs.begin(), attrs.end());
  return create(location, name, type, llvm::makeArrayRef(attrStorage));
}
/// Build an unattached FuncOp and attach one DictionaryAttr of attributes per
/// function argument.
FuncOp FuncOp::create(Location location, StringRef name, FunctionType type,
                      ArrayRef<NamedAttribute> attrs,
                      ArrayRef<DictionaryAttr> argAttrs) {
  FuncOp newFunc = create(location, name, type, attrs);
  newFunc.setAllArgAttrs(argAttrs);
  return newFunc;
}
/// Populate `state` so that it creates a FuncOp named `name` with function
/// type `type`. Extra `attrs` are appended verbatim; when `argAttrs` is
/// non-empty it must contain exactly one DictionaryAttr per function input.
void FuncOp::build(OpBuilder &builder, OperationState &state, StringRef name,
                   FunctionType type, ArrayRef<NamedAttribute> attrs,
                   ArrayRef<DictionaryAttr> argAttrs) {
  // The symbol name and the function type are carried as op attributes.
  state.addAttribute(SymbolTable::getSymbolAttrName(),
                     builder.getStringAttr(name));
  state.addAttribute(function_interface_impl::getTypeAttrName(),
                     TypeAttr::get(type));
  state.attributes.append(attrs.begin(), attrs.end());
  // The (initially empty) body region.
  state.addRegion();

  if (argAttrs.empty())
    return;
  assert(type.getNumInputs() == argAttrs.size());
  // Only argument attributes are attached here; result attributes are not
  // provided by this builder.
  function_interface_impl::addArgAndResultAttrs(builder, state, argAttrs,
                                                /*resultAttrs=*/llvm::None);
}
/// Parse a FuncOp using the shared function-op parser. FuncOp is never
/// variadic, so the type-builder callback simply wraps the parsed argument
/// and result types into a FunctionType.
ParseResult FuncOp::parse(OpAsmParser &parser, OperationState &result) {
  auto buildFuncType =
      [](Builder &builder, ArrayRef<Type> argTypes, ArrayRef<Type> results,
         function_interface_impl::VariadicFlag,
         std::string &) { return builder.getFunctionType(argTypes, results); };

  return function_interface_impl::parseFunctionOp(
      parser, result, /*allowVariadic=*/false, buildFuncType);
}
/// Print this FuncOp via the shared function-op printer (non-variadic form).
void FuncOp::print(OpAsmPrinter &p) {
  function_interface_impl::printFunctionOp(p, *this, /*isVariadic=*/false);
}
/// Clone the internal blocks from this function into dest and all attributes
/// from this function to dest.
void FuncOp::cloneInto(FuncOp dest, BlockAndValueMapping &mapper) {
  // Add the attributes of this function to dest.
  // Note: MapVector::insert keeps the first value seen for a duplicate key,
  // and `dest`'s attributes are inserted first — so for a name present on
  // both ops, `dest`'s existing attribute wins.
  llvm::MapVector<StringAttr, Attribute> newAttrMap;
  for (const auto &attr : dest->getAttrs())
    newAttrMap.insert({attr.getName(), attr.getValue()});
  for (const auto &attr : (*this)->getAttrs())
    newAttrMap.insert({attr.getName(), attr.getValue()});

  auto newAttrs = llvm::to_vector(llvm::map_range(
      newAttrMap, [](std::pair<StringAttr, Attribute> attrPair) {
        return NamedAttribute(attrPair.first, attrPair.second);
      }));
  dest->setAttrs(DictionaryAttr::get(getContext(), newAttrs));

  // Clone the body.
  getBody().cloneInto(&dest.getBody(), mapper);
}
/// Create a deep copy of this function and all of its blocks, remapping
/// any operands that use values outside of the function using the map that is
/// provided (leaving them alone if no entry is present). Replaces references
/// to cloned sub-values with the corresponding value that is copied, and adds
/// those mappings to the mapper.
FuncOp FuncOp::clone(BlockAndValueMapping &mapper) {
  // Create the new function.
  FuncOp newFunc = cast<FuncOp>(getOperation()->cloneWithoutRegions());

  // If the function has a body, then the user might be deleting arguments to
  // the function by specifying them in the mapper. If so, we don't add the
  // argument to the input type vector.
  if (!isExternal()) {
    FunctionType oldType = getType();

    unsigned oldNumArgs = oldType.getNumInputs();
    SmallVector<Type, 4> newInputs;
    newInputs.reserve(oldNumArgs);
    // Keep only the input types whose block argument was not pre-mapped.
    for (unsigned i = 0; i != oldNumArgs; ++i)
      if (!mapper.contains(getArgument(i)))
        newInputs.push_back(oldType.getInput(i));

    /// If any of the arguments were dropped, update the type and drop any
    /// necessary argument attributes.
    if (newInputs.size() != oldNumArgs) {
      newFunc.setType(FunctionType::get(oldType.getContext(), newInputs,
                                        oldType.getResults()));

      if (ArrayAttr argAttrs = getAllArgAttrs()) {
        SmallVector<Attribute> newArgAttrs;
        newArgAttrs.reserve(newInputs.size());
        // Argument attributes are filtered with the same predicate as the
        // types above, so they stay aligned with the surviving arguments.
        for (unsigned i = 0; i != oldNumArgs; ++i)
          if (!mapper.contains(getArgument(i)))
            newArgAttrs.push_back(argAttrs[i]);
        newFunc.setAllArgAttrs(newArgAttrs);
      }
    }
  }

  /// Clone the current function into the new one and return it.
  cloneInto(newFunc, mapper);
  return newFunc;
}
/// Convenience overload of clone() that clones with an empty value mapping
/// (no operands remapped, no arguments dropped).
FuncOp FuncOp::clone() {
  BlockAndValueMapping mapper;
  return clone(mapper);
}
//===----------------------------------------------------------------------===//
// ModuleOp
//===----------------------------------------------------------------------===//

View File

@ -322,7 +322,7 @@ private:
///
/// A pipeline is defined as a series of names, each of which may in itself
/// recursively contain a nested pipeline. A name is either the name of a pass
/// (e.g. "cse") or the name of an operation type (e.g. "builtin.func"). If
/// (e.g. "cse") or the name of an operation type (e.g. "builtin.module"). If
/// the name is the name of a pass, the InnerPipeline is empty, since passes
/// cannot contain inner pipelines.
struct PipelineElement {

View File

@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Target/LLVMIR/Dialect/All.h"
#include "mlir/Target/LLVMIR/Export.h"
@ -34,7 +35,7 @@ void registerToLLVMIRTranslation() {
return success();
},
[](DialectRegistry &registry) {
registry.insert<DLTIDialect>();
registry.insert<DLTIDialect, func::FuncDialect>();
registerAllToLLVMIRTranslations(registry);
});
}

View File

@ -14,8 +14,8 @@
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@ -67,10 +67,6 @@ bool InlinerInterface::isLegalToInline(Operation *call, Operation *callable,
bool InlinerInterface::isLegalToInline(
Region *dest, Region *src, bool wouldBeCloned,
BlockAndValueMapping &valueMapping) const {
// Regions can always be inlined into functions.
if (isa<FuncOp>(dest->getParentOp()))
return true;
if (auto *handler = getInterfaceFor(dest->getParentOp()))
return handler->isLegalToInline(dest, src, wouldBeCloned, valueMapping);
return false;

View File

@ -3,17 +3,10 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
try:
from typing import Optional, Sequence, Union
import inspect
from ..ir import *
except ImportError as e:
raise RuntimeError("Error loading imports from extension module") from e
ARGUMENT_ATTRIBUTE_NAME = "arg_attrs"
RESULT_ATTRIBUTE_NAME = "res_attrs"
class ModuleOp:
"""Specialization for the module op class."""
@ -25,208 +18,3 @@ class ModuleOp:
@property
def body(self):
return self.regions[0].blocks[0]
class FuncOp:
  """Specialization for the func op class."""

  def __init__(self,
               name,
               type,
               *,
               visibility=None,
               body_builder=None,
               loc=None,
               ip=None):
    """
    Create a FuncOp with the provided `name`, `type`, and `visibility`.
    - `name` is a string representing the function name.
    - `type` is either a FunctionType or a pair of lists describing inputs and
      results.
    - `visibility` is a string matching `public`, `private`, or `nested`. None
      implies private visibility.
    - `body_builder` is an optional callback, when provided a new entry block
      is created and the callback is invoked with the new op as argument within
      an InsertionPoint context already set for the block. The callback is
      expected to insert a terminator in the block.
    """
    sym_name = StringAttr.get(str(name))

    # If the type is passed as a tuple, build a FunctionType on the fly.
    if isinstance(type, tuple):
      type = FunctionType.get(inputs=type[0], results=type[1])
    type = TypeAttr.get(type)

    sym_visibility = StringAttr.get(
        str(visibility)) if visibility is not None else None
    super().__init__(sym_name, type, sym_visibility, loc=loc, ip=ip)
    if body_builder:
      entry_block = self.add_entry_block()
      with InsertionPoint(entry_block):
        body_builder(self)

  @property
  def is_external(self):
    # A declaration-only (external) function has no blocks in its region.
    return len(self.regions[0].blocks) == 0

  @property
  def body(self):
    return self.regions[0]

  @property
  def type(self):
    return FunctionType(TypeAttr(self.attributes["type"]).value)

  @property
  def visibility(self):
    return self.attributes["sym_visibility"]

  @property
  def name(self) -> StringAttr:
    return StringAttr(self.attributes["sym_name"])

  @property
  def entry_block(self):
    if self.is_external:
      raise IndexError('External function does not have a body')
    return self.regions[0].blocks[0]

  def add_entry_block(self):
    """
    Add an entry block to the function body using the function signature to
    infer block arguments.
    Returns the newly created block.
    """
    if not self.is_external:
      raise IndexError('The function already has an entry block!')
    self.body.blocks.append(*self.type.inputs)
    return self.body.blocks[0]

  @property
  def arg_attrs(self):
    return ArrayAttr(self.attributes[ARGUMENT_ATTRIBUTE_NAME])

  @arg_attrs.setter
  def arg_attrs(self, attribute: Union[ArrayAttr, list]):
    if isinstance(attribute, ArrayAttr):
      self.attributes[ARGUMENT_ATTRIBUTE_NAME] = attribute
    else:
      self.attributes[ARGUMENT_ATTRIBUTE_NAME] = ArrayAttr.get(
          attribute, context=self.context)

  @property
  def arguments(self):
    return self.entry_block.arguments

  @property
  def result_attrs(self):
    return self.attributes[RESULT_ATTRIBUTE_NAME]

  @result_attrs.setter
  def result_attrs(self, attribute: ArrayAttr):
    self.attributes[RESULT_ATTRIBUTE_NAME] = attribute

  @classmethod
  def from_py_func(FuncOp,
                   *inputs: Type,
                   results: Optional[Sequence[Type]] = None,
                   name: Optional[str] = None):
    """Decorator to define an MLIR FuncOp specified as a python function.

    Requires that an `mlir.ir.InsertionPoint` and `mlir.ir.Location` are
    active for the current thread (i.e. established in a `with` block).

    When applied as a decorator to a Python function, an entry block will
    be constructed for the FuncOp with types as specified in `*inputs`. The
    block arguments will be passed positionally to the Python function. In
    addition, if the Python function accepts keyword arguments generally or
    has a corresponding keyword argument, the following will be passed:
      * `func_op`: The `func` op being defined.

    By default, the function name will be the Python function `__name__`. This
    can be overridden by passing the `name` argument to the decorator.

    If `results` is not specified, then the decorator will implicitly
    insert a `ReturnOp` with the `Value`'s returned from the decorated
    function. It will also set the `FuncOp` type with the actual return
    value types. If `results` is specified, then the decorated function
    must return `None` and no implicit `ReturnOp` is added (nor are the result
    types updated). The implicit behavior is intended for simple, single-block
    cases, and users should specify result types explicitly for any complicated
    cases.

    The decorated function can further be called from Python and will insert
    a `CallOp` at the then-current insertion point, returning either None (
    if no return values), a unary Value (for one result), or a list of Values).
    This mechanism cannot be used to emit recursive calls (by construction).
    """

    def decorator(f):
      from . import func
      # Introspect the callable for optional features.
      sig = inspect.signature(f)
      has_arg_func_op = False
      for param in sig.parameters.values():
        if param.kind == param.VAR_KEYWORD:
          has_arg_func_op = True
        if param.name == "func_op" and (param.kind
                                        == param.POSITIONAL_OR_KEYWORD or
                                        param.kind == param.KEYWORD_ONLY):
          has_arg_func_op = True

      # Emit the FuncOp.
      implicit_return = results is None
      symbol_name = name or f.__name__
      function_type = FunctionType.get(
          inputs=inputs, results=[] if implicit_return else results)
      func_op = FuncOp(name=symbol_name, type=function_type)
      with InsertionPoint(func_op.add_entry_block()):
        func_args = func_op.entry_block.arguments
        func_kwargs = {}
        if has_arg_func_op:
          func_kwargs["func_op"] = func_op
        return_values = f(*func_args, **func_kwargs)
        if not implicit_return:
          return_types = list(results)
          assert return_values is None, (
              "Capturing a python function with explicit `results=` "
              "requires that the wrapped function returns None.")
        else:
          # Coerce return values, add ReturnOp and rewrite func type.
          if return_values is None:
            return_values = []
          elif isinstance(return_values, tuple):
            return_values = list(return_values)
          elif isinstance(return_values, Value):
            # Returning a single value is fine, coerce it into a list.
            return_values = [return_values]
          elif isinstance(return_values, OpView):
            # Returning a single operation is fine, coerce its results to a list.
            return_values = return_values.operation.results
          elif isinstance(return_values, Operation):
            # Returning a single operation is fine, coerce its results to a list.
            return_values = return_values.results
          else:
            return_values = list(return_values)
          func.ReturnOp(return_values)
          # Recompute the function type.
          return_types = [v.type for v in return_values]
          function_type = FunctionType.get(inputs=inputs, results=return_types)
          func_op.attributes["type"] = TypeAttr.get(function_type)

      def emit_call_op(*call_args):
        call_op = func.CallOp(return_types, FlatSymbolRefAttr.get(symbol_name),
                              call_args)
        if return_types is None:
          return None
        elif len(return_types) == 1:
          return call_op.result
        else:
          return call_op.results

      wrapped = emit_call_op
      wrapped.__name__ = f.__name__
      wrapped.func_op = func_op
      return wrapped

    return decorator

View File

@ -4,13 +4,16 @@
try:
from ..ir import *
from .builtin import FuncOp
from ._ods_common import get_default_loc_context as _get_default_loc_context
from typing import Any, List, Optional, Union
import inspect
from typing import Any, List, Optional, Sequence, Union
except ImportError as e:
raise RuntimeError("Error loading imports from extension module") from e
ARGUMENT_ATTRIBUTE_NAME = "arg_attrs"
RESULT_ATTRIBUTE_NAME = "res_attrs"
class ConstantOp:
"""Specialization for the constant op class."""
@ -23,6 +26,210 @@ class ConstantOp:
return self.results[0].type
class FuncOp:
  """Specialization for the func op class."""

  def __init__(self,
               name,
               type,
               *,
               visibility=None,
               body_builder=None,
               loc=None,
               ip=None):
    """
    Create a FuncOp with the provided `name`, `type`, and `visibility`.
    - `name` is a string representing the function name.
    - `type` is either a FunctionType or a pair of lists describing inputs and
      results.
    - `visibility` is a string matching `public`, `private`, or `nested`. None
      implies private visibility.
    - `body_builder` is an optional callback, when provided a new entry block
      is created and the callback is invoked with the new op as argument within
      an InsertionPoint context already set for the block. The callback is
      expected to insert a terminator in the block.
    """
    sym_name = StringAttr.get(str(name))

    # If the type is passed as a tuple, build a FunctionType on the fly.
    if isinstance(type, tuple):
      type = FunctionType.get(inputs=type[0], results=type[1])
    type = TypeAttr.get(type)

    sym_visibility = StringAttr.get(
        str(visibility)) if visibility is not None else None
    super().__init__(sym_name, type, sym_visibility, loc=loc, ip=ip)
    if body_builder:
      entry_block = self.add_entry_block()
      with InsertionPoint(entry_block):
        body_builder(self)

  @property
  def is_external(self):
    # A declaration-only (external) function has no blocks in its region.
    return len(self.regions[0].blocks) == 0

  @property
  def body(self):
    return self.regions[0]

  @property
  def type(self):
    return FunctionType(TypeAttr(self.attributes["type"]).value)

  @property
  def visibility(self):
    return self.attributes["sym_visibility"]

  @property
  def name(self) -> StringAttr:
    return StringAttr(self.attributes["sym_name"])

  @property
  def entry_block(self):
    if self.is_external:
      raise IndexError('External function does not have a body')
    return self.regions[0].blocks[0]

  def add_entry_block(self):
    """
    Add an entry block to the function body using the function signature to
    infer block arguments.
    Returns the newly created block.
    """
    if not self.is_external:
      raise IndexError('The function already has an entry block!')
    self.body.blocks.append(*self.type.inputs)
    return self.body.blocks[0]

  @property
  def arg_attrs(self):
    return ArrayAttr(self.attributes[ARGUMENT_ATTRIBUTE_NAME])

  @arg_attrs.setter
  def arg_attrs(self, attribute: Union[ArrayAttr, list]):
    if isinstance(attribute, ArrayAttr):
      self.attributes[ARGUMENT_ATTRIBUTE_NAME] = attribute
    else:
      self.attributes[ARGUMENT_ATTRIBUTE_NAME] = ArrayAttr.get(
          attribute, context=self.context)

  @property
  def arguments(self):
    return self.entry_block.arguments

  @property
  def result_attrs(self):
    return self.attributes[RESULT_ATTRIBUTE_NAME]

  @result_attrs.setter
  def result_attrs(self, attribute: ArrayAttr):
    self.attributes[RESULT_ATTRIBUTE_NAME] = attribute

  @classmethod
  def from_py_func(FuncOp,
                   *inputs: Type,
                   results: Optional[Sequence[Type]] = None,
                   name: Optional[str] = None):
    """Decorator to define an MLIR FuncOp specified as a python function.

    Requires that an `mlir.ir.InsertionPoint` and `mlir.ir.Location` are
    active for the current thread (i.e. established in a `with` block).

    When applied as a decorator to a Python function, an entry block will
    be constructed for the FuncOp with types as specified in `*inputs`. The
    block arguments will be passed positionally to the Python function. In
    addition, if the Python function accepts keyword arguments generally or
    has a corresponding keyword argument, the following will be passed:
      * `func_op`: The `func` op being defined.

    By default, the function name will be the Python function `__name__`. This
    can be overridden by passing the `name` argument to the decorator.

    If `results` is not specified, then the decorator will implicitly
    insert a `ReturnOp` with the `Value`'s returned from the decorated
    function. It will also set the `FuncOp` type with the actual return
    value types. If `results` is specified, then the decorated function
    must return `None` and no implicit `ReturnOp` is added (nor are the result
    types updated). The implicit behavior is intended for simple, single-block
    cases, and users should specify result types explicitly for any complicated
    cases.

    The decorated function can further be called from Python and will insert
    a `CallOp` at the then-current insertion point, returning either None (
    if no return values), a unary Value (for one result), or a list of Values).
    This mechanism cannot be used to emit recursive calls (by construction).
    """

    def decorator(f):
      from . import func
      # Introspect the callable for optional features.
      sig = inspect.signature(f)
      has_arg_func_op = False
      for param in sig.parameters.values():
        if param.kind == param.VAR_KEYWORD:
          has_arg_func_op = True
        if param.name == "func_op" and (param.kind
                                        == param.POSITIONAL_OR_KEYWORD or
                                        param.kind == param.KEYWORD_ONLY):
          has_arg_func_op = True

      # Emit the FuncOp.
      implicit_return = results is None
      symbol_name = name or f.__name__
      function_type = FunctionType.get(
          inputs=inputs, results=[] if implicit_return else results)
      func_op = FuncOp(name=symbol_name, type=function_type)
      with InsertionPoint(func_op.add_entry_block()):
        func_args = func_op.entry_block.arguments
        func_kwargs = {}
        if has_arg_func_op:
          func_kwargs["func_op"] = func_op
        return_values = f(*func_args, **func_kwargs)
        if not implicit_return:
          return_types = list(results)
          assert return_values is None, (
              "Capturing a python function with explicit `results=` "
              "requires that the wrapped function returns None.")
        else:
          # Coerce return values, add ReturnOp and rewrite func type.
          if return_values is None:
            return_values = []
          elif isinstance(return_values, tuple):
            return_values = list(return_values)
          elif isinstance(return_values, Value):
            # Returning a single value is fine, coerce it into a list.
            return_values = [return_values]
          elif isinstance(return_values, OpView):
            # Returning a single operation is fine, coerce its results to a list.
            return_values = return_values.operation.results
          elif isinstance(return_values, Operation):
            # Returning a single operation is fine, coerce its results to a list.
            return_values = return_values.results
          else:
            return_values = list(return_values)
          func.ReturnOp(return_values)
          # Recompute the function type.
          return_types = [v.type for v in return_values]
          function_type = FunctionType.get(inputs=inputs, results=return_types)
          func_op.attributes["type"] = TypeAttr.get(function_type)

      def emit_call_op(*call_args):
        call_op = func.CallOp(return_types, FlatSymbolRefAttr.get(symbol_name),
                              call_args)
        if return_types is None:
          return None
        elif len(return_types) == 1:
          return call_op.result
        else:
          return call_op.results

      wrapped = emit_call_op
      wrapped.__name__ = f.__name__
      wrapped.func_op = func_op
      return wrapped

    return decorator
class CallOp:
"""Specialization for the call op class."""
@ -45,7 +252,7 @@ class CallOp:
For example
f = builtin.FuncOp("foo", ...)
f = func.FuncOp("foo", ...)
func.CallOp(f, [args])
func.CallOp([result_types], "foo", [args])

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt %s -pass-pipeline='builtin.func(test-alias-analysis-modref)' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s
// RUN: mlir-opt %s -pass-pipeline='func.func(test-alias-analysis-modref)' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s
// CHECK-LABEL: Testing : "no_side_effects"
// CHECK: alloc -> func.region0#0: NoModRef

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt %s -pass-pipeline='builtin.func(test-alias-analysis)' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s
// RUN: mlir-opt %s -pass-pipeline='func.func(test-alias-analysis)' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s
// CHECK-LABEL: Testing : "simple"
// CHECK-DAG: func.region0#0 <-> func.region0#1: MayAlias

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt %s -pass-pipeline="builtin.func(test-print-dominance)" -split-input-file 2>&1 | FileCheck %s
// RUN: mlir-opt %s -pass-pipeline="func.func(test-print-dominance)" -split-input-file 2>&1 | FileCheck %s
// CHECK-LABEL: Testing : func_condBranch
func @func_condBranch(%cond : i1) {

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt %s -pass-pipeline="builtin.func(test-print-liveness)" -split-input-file 2>&1 | FileCheck %s
// RUN: mlir-opt %s -pass-pipeline="func.func(test-print-liveness)" -split-input-file 2>&1 | FileCheck %s
// CHECK-LABEL: Testing : func_empty
func @func_empty() {

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt %s -pass-pipeline="builtin.func(test-match-reduction)" -verify-diagnostics -split-input-file
// RUN: mlir-opt %s -pass-pipeline="func.func(test-match-reduction)" -verify-diagnostics -split-input-file
// Verify that the generic reduction detection utility works on different
// dialects.

View File

@ -15,7 +15,7 @@ func @tanh(%arg: tensor<10x20xf32>) -> tensor<10x20xf32>
// The shape function library with some local functions.
shape.function_library @shape_lib {
// Test shape function that returns the shape of input arg as result shape.
builtin.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
func.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
%0 = shape.shape_of %arg : !shape.value_shape -> !shape.shape
return %0 : !shape.shape
}

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt %s -pass-pipeline="builtin.func(test-print-topological-sort)" 2>&1 | FileCheck %s
// RUN: mlir-opt %s -pass-pipeline="func.func(test-print-topological-sort)" 2>&1 | FileCheck %s
// CHECK-LABEL: Testing : region
// CHECK: arith.addi {{.*}} : index

View File

@ -26,7 +26,7 @@
void lowerModuleToLLVM(MlirContext ctx, MlirModule module) {
MlirPassManager pm = mlirPassManagerCreate(ctx);
MlirOpPassManager opm = mlirPassManagerGetNestedUnder(
pm, mlirStringRefCreateFromCString("builtin.func"));
pm, mlirStringRefCreateFromCString("func.func"));
mlirPassManagerAddOwnedPass(pm, mlirCreateConversionConvertFuncToLLVM());
mlirOpPassManagerAddOwnedPass(opm,
mlirCreateConversionConvertArithmeticToLLVM());

View File

@ -101,7 +101,7 @@ MlirModule makeAndDumpAdd(MlirContext ctx, MlirLocation location) {
mlirIdentifierGet(ctx, mlirStringRefCreateFromCString("sym_name")),
funcNameAttr)};
MlirOperationState funcState = mlirOperationStateGet(
mlirStringRefCreateFromCString("builtin.func"), location);
mlirStringRefCreateFromCString("func.func"), location);
mlirOperationStateAddAttributes(&funcState, 2, funcAttrs);
mlirOperationStateAddOwnedRegions(&funcState, 1, &funcBodyRegion);
MlirOperation func = mlirOperationCreate(&funcState);

View File

@ -42,7 +42,7 @@ void testRunPassOnModule() {
// Run the print-op-stats pass on the top-level module:
// CHECK-LABEL: Operations encountered:
// CHECK: arith.addi , 1
// CHECK: builtin.func , 1
// CHECK: func.func , 1
// CHECK: func.return , 1
{
MlirPassManager pm = mlirPassManagerCreate(ctx);
@ -84,12 +84,12 @@ void testRunPassOnNestedModule() {
// Run the print-op-stats pass on functions under the top-level module:
// CHECK-LABEL: Operations encountered:
// CHECK: arith.addi , 1
// CHECK: builtin.func , 1
// CHECK: func.func , 1
// CHECK: func.return , 1
{
MlirPassManager pm = mlirPassManagerCreate(ctx);
MlirOpPassManager nestedFuncPm = mlirPassManagerGetNestedUnder(
pm, mlirStringRefCreateFromCString("builtin.func"));
pm, mlirStringRefCreateFromCString("func.func"));
MlirPass printOpStatPass = mlirCreateTransformsPrintOpStats();
mlirOpPassManagerAddOwnedPass(nestedFuncPm, printOpStatPass);
MlirLogicalResult success = mlirPassManagerRun(pm, module);
@ -100,14 +100,14 @@ void testRunPassOnNestedModule() {
// Run the print-op-stats pass on functions under the nested module:
// CHECK-LABEL: Operations encountered:
// CHECK: arith.addf , 1
// CHECK: builtin.func , 1
// CHECK: func.func , 1
// CHECK: func.return , 1
{
MlirPassManager pm = mlirPassManagerCreate(ctx);
MlirOpPassManager nestedModulePm = mlirPassManagerGetNestedUnder(
pm, mlirStringRefCreateFromCString("builtin.module"));
MlirOpPassManager nestedFuncPm = mlirOpPassManagerGetNestedUnder(
nestedModulePm, mlirStringRefCreateFromCString("builtin.func"));
nestedModulePm, mlirStringRefCreateFromCString("func.func"));
MlirPass printOpStatPass = mlirCreateTransformsPrintOpStats();
mlirOpPassManagerAddOwnedPass(nestedFuncPm, printOpStatPass);
MlirLogicalResult success = mlirPassManagerRun(pm, module);
@ -132,19 +132,19 @@ void testPrintPassPipeline() {
MlirOpPassManager nestedModulePm = mlirPassManagerGetNestedUnder(
pm, mlirStringRefCreateFromCString("builtin.module"));
MlirOpPassManager nestedFuncPm = mlirOpPassManagerGetNestedUnder(
nestedModulePm, mlirStringRefCreateFromCString("builtin.func"));
nestedModulePm, mlirStringRefCreateFromCString("func.func"));
MlirPass printOpStatPass = mlirCreateTransformsPrintOpStats();
mlirOpPassManagerAddOwnedPass(nestedFuncPm, printOpStatPass);
// Print the top level pass manager
// CHECK: Top-level: builtin.module(builtin.func(print-op-stats))
// CHECK: Top-level: builtin.module(func.func(print-op-stats))
fprintf(stderr, "Top-level: ");
mlirPrintPassPipeline(mlirPassManagerGetAsOpPassManager(pm), printToStderr,
NULL);
fprintf(stderr, "\n");
// Print the pipeline nested one level down
// CHECK: Nested Module: builtin.func(print-op-stats)
// CHECK: Nested Module: func.func(print-op-stats)
fprintf(stderr, "Nested Module: ");
mlirPrintPassPipeline(nestedModulePm, printToStderr, NULL);
fprintf(stderr, "\n");
@ -165,8 +165,8 @@ void testParsePassPipeline() {
// Try parse a pipeline.
MlirLogicalResult status = mlirParsePassPipeline(
mlirPassManagerGetAsOpPassManager(pm),
mlirStringRefCreateFromCString(
"builtin.module(builtin.func(print-op-stats), builtin.func(print-op-stats))"));
mlirStringRefCreateFromCString("builtin.module(func.func(print-op-stats),"
" func.func(print-op-stats))"));
// Expect a failure, we haven't registered the print-op-stats pass yet.
if (mlirLogicalResultIsSuccess(status)) {
fprintf(stderr, "Unexpected success parsing pipeline without registering the pass\n");
@ -176,15 +176,16 @@ void testParsePassPipeline() {
mlirRegisterTransformsPrintOpStats();
status = mlirParsePassPipeline(
mlirPassManagerGetAsOpPassManager(pm),
mlirStringRefCreateFromCString(
"builtin.module(builtin.func(print-op-stats), builtin.func(print-op-stats))"));
mlirStringRefCreateFromCString("builtin.module(func.func(print-op-stats),"
" func.func(print-op-stats))"));
// Expect a failure, we haven't registered the print-op-stats pass yet.
if (mlirLogicalResultIsFailure(status)) {
fprintf(stderr, "Unexpected failure parsing pipeline after registering the pass\n");
exit(EXIT_FAILURE);
}
// CHECK: Round-trip: builtin.module(builtin.func(print-op-stats), builtin.func(print-op-stats))
// CHECK: Round-trip: builtin.module(func.func(print-op-stats),
// func.func(print-op-stats))
fprintf(stderr, "Round-trip: ");
mlirPrintPassPipeline(mlirPassManagerGetAsOpPassManager(pm), printToStderr,
NULL);

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt -pass-pipeline="builtin.func(convert-arith-to-llvm)" %s -split-input-file | FileCheck %s
// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm)" %s -split-input-file | FileCheck %s
// CHECK-LABEL: @vector_ops
func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>, %arg3: vector<4xi64>) -> vector<4xf32> {

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt -pass-pipeline="builtin.func(convert-arith-to-llvm)" %s -split-input-file | FileCheck %s
// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm)" %s -split-input-file | FileCheck %s
// CHECK-LABEL: @vec_bin
func @vec_bin(%arg0: vector<2x2x2xf32>) -> vector<2x2x2xf32> {

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-complex-to-standard)" | FileCheck %s
// RUN: mlir-opt %s -pass-pipeline="func.func(convert-complex-to-standard)" | FileCheck %s
// CHECK-LABEL: func @complex_abs
// CHECK-SAME: %[[ARG:.*]]: complex<f32>

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt %s -pass-pipeline="builtin.func(convert-complex-to-standard),convert-complex-to-llvm,builtin.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" | FileCheck %s
// RUN: mlir-opt %s -pass-pipeline="func.func(convert-complex-to-standard),convert-complex-to-llvm,func.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" | FileCheck %s
// CHECK-LABEL: llvm.func @complex_abs
// CHECK-SAME: %[[ARG:.*]]: ![[C_TY:.*]])

View File

@ -1,5 +1,5 @@
// RUN: mlir-opt -pass-pipeline="builtin.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" -split-input-file %s | FileCheck %s
// RUN: mlir-opt -pass-pipeline="builtin.func(convert-arith-to-llvm),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1},reconcile-unrealized-casts" -split-input-file %s | FileCheck %s --check-prefix=BAREPTR
// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" -split-input-file %s | FileCheck %s
// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1},reconcile-unrealized-casts" -split-input-file %s | FileCheck %s --check-prefix=BAREPTR
// BAREPTR-LABEL: func @check_noalias
// BAREPTR-SAME: %{{.*}}: !llvm.ptr<f32> {llvm.noalias}, %{{.*}}: !llvm.ptr<f32> {llvm.noalias}

View File

@ -1,5 +1,5 @@
// RUN: mlir-opt -pass-pipeline="builtin.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" %s -split-input-file | FileCheck %s
// RUN: mlir-opt -pass-pipeline="builtin.func(convert-math-to-llvm,convert-arith-to-llvm{index-bitwidth=32}),convert-func-to-llvm{index-bitwidth=32},reconcile-unrealized-casts" %s -split-input-file | FileCheck --check-prefix=CHECK32 %s
// RUN: mlir-opt -pass-pipeline="func.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" %s -split-input-file | FileCheck %s
// RUN: mlir-opt -pass-pipeline="func.func(convert-math-to-llvm,convert-arith-to-llvm{index-bitwidth=32}),convert-func-to-llvm{index-bitwidth=32},reconcile-unrealized-casts" %s -split-input-file | FileCheck --check-prefix=CHECK32 %s
// CHECK-LABEL: func @empty() {
// CHECK-NEXT: llvm.return

View File

@ -4,7 +4,7 @@
gpu.module @test_module {
// CHECK-LABEL: func @gpu_index_ops()
// CHECK32-LABEL: func @gpu_index_ops()
builtin.func @gpu_index_ops()
func.func @gpu_index_ops()
-> (index, index, index, index, index, index,
index, index, index, index, index, index) {
// CHECK32-NOT: = llvm.sext %{{.*}} : i32 to i64
@ -61,7 +61,7 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK-LABEL: func @gpu_index_comp
// CHECK32-LABEL: func @gpu_index_comp
builtin.func @gpu_index_comp(%idx : index) -> index {
func.func @gpu_index_comp(%idx : index) -> index {
// CHECK: = llvm.add %{{.*}}, %{{.*}} : i64
// CHECK32: = llvm.add %{{.*}}, %{{.*}} : i32
%0 = arith.addi %idx, %idx : index
@ -109,7 +109,7 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK-LABEL: func @gpu_shuffle()
builtin.func @gpu_shuffle() -> (f32, f32, f32, f32) {
func.func @gpu_shuffle() -> (f32, f32, f32, f32) {
// CHECK: %[[#VALUE:]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
%arg0 = arith.constant 1.0 : f32
// CHECK: %[[#OFFSET:]] = llvm.mlir.constant(4 : i32) : i32
@ -148,7 +148,7 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK-LABEL: func @gpu_sync()
builtin.func @gpu_sync() {
func.func @gpu_sync() {
// CHECK: nvvm.barrier0
gpu.barrier
func.return
@ -161,7 +161,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_fabsf(f32) -> f32
// CHECK: llvm.func @__nv_fabs(f64) -> f64
// CHECK-LABEL: func @gpu_fabs
builtin.func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.abs %arg_f32 : f32
// CHECK: llvm.call @__nv_fabsf(%{{.*}}) : (f32) -> f32
%result64 = math.abs %arg_f64 : f64
@ -176,7 +176,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_ceilf(f32) -> f32
// CHECK: llvm.func @__nv_ceil(f64) -> f64
// CHECK-LABEL: func @gpu_ceil
builtin.func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.ceil %arg_f32 : f32
// CHECK: llvm.call @__nv_ceilf(%{{.*}}) : (f32) -> f32
%result64 = math.ceil %arg_f64 : f64
@ -191,7 +191,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_floorf(f32) -> f32
// CHECK: llvm.func @__nv_floor(f64) -> f64
// CHECK-LABEL: func @gpu_floor
builtin.func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.floor %arg_f32 : f32
// CHECK: llvm.call @__nv_floorf(%{{.*}}) : (f32) -> f32
%result64 = math.floor %arg_f64 : f64
@ -206,7 +206,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_cosf(f32) -> f32
// CHECK: llvm.func @__nv_cos(f64) -> f64
// CHECK-LABEL: func @gpu_cos
builtin.func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.cos %arg_f32 : f32
// CHECK: llvm.call @__nv_cosf(%{{.*}}) : (f32) -> f32
%result64 = math.cos %arg_f64 : f64
@ -220,7 +220,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_expf(f32) -> f32
// CHECK: llvm.func @__nv_exp(f64) -> f64
// CHECK-LABEL: func @gpu_exp
builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.exp %arg_f32 : f32
// CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
%result64 = math.exp %arg_f64 : f64
@ -234,7 +234,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_exp2f(f32) -> f32
// CHECK: llvm.func @__nv_exp2(f64) -> f64
// CHECK-LABEL: func @gpu_exp2
builtin.func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.exp2 %arg_f32 : f32
// CHECK: llvm.call @__nv_exp2f(%{{.*}}) : (f32) -> f32
%result64 = math.exp2 %arg_f64 : f64
@ -249,7 +249,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_logf(f32) -> f32
// CHECK: llvm.func @__nv_log(f64) -> f64
// CHECK-LABEL: func @gpu_log
builtin.func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log %arg_f32 : f32
// CHECK: llvm.call @__nv_logf(%{{.*}}) : (f32) -> f32
%result64 = math.log %arg_f64 : f64
@ -264,7 +264,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_log10f(f32) -> f32
// CHECK: llvm.func @__nv_log10(f64) -> f64
// CHECK-LABEL: func @gpu_log10
builtin.func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log10 %arg_f32 : f32
// CHECK: llvm.call @__nv_log10f(%{{.*}}) : (f32) -> f32
%result64 = math.log10 %arg_f64 : f64
@ -279,7 +279,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_log1pf(f32) -> f32
// CHECK: llvm.func @__nv_log1p(f64) -> f64
// CHECK-LABEL: func @gpu_log1p
builtin.func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log1p %arg_f32 : f32
// CHECK: llvm.call @__nv_log1pf(%{{.*}}) : (f32) -> f32
%result64 = math.log1p %arg_f64 : f64
@ -294,7 +294,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_log2f(f32) -> f32
// CHECK: llvm.func @__nv_log2(f64) -> f64
// CHECK-LABEL: func @gpu_log2
builtin.func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log2 %arg_f32 : f32
// CHECK: llvm.call @__nv_log2f(%{{.*}}) : (f32) -> f32
%result64 = math.log2 %arg_f64 : f64
@ -309,7 +309,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_sinf(f32) -> f32
// CHECK: llvm.func @__nv_sin(f64) -> f64
// CHECK-LABEL: func @gpu_sin
builtin.func @gpu_sin(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_sin(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.sin %arg_f32 : f32
// CHECK: llvm.call @__nv_sinf(%{{.*}}) : (f32) -> f32
%result64 = math.sin %arg_f64 : f64
@ -324,7 +324,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_tanhf(f32) -> f32
// CHECK: llvm.func @__nv_tanh(f64) -> f64
// CHECK-LABEL: func @gpu_tanh
builtin.func @gpu_tanh(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
func.func @gpu_tanh(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
%result16 = math.tanh %arg_f16 : f16
// CHECK: llvm.fpext %{{.*}} : f16 to f32
// CHECK-NEXT: llvm.call @__nv_tanhf(%{{.*}}) : (f32) -> f32
@ -343,7 +343,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_rsqrtf(f32) -> f32
// CHECK: llvm.func @__nv_rsqrt(f64) -> f64
// CHECK-LABEL: func @gpu_rsqrt
builtin.func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
func.func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
-> (f16, f32, f64) {
%result16 = math.rsqrt %arg_f16 : f16
// CHECK: llvm.fpext %{{.*}} : f16 to f32
@ -363,7 +363,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_sqrtf(f32) -> f32
// CHECK: llvm.func @__nv_sqrt(f64) -> f64
// CHECK-LABEL: func @gpu_sqrt
builtin.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
func.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
-> (f16, f32, f64) {
%result16 = math.sqrt %arg_f16 : f16
// CHECK: llvm.fpext %{{.*}} : f16 to f32
@ -383,7 +383,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_atanf(f32) -> f32
// CHECK: llvm.func @__nv_atan(f64) -> f64
// CHECK-LABEL: func @gpu_atan
builtin.func @gpu_atan(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
func.func @gpu_atan(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
-> (f16, f32, f64) {
%result16 = math.atan %arg_f16 : f16
// CHECK: llvm.fpext %{{.*}} : f16 to f32
@ -403,7 +403,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_atan2f(f32, f32) -> f32
// CHECK: llvm.func @__nv_atan2(f64, f64) -> f64
// CHECK-LABEL: func @gpu_atan2
builtin.func @gpu_atan2(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
func.func @gpu_atan2(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
-> (f16, f32, f64) {
%result16 = math.atan2 %arg_f16, %arg_f16 : f16
// CHECK: llvm.fpext %{{.*}} : f16 to f32
@ -427,7 +427,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_expf(f32) -> f32
// CHECK: llvm.func @__nv_exp(f64) -> f64
// CHECK-LABEL: func @gpu_exp
builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.exp %arg_f32 : f32
// CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
%result64 = math.exp %arg_f64 : f64
@ -444,7 +444,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_expm1f(f32) -> f32
// CHECK: llvm.func @__nv_expm1(f64) -> f64
// CHECK-LABEL: func @gpu_expm1
builtin.func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.expm1 %arg_f32 : f32
// CHECK: llvm.call @__nv_expm1f(%{{.*}}) : (f32) -> f32
%result64 = math.expm1 %arg_f64 : f64
@ -459,7 +459,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_powf(f32, f32) -> f32
// CHECK: llvm.func @__nv_pow(f64, f64) -> f64
// CHECK-LABEL: func @gpu_pow
builtin.func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.powf %arg_f32, %arg_f32 : f32
// CHECK: llvm.call @__nv_powf(%{{.*}}, %{{.*}}) : (f32, f32) -> f32
%result64 = math.powf %arg_f64, %arg_f64 : f64

View File

@ -6,7 +6,7 @@ gpu.module @test_module {
// CHECK-LABEL: func @gpu_wmma_load_op() ->
// CHECK-SAME: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> {
// CHECK32-LABEL: func @gpu_wmma_load_op() ->
builtin.func @gpu_wmma_load_op() -> (!gpu.mma_matrix<16x16xf16, "AOp">) {
func.func @gpu_wmma_load_op() -> (!gpu.mma_matrix<16x16xf16, "AOp">) {
%wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
%i = arith.constant 16 : index
%j = arith.constant 16 : index
@ -46,7 +46,7 @@ gpu.module @test_module {
// CHECK-SAME: (%[[D:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) {
// CHECK32-LABEL: func @gpu_wmma_store_op
// CHECK32-SAME: (%[[D:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) {
builtin.func @gpu_wmma_store_op(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
func.func @gpu_wmma_store_op(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
%sg = memref.alloca(){alignment = 32} : memref<32x32xf16, 3>
%i = arith.constant 16 : index
%j = arith.constant 16 : index
@ -92,7 +92,7 @@ gpu.module @test_module {
// CHECK-LABEL: func @gpu_wmma_mma_op
// CHECK-SAME: (%[[A:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>, %[[B:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>, %[[C:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>)
builtin.func @gpu_wmma_mma_op(%A : !gpu.mma_matrix<16x16xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> (!gpu.mma_matrix<16x16xf16, "COp">) {
func.func @gpu_wmma_mma_op(%A : !gpu.mma_matrix<16x16xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> (!gpu.mma_matrix<16x16xf16, "COp">) {
%D = gpu.subgroup_mma_compute %A, %B, %C : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp">
// CHECK: %[[A1:.*]] = llvm.extractvalue %[[A]][0 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
// CHECK: %[[A2:.*]] = llvm.extractvalue %[[A]][1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
@ -163,7 +163,7 @@ gpu.module @test_module {
// CHECK: %[[E3:.+]] = llvm.extractvalue %[[ACC]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
// CHECK: nvvm.wmma.store %{{.*}}, %{{.*}}, %[[E0]], %[[E1]], %[[E2]], %[[E3]] {eltype = #nvvm.mma_type<f16>, k = 16 : i32, layout = #nvvm.mma_layout<row>, m = 16 : i32, n = 16 : i32} : !llvm.ptr<f16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>
builtin.func @gpu_wmma_mma_loop_op(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %arg2: memref<128x128xf16>) {
func.func @gpu_wmma_mma_loop_op(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %arg2: memref<128x128xf16>) {
%c0 = arith.constant 0 : index
%c128 = arith.constant 128 : index
%c32 = arith.constant 32 : index
@ -202,7 +202,7 @@ gpu.module @test_module {
// CHECK: %[[M3:.+]] = llvm.insertvalue %[[V2]], %[[M2]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
// CHECK: %[[M4:.+]] = llvm.insertvalue %[[V2]], %[[M3]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
// CHECK: llvm.return %[[M4]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
builtin.func @gpu_wmma_constant_op() ->(!gpu.mma_matrix<16x16xf16, "COp">) {
func.func @gpu_wmma_constant_op() ->(!gpu.mma_matrix<16x16xf16, "COp">) {
%cst = arith.constant 1.0 : f16
%C = gpu.subgroup_mma_constant_matrix %cst : !gpu.mma_matrix<16x16xf16, "COp">
return %C : !gpu.mma_matrix<16x16xf16, "COp">
@ -232,7 +232,7 @@ gpu.module @test_module {
// CHECK: %[[C3:.*]] = llvm.fadd %[[A3]], %[[B3]] : vector<2xf16>
// CHECK: %[[M4:.*]] = llvm.insertvalue %[[C3]], %[[M3]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
// CHECK: llvm.return %[[M4]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
builtin.func @gpu_wmma_elementwise(%A : !gpu.mma_matrix<16x16xf16, "COp">, %B : !gpu.mma_matrix<16x16xf16, "COp">) ->(!gpu.mma_matrix<16x16xf16, "COp">) {
func.func @gpu_wmma_elementwise(%A : !gpu.mma_matrix<16x16xf16, "COp">, %B : !gpu.mma_matrix<16x16xf16, "COp">) ->(!gpu.mma_matrix<16x16xf16, "COp">) {
%C = gpu.subgroup_mma_elementwise addf %A, %B : (!gpu.mma_matrix<16x16xf16, "COp">, !gpu.mma_matrix<16x16xf16, "COp">) -> !gpu.mma_matrix<16x16xf16, "COp">
return %C : !gpu.mma_matrix<16x16xf16, "COp">
}

View File

@ -4,7 +4,7 @@
gpu.module @test_module {
// CHECK-LABEL: func @gpu_index_ops()
// CHECK32-LABEL: func @gpu_index_ops()
builtin.func @gpu_index_ops()
func.func @gpu_index_ops()
-> (index, index, index, index, index, index,
index, index, index, index, index, index) {
// CHECK32-NOT: = llvm.sext %{{.*}} : i32 to i64
@ -61,7 +61,7 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK-LABEL: func @gpu_index_comp
// CHECK32-LABEL: func @gpu_index_comp
builtin.func @gpu_index_comp(%idx : index) -> index {
func.func @gpu_index_comp(%idx : index) -> index {
// CHECK: = llvm.add %{{.*}}, %{{.*}} : i64
// CHECK32: = llvm.add %{{.*}}, %{{.*}} : i32
%0 = arith.addi %idx, %idx : index
@ -75,7 +75,7 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK-LABEL: func @gpu_sync()
builtin.func @gpu_sync() {
func.func @gpu_sync() {
// CHECK: rocdl.barrier
gpu.barrier
func.return
@ -88,7 +88,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_fabs_f32(f32) -> f32
// CHECK: llvm.func @__ocml_fabs_f64(f64) -> f64
// CHECK-LABEL: func @gpu_fabs
builtin.func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.abs %arg_f32 : f32
// CHECK: llvm.call @__ocml_fabs_f32(%{{.*}}) : (f32) -> f32
%result64 = math.abs %arg_f64 : f64
@ -103,7 +103,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_ceil_f32(f32) -> f32
// CHECK: llvm.func @__ocml_ceil_f64(f64) -> f64
// CHECK-LABEL: func @gpu_ceil
builtin.func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.ceil %arg_f32 : f32
// CHECK: llvm.call @__ocml_ceil_f32(%{{.*}}) : (f32) -> f32
%result64 = math.ceil %arg_f64 : f64
@ -118,7 +118,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_floor_f32(f32) -> f32
// CHECK: llvm.func @__ocml_floor_f64(f64) -> f64
// CHECK-LABEL: func @gpu_floor
builtin.func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.floor %arg_f32 : f32
// CHECK: llvm.call @__ocml_floor_f32(%{{.*}}) : (f32) -> f32
%result64 = math.floor %arg_f64 : f64
@ -133,7 +133,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_cos_f32(f32) -> f32
// CHECK: llvm.func @__ocml_cos_f64(f64) -> f64
// CHECK-LABEL: func @gpu_cos
builtin.func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.cos %arg_f32 : f32
// CHECK: llvm.call @__ocml_cos_f32(%{{.*}}) : (f32) -> f32
%result64 = math.cos %arg_f64 : f64
@ -148,7 +148,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_exp_f32(f32) -> f32
// CHECK: llvm.func @__ocml_exp_f64(f64) -> f64
// CHECK-LABEL: func @gpu_exp
builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%exp_f32 = math.exp %arg_f32 : f32
// CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
%result32 = math.exp %exp_f32 : f32
@ -165,7 +165,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_exp2_f32(f32) -> f32
// CHECK: llvm.func @__ocml_exp2_f64(f64) -> f64
// CHECK-LABEL: func @gpu_exp2
builtin.func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%exp2_f32 = math.exp2 %arg_f32 : f32
// CHECK: llvm.call @__ocml_exp2_f32(%{{.*}}) : (f32) -> f32
%result32 = math.exp2 %exp2_f32 : f32
@ -185,7 +185,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_exp_f32(f32) -> f32
// CHECK: llvm.func @__ocml_exp_f64(f64) -> f64
// CHECK-LABEL: func @gpu_exp
builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%exp_f32 = math.exp %arg_f32 : f32
// CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
%result32 = math.exp %exp_f32 : f32
@ -204,7 +204,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_expm1_f32(f32) -> f32
// CHECK: llvm.func @__ocml_expm1_f64(f64) -> f64
// CHECK-LABEL: func @gpu_expm1
builtin.func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%expm1_f32 = math.expm1 %arg_f32 : f32
// CHECK: llvm.call @__ocml_expm1_f32(%{{.*}}) : (f32) -> f32
%result32 = math.expm1 %expm1_f32 : f32
@ -221,7 +221,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_log_f32(f32) -> f32
// CHECK: llvm.func @__ocml_log_f64(f64) -> f64
// CHECK-LABEL: func @gpu_log
builtin.func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log %arg_f32 : f32
// CHECK: llvm.call @__ocml_log_f32(%{{.*}}) : (f32) -> f32
%result64 = math.log %arg_f64 : f64
@ -236,7 +236,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_log1p_f32(f32) -> f32
// CHECK: llvm.func @__ocml_log1p_f64(f64) -> f64
// CHECK-LABEL: func @gpu_log1p
builtin.func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log1p %arg_f32 : f32
// CHECK: llvm.call @__ocml_log1p_f32(%{{.*}}) : (f32) -> f32
%result64 = math.log1p %arg_f64 : f64
@ -251,7 +251,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_log10_f32(f32) -> f32
// CHECK: llvm.func @__ocml_log10_f64(f64) -> f64
// CHECK-LABEL: func @gpu_log10
builtin.func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log10 %arg_f32 : f32
// CHECK: llvm.call @__ocml_log10_f32(%{{.*}}) : (f32) -> f32
%result64 = math.log10 %arg_f64 : f64
@ -266,7 +266,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_log2_f32(f32) -> f32
// CHECK: llvm.func @__ocml_log2_f64(f64) -> f64
// CHECK-LABEL: func @gpu_log2
builtin.func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log2 %arg_f32 : f32
// CHECK: llvm.call @__ocml_log2_f32(%{{.*}}) : (f32) -> f32
%result64 = math.log2 %arg_f64 : f64
@ -281,7 +281,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_rsqrt_f32(f32) -> f32
// CHECK: llvm.func @__ocml_rsqrt_f64(f64) -> f64
// CHECK-LABEL: func @gpu_rsqrt
builtin.func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
func.func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
-> (f16, f32, f64) {
%result16 = math.rsqrt %arg_f16 : f16
// CHECK: llvm.fpext %{{.*}} : f16 to f32
@ -301,7 +301,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_sqrt_f32(f32) -> f32
// CHECK: llvm.func @__ocml_sqrt_f64(f64) -> f64
// CHECK-LABEL: func @gpu_sqrt
builtin.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
func.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
-> (f16, f32, f64) {
%result16 = math.sqrt %arg_f16 : f16
// CHECK: llvm.fpext %{{.*}} : f16 to f32
@ -321,7 +321,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_tanh_f32(f32) -> f32
// CHECK: llvm.func @__ocml_tanh_f64(f64) -> f64
// CHECK-LABEL: func @gpu_tanh
builtin.func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.tanh %arg_f32 : f32
// CHECK: llvm.call @__ocml_tanh_f32(%{{.*}}) : (f32) -> f32
%result64 = math.tanh %arg_f64 : f64
@ -336,7 +336,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_atan_f32(f32) -> f32
// CHECK: llvm.func @__ocml_atan_f64(f64) -> f64
// CHECK-LABEL: func @gpu_atan
builtin.func @gpu_atan(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_atan(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.atan %arg_f32 : f32
// CHECK: llvm.call @__ocml_atan_f32(%{{.*}}) : (f32) -> f32
%result64 = math.atan %arg_f64 : f64
@ -351,7 +351,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_atan2_f32(f32, f32) -> f32
// CHECK: llvm.func @__ocml_atan2_f64(f64, f64) -> f64
// CHECK-LABEL: func @gpu_atan2
builtin.func @gpu_atan2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_atan2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.atan2 %arg_f32, %arg_f32 : f32
// CHECK: llvm.call @__ocml_atan2_f32(%{{.*}}) : (f32, f32) -> f32
%result64 = math.atan2 %arg_f64, %arg_f64 : f64
@ -366,7 +366,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_pow_f32(f32, f32) -> f32
// CHECK: llvm.func @__ocml_pow_f64(f64, f64) -> f64
// CHECK-LABEL: func @gpu_pow
builtin.func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
func.func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.powf %arg_f32, %arg_f32 : f32
// CHECK: llvm.call @__ocml_pow_f32(%{{.*}}, %{{.*}}) : (f32, f32) -> f32
%result64 = math.powf %arg_f64, %arg_f64 : f64

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.func(convert-math-to-llvm)" | FileCheck %s
// RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(convert-math-to-llvm)" | FileCheck %s
// CHECK-LABEL: @ops
func @ops(%arg0: f32, %arg1: f32, %arg2: i32, %arg3: i32, %arg4: f64) {

View File

@ -1,5 +1,5 @@
// RUN: mlir-opt -pass-pipeline="builtin.func(convert-affine-for-to-gpu{gpu-block-dims=0 gpu-thread-dims=1})" %s | FileCheck --check-prefix=CHECK-THREADS %s
// RUN: mlir-opt -pass-pipeline="builtin.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=0})" %s | FileCheck --check-prefix=CHECK-BLOCKS %s
// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=0 gpu-thread-dims=1})" %s | FileCheck --check-prefix=CHECK-THREADS %s
// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=0})" %s | FileCheck --check-prefix=CHECK-BLOCKS %s
// CHECK-THREADS-LABEL: @one_d_loop
// CHECK-BLOCKS-LABEL: @one_d_loop

View File

@ -1,5 +1,5 @@
// RUN: mlir-opt -pass-pipeline="builtin.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1})" %s | FileCheck --check-prefix=CHECK-11 %s
// RUN: mlir-opt -pass-pipeline="builtin.func(convert-affine-for-to-gpu{gpu-block-dims=2 gpu-thread-dims=2})" %s | FileCheck --check-prefix=CHECK-22 %s
// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1})" %s | FileCheck --check-prefix=CHECK-11 %s
// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=2 gpu-thread-dims=2})" %s | FileCheck --check-prefix=CHECK-22 %s
// CHECK-11-LABEL: @step_1
// CHECK-22-LABEL: @step_1

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt -pass-pipeline="builtin.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1})" %s | FileCheck %s
// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1})" %s | FileCheck %s
// CHECK-LABEL: @step_var
func @step_var(%A : memref<?x?xf32>, %B : memref<?x?xf32>) {

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt -pass-pipeline="builtin.func(convert-shape-constraints)" <%s | FileCheck %s
// RUN: mlir-opt -pass-pipeline="func.func(convert-shape-constraints)" <%s | FileCheck %s
// There's not very much useful to check here other than pasting the output.
// CHECK-LABEL: func @cstr_broadcastable(

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt --split-input-file -pass-pipeline="builtin.func(tosa-to-linalg-named)" %s -verify-diagnostics -o -| FileCheck %s
// RUN: mlir-opt --split-input-file -pass-pipeline="func.func(tosa-to-linalg-named)" %s -verify-diagnostics -o -| FileCheck %s
// CHECK-LABEL: @matmul
func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) {

Some files were not shown because too many files have changed in this diff Show More