[MLIR][GPU] Expose GpuParallelLoopMapping as non-test pass.

Reviewed By: bondhugula, herhut

Differential Revision: https://reviews.llvm.org/D126199
2024-12-14 19:49:36 +00:00 · 2022-05-30 08:32:01 +02:00 · 2022-05-30 08:32:01 +02:00 · bcf3d52486
commit bcf3d52486
parent a5ddd4a238
8 changed files with 46 additions and 80 deletions
--- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h
+++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h
@ -60,13 +60,5 @@ ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
 LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
                             ArrayRef<ParallelLoopDimMapping> mapping);
 } // namespace gpu
-
-/// Maps the parallel loops found in the given function to workgroups. The first
-/// loop encountered will be mapped to the global workgroup and the second loop
-/// encountered to the local workgroup. Within each mapping, the first three
-/// dimensions are mapped to x/y/z hardware ids and all following dimensions are
-/// mapped to sequential loops.
-void greedilyMapParallelSCFToGPU(Region &region);
-
 } // namespace mlir
 #endif // MLIR_DIALECT_GPU_PARALLELLOOPMAPPER_H
--- a/mlir/include/mlir/Dialect/GPU/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Passes.h
@ -39,6 +39,13 @@ createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef());
 /// Rewrites a function region so that GPU ops execute asynchronously.
 std::unique_ptr<OperationPass<func::FuncOp>> createGpuAsyncRegionPass();

+/// Maps the parallel loops found in the given function to workgroups. The first
+/// loop encountered will be mapped to the global workgroup and the second loop
+/// encountered to the local workgroup. Within each mapping, the first three
+/// dimensions are mapped to x/y/z hardware ids and all following dimensions are
+/// mapped to sequential loops.
+std::unique_ptr<OperationPass<func::FuncOp>> createGpuMapParallelLoopsPass();
+
 /// Collect a set of patterns to rewrite all-reduce ops within the GPU dialect.
 void populateGpuAllReducePatterns(RewritePatternSet &patterns);

--- a/mlir/include/mlir/Dialect/GPU/Passes.td
+++ b/mlir/include/mlir/Dialect/GPU/Passes.td
@ -29,4 +29,11 @@ def GpuAsyncRegionPass : Pass<"gpu-async-region", "func::FuncOp"> {
  let dependentDialects = ["async::AsyncDialect"];
 }

+def GpuMapParallelLoopsPass
+    : Pass<"gpu-map-parallel-loops", "mlir::func::FuncOp"> {
+  let summary = "Greedily maps loops to GPU hardware dimensions.";
+  let constructor = "mlir::createGpuMapParallelLoopsPass()";
+  let description = "Greedily maps loops to GPU hardware dimensions.";
+}
+
 #endif // MLIR_DIALECT_GPU_PASSES
--- a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp
@ -13,26 +13,25 @@

 #include "mlir/Dialect/GPU/ParallelLoopMapper.h"

+#include "PassDetail.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/GPU/Passes.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/Pass/Pass.h"

-using namespace mlir;
-using namespace mlir::gpu;
-using namespace mlir::scf;
-
 #include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc"
 #include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc"
+
 namespace mlir {
-namespace gpu {

-StringRef getMappingAttrName() { return "mapping"; }
+using scf::ParallelOp;

-ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
-                                                     AffineMap map,
-                                                     AffineMap bound) {
+StringRef gpu::getMappingAttrName() { return "mapping"; }
+
+gpu::ParallelLoopDimMapping
+gpu::getParallelLoopDimMappingAttr(Processor processor, AffineMap map,
+                                   AffineMap bound) {
  MLIRContext *context = map.getContext();
  OpBuilder builder(context);
  return ParallelLoopDimMapping::get(
@ -40,8 +39,8 @@ ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor,
      AffineMapAttr::get(map), AffineMapAttr::get(bound), context);
 }

-LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
-                             ArrayRef<ParallelLoopDimMapping> mapping) {
+LogicalResult gpu::setMappingAttr(ParallelOp ploopOp,
+                                  ArrayRef<ParallelLoopDimMapping> mapping) {
  // Verify that each processor is mapped to only once.
  llvm::DenseSet<gpu::Processor> specifiedMappings;
  for (auto dimAttr : mapping) {
@ -56,20 +55,17 @@ LogicalResult setMappingAttr(scf::ParallelOp ploopOp,
                   ArrayAttr::get(ploopOp.getContext(), mappingAsAttrs));
  return success();
 }
-} // namespace gpu
-} // namespace mlir

+namespace gpu {
 namespace {
-
 enum MappingLevel { MapGrid = 0, MapBlock = 1, Sequential = 2 };
+} // namespace

 static constexpr int kNumHardwareIds = 3;

-} // namespace
-
 /// Bounded increment on MappingLevel. Increments to the next
 /// level unless Sequential was already reached.
-MappingLevel &operator++(MappingLevel &mappingLevel) {
+static MappingLevel &operator++(MappingLevel &mappingLevel) {
  if (mappingLevel < Sequential) {
    mappingLevel = static_cast<MappingLevel>(mappingLevel + 1);
  }
@ -82,8 +78,7 @@ MappingLevel &operator++(MappingLevel &mappingLevel) {
 /// TODO: Make this use x for the inner-most loop that is
 /// distributed to map to x, the next innermost to y and the next innermost to
 /// z.
-static gpu::Processor getHardwareIdForMapping(MappingLevel level,
-                                              int dimension) {
+static Processor getHardwareIdForMapping(MappingLevel level, int dimension) {

  if (dimension >= kNumHardwareIds || level == Sequential)
    return Processor::Sequential;
@ -145,6 +140,21 @@ static void mapParallelOp(ParallelOp parallelOp,
  }
 }

-void mlir::greedilyMapParallelSCFToGPU(Region &region) {
-  region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
+namespace {
+struct GpuMapParallelLoopsPass
+    : public GpuMapParallelLoopsPassBase<GpuMapParallelLoopsPass> {
+  void runOnOperation() override {
+    for (Region &region : getOperation()->getRegions()) {
+      region.walk([](ParallelOp parallelOp) { mapParallelOp(parallelOp); });
+    }
+  }
+};
+
+} // namespace
+} // namespace gpu
+} // namespace mlir
+
+std::unique_ptr<mlir::OperationPass<mlir::func::FuncOp>>
+mlir::createGpuMapParallelLoopsPass() {
+  return std::make_unique<gpu::GpuMapParallelLoopsPass>();
 }
--- a/mlir/test/Dialect/GPU/mapping.mlir
+++ b/mlir/test/Dialect/GPU/mapping.mlir
@ -1,4 +1,4 @@
-// RUN: mlir-opt -test-gpu-greedy-parallel-loop-mapping -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -gpu-map-parallel-loops -split-input-file %s | FileCheck %s

 func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
                    %arg3 : index) {
--- a/mlir/test/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/GPU/CMakeLists.txt
@ -3,7 +3,6 @@ add_mlir_library(MLIRGPUTestPasses
  TestConvertGPUKernelToCubin.cpp
  TestConvertGPUKernelToHsaco.cpp
  TestGpuMemoryPromotion.cpp
-  TestGpuParallelLoopMapping.cpp
  TestGpuRewrite.cpp

  EXCLUDE_FROM_LIBMLIR
--- a/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp
+++ b/mlir/test/lib/Dialect/GPU/TestGpuParallelLoopMapping.cpp
@ -1,47 +0,0 @@
-//===- TestGPUParallelLoopMapping.cpp - Test pass for GPU loop mapping ----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the pass testing the utilities for mapping parallel
-// loops to gpu hardware ids.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/GPU/ParallelLoopMapper.h"
-#include "mlir/Pass/Pass.h"
-
-using namespace mlir;
-
-namespace {
-/// Simple pass for testing the mapping of parallel loops to hardware ids using
-/// a greedy mapping strategy.
-struct TestGpuGreedyParallelLoopMappingPass
-    : public PassWrapper<TestGpuGreedyParallelLoopMappingPass,
-                         OperationPass<>> {
-  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
-      TestGpuGreedyParallelLoopMappingPass)
-
-  StringRef getArgument() const final {
-    return "test-gpu-greedy-parallel-loop-mapping";
-  }
-  StringRef getDescription() const final {
-    return "Greedily maps all parallel loops to gpu hardware ids.";
-  }
-  void runOnOperation() override {
-    for (Region &region : getOperation()->getRegions())
-      greedilyMapParallelSCFToGPU(region);
-  }
-};
-} // namespace
-
-namespace mlir {
-namespace test {
-void registerTestGpuParallelLoopMappingPass() {
-  PassRegistration<TestGpuGreedyParallelLoopMappingPass>();
-}
-} // namespace test
-} // namespace mlir
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@ -79,7 +79,6 @@ void registerTestDynamicPipelinePass();
 void registerTestExpandTanhPass();
 void registerTestComposeSubView();
 void registerTestMultiBuffering();
-void registerTestGpuParallelLoopMappingPass();
 void registerTestIRVisitorsPass();
 void registerTestGenericIRVisitorsPass();
 void registerTestGenericIRVisitorsInterruptPass();
@ -176,7 +175,6 @@ void registerTestPasses() {
  mlir::test::registerTestExpandTanhPass();
  mlir::test::registerTestComposeSubView();
  mlir::test::registerTestMultiBuffering();
-  mlir::test::registerTestGpuParallelLoopMappingPass();
  mlir::test::registerTestIRVisitorsPass();
  mlir::test::registerTestGenericIRVisitorsPass();
  mlir::test::registerTestInterfaces();