[mlir][GPU] Extend GPU kernel outlining to generate DL specification

This patch extends the GPU kernel outlining pass so that it can take an
optional data layout specification, which is attached to each generated GPU
module operation. If no data layout specification is provided, the default
data layout is used instead.

Reviewed By: herhut, mehdi_amini

Differential Revision: https://reviews.llvm.org/D115722
This commit is contained in:
Diego Caballero 2021-12-16 09:47:41 +00:00
parent 59a85a7a52
commit 32fe1a8a25
6 changed files with 67 additions and 5 deletions

View File

@ -25,7 +25,8 @@ class Module;
namespace mlir {
/// Replaces `gpu.launch` with `gpu.launch_func` by moving the region into
/// a separate kernel function.
std::unique_ptr<OperationPass<ModuleOp>> createGpuKernelOutliningPass();
std::unique_ptr<OperationPass<ModuleOp>>
createGpuKernelOutliningPass(StringRef dataLayoutStr = StringRef());
/// Rewrites a function region so that GPU ops execute asynchronously.
std::unique_ptr<OperationPass<FuncOp>> createGpuAsyncRegionPass();

View File

@ -14,6 +14,7 @@ include "mlir/Pass/PassBase.td"
// Pass definition for `gpu-kernel-outlining`, which runs on `ModuleOp` and is
// constructed through `mlir::createGpuKernelOutliningPass()`.
def GpuKernelOutlining : Pass<"gpu-kernel-outlining", "ModuleOp"> {
let summary = "Outline gpu.launch bodies to kernel functions";
let constructor = "mlir::createGpuKernelOutliningPass()";
// The DLTI dialect is declared as a dependency of this pass.
let dependentDialects = ["mlir::DLTIDialect"];
}
def GpuAsyncRegionPass : FunctionPass<"gpu-async-region"> {

View File

@ -12,6 +12,7 @@
#include "PassDetail.h"
#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/GPU/Utils.h"
@ -20,6 +21,7 @@
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Parser.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/RegionUtils.h"
@ -239,6 +241,31 @@ namespace {
class GpuKernelOutliningPass
: public GpuKernelOutliningBase<GpuKernelOutliningPass> {
public:
/// Builds the pass and seeds the `data-layout-str` option from `dlStr`.
/// The option is left untouched when `dlStr` is empty or when the option
/// already reports a value.
GpuKernelOutliningPass(StringRef dlStr) {
  if (dlStr.empty() || dataLayoutStr.hasValue())
    return;
  dataLayoutStr = dlStr.str();
}
/// Copy constructor: copies the parsed data layout specification and the
/// value of the `data-layout-str` option from `other`.
///
/// The base class is initialized explicitly from `other`. Without it, a
/// user-provided copy constructor default-constructs the base sub-object,
/// which compilers warn about (e.g. GCC with -Wextra).
GpuKernelOutliningPass(const GpuKernelOutliningPass &other)
    : GpuKernelOutliningBase(other), dataLayoutSpec(other.dataLayoutSpec) {
  // The option member is assigned in the body rather than listed in the
  // initializer list, matching how the value was propagated originally.
  dataLayoutStr = other.dataLayoutStr;
}
/// Parses the `data-layout-str` option, if any, into `dataLayoutSpec`.
///
/// Returns success when the option is empty (nothing to parse) or when the
/// string parses to an attribute that casts to `DataLayoutSpecInterface`.
/// Returns failure when the string does not parse to an attribute or the
/// cast yields a null interface.
LogicalResult initialize(MLIRContext *context) override {
  // No data layout string supplied: leave `dataLayoutSpec` as it is.
  if (dataLayoutStr.empty())
    return success();

  Attribute parsedAttr = mlir::parseAttribute(dataLayoutStr, context);
  if (!parsedAttr)
    return failure();

  // Only attributes exposing the data layout spec interface are accepted.
  dataLayoutSpec = parsedAttr.dyn_cast<DataLayoutSpecInterface>();
  if (!dataLayoutSpec)
    return failure();

  return success();
}
void runOnOperation() override {
SymbolTable symbolTable(getOperation());
bool modified = false;
@ -290,6 +317,12 @@ private:
OpBuilder builder(context);
auto kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
kernelFunc.getName());
// If a valid data layout spec was provided, attach it to the kernel module.
// Otherwise, the default data layout will be used.
if (dataLayoutSpec)
kernelModule->setAttr("dlspec", dataLayoutSpec);
SymbolTable symbolTable(kernelModule);
symbolTable.insert(kernelFunc);
@ -313,10 +346,18 @@ private:
return kernelModule;
}
/// Pass option `data-layout-str`: textual form of the data layout
/// specification. `initialize` parses it into `dataLayoutSpec` when it is
/// non-empty.
Option<std::string> dataLayoutStr{
*this, "data-layout-str",
llvm::cl::desc("String containing the data layout specification to be "
"attached to the GPU kernel module")};
/// Parsed data layout specification. It is attached to each generated GPU
/// kernel module as the `dlspec` attribute when it is non-null.
DataLayoutSpecInterface dataLayoutSpec;
};
} // namespace
std::unique_ptr<OperationPass<ModuleOp>> mlir::createGpuKernelOutliningPass() {
return std::make_unique<GpuKernelOutliningPass>();
std::unique_ptr<OperationPass<ModuleOp>>
mlir::createGpuKernelOutliningPass(StringRef dataLayoutStr) {
return std::make_unique<GpuKernelOutliningPass>(dataLayoutStr);
}

View File

@ -10,6 +10,7 @@
#define DIALECT_GPU_TRANSFORMS_PASSDETAIL_H_
#include "mlir/Dialect/Async/IR/Async.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Pass/Pass.h"
namespace mlir {

View File

@ -1,4 +1,5 @@
// RUN: mlir-opt -allow-unregistered-dialect -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s
// RUN: mlir-opt -allow-unregistered-dialect -gpu-kernel-outlining=data-layout-str='#dlti.dl_spec<#dlti.dl_entry<index,32:i32>>' -split-input-file %s | FileCheck --check-prefix CHECK-DL %s
// CHECK: module attributes {gpu.container_module}
@ -35,8 +36,9 @@ func @launch() {
return
}
// CHECK-DL-LABEL: gpu.module @launch_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
// CHECK-LABEL: module @launch_kernel
// CHECK-LABEL: gpu.module @launch_kernel
// CHECK-NEXT: gpu.func @launch_kernel
// CHECK-SAME: (%[[KERNEL_ARG0:.*]]: f32, %[[KERNEL_ARG1:.*]]: memref<?xf32, 1>)
// CHECK-NEXT: %[[BID:.*]] = "gpu.block_id"() {dimension = "x"} : () -> index
@ -81,7 +83,10 @@ func @multiple_launches() {
return
}
// CHECK: module @multiple_launches_kernel
// CHECK-DL-LABEL: gpu.module @multiple_launches_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
// CHECK-DL-LABEL: gpu.module @multiple_launches_kernel_0 attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
// CHECK: gpu.module @multiple_launches_kernel
// CHECK: func @multiple_launches_kernel
// CHECK: module @multiple_launches_kernel_0
// CHECK: func @multiple_launches_kernel
@ -106,6 +111,8 @@ func @extra_constants_not_inlined(%arg0: memref<?xf32>) {
return
}
// CHECK-DL-LABEL: gpu.module @extra_constants_not_inlined_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
// CHECK-LABEL: func @extra_constants_not_inlined_kernel(%{{.*}}: memref<?xf32>, %{{.*}}: index)
// CHECK: arith.constant 2
@ -130,6 +137,8 @@ func @extra_constants(%arg0: memref<?xf32>) {
return
}
// CHECK-DL-LABEL: gpu.module @extra_constants_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
// CHECK-LABEL: func @extra_constants_kernel(
// CHECK-SAME: %[[KARG0:.*]]: memref<?xf32>
// CHECK: arith.constant 2
@ -158,6 +167,8 @@ func @extra_constants_noarg(%arg0: memref<?xf32>, %arg1: memref<?xf32>) {
return
}
// CHECK-DL-LABEL: gpu.module @extra_constants_noarg_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
// CHECK-LABEL: func @extra_constants_noarg_kernel(
// CHECK-SAME: %[[KARG0:.*]]: memref<?xf32>, %[[KARG1:.*]]: index
// CHECK: %[[KCST:.*]] = arith.constant 2
@ -186,6 +197,8 @@ func @multiple_uses(%arg0 : memref<?xf32>) {
return
}
// CHECK-DL-LABEL: gpu.module @multiple_uses_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
// -----
// CHECK-LABEL: @multiple_uses2
@ -213,6 +226,8 @@ func @multiple_uses2(%arg0 : memref<*xf32>) {
return
}
// CHECK-DL-LABEL: gpu.module @multiple_uses2_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
// -----
llvm.mlir.global internal @global(42 : i64) : i64
@ -242,6 +257,8 @@ func @recursive_device_function() {
return
}
// CHECK-DL-LABEL: gpu.module @function_call_kernel attributes {dlspec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
// CHECK: gpu.module @function_call_kernel {
// CHECK: gpu.func @function_call_kernel()
// CHECK: call @device_function() : () -> ()

View File

@ -2999,6 +2999,7 @@ cc_library(
deps = [
":ArithmeticDialect",
":Async",
":DLTIDialect",
":GPUDialect",
":GPUPassIncGen",
":MemRefDialect",