[mlir] Remove mlir-cuda-runner

Change CUDA integration tests to use mlir-opt + mlir-cpu-runner instead.

Depends On D98203

Reviewed By: herhut

Differential Revision: https://reviews.llvm.org/D98396
Author: Christian Sigg
Date:   2021-03-11 08:34:53 +01:00
Parent: bd197ed0a5
Commit: 1ef544d4a9

16 files changed, 57 insertions(+), 309 deletions(-)
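The change, sketched once here for reference (the input file name and the
unsubstituted library paths are illustrative; the actual RUN lines below use
lit substitutions such as %linalg_test_lib_dir and %shlibext):

  # Before: mlir-cuda-runner compiled, JIT-linked and ran the module itself.
  mlir-cuda-runner input.mlir \
    -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
    -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
    --shared-libs=libmlir_cuda_runtime.so --entry-point-result=void

  # After: mlir-opt lowers the kernels to cubin blobs via the gpu-to-cubin
  # pass (added in D98203), and the generic mlir-cpu-runner JITs the host code.
  mlir-opt input.mlir \
    -gpu-kernel-outlining \
    -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
    -gpu-to-llvm \
  | mlir-cpu-runner \
    --shared-libs=libmlir_cuda_runtime.so --entry-point-result=void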


@@ -88,12 +88,6 @@ if(LLVM_BUILD_EXAMPLES)
   )
 endif()
 
-if(MLIR_CUDA_RUNNER_ENABLED)
-  list(APPEND MLIR_TEST_DEPENDS
-    mlir-cuda-runner
-  )
-endif()
-
 if(MLIR_ROCM_RUNNER_ENABLED)
   list(APPEND MLIR_TEST_DEPENDS
     mlir-rocm-runner


@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \


@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \


@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \


@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \


@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \


@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \


@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \


@@ -1,8 +1,9 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-async-region -async-ref-counting \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-async-region -async-ref-counting -gpu-to-llvm \
 // RUN:   -async-to-async-runtime -convert-async-to-llvm -convert-std-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \


@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \


@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \


@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s \
 // RUN:   -gpu-kernel-outlining \
-// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{gpu-binary-annotation=nvvm.cubin})' \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
 // RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \


@@ -1,6 +1,8 @@
-// RUN: mlir-cuda-runner %s \
-// RUN:   -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
-// RUN:   -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: mlir-opt %s \
+// RUN:   -gpu-kernel-outlining \
+// RUN:   -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \
+// RUN:   -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_cuda_runtime%shlibext \
 // RUN:   --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
 // RUN:   --entry-point-result=void \


@@ -1,4 +1,3 @@
-add_subdirectory(mlir-cuda-runner)
 add_subdirectory(mlir-cpu-runner)
 add_subdirectory(mlir-opt)
 add_subdirectory(mlir-reduce)


@ -1,75 +0,0 @@
set(LLVM_OPTIONAL_SOURCES
mlir-cuda-runner.cpp
)
set(LLVM_LINK_COMPONENTS
Core
Support
)
if(MLIR_CUDA_RUNNER_ENABLED)
if (NOT ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD))
message(SEND_ERROR
"Building the mlir cuda runner requires the NVPTX backend")
endif()
# Configure CUDA runner support. Using check_language first allows us to give
# a custom error message.
include(CheckLanguage)
check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
enable_language(CUDA)
else()
message(SEND_ERROR
"Building the mlir cuda runner requires a working CUDA install")
endif()
# We need the libcuda.so library.
find_library(CUDA_RUNTIME_LIBRARY cuda)
get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
set(LIBS
${conversion_libs}
MLIRJitRunner
MLIRAnalysis
MLIRAsync
MLIREDSC
MLIRExecutionEngine
MLIRGPU
MLIRIR
MLIRLLVMIR
MLIRLLVMToLLVMIRTranslation
MLIRNVVMIR
MLIRParser
MLIRStandard
MLIRSupport
MLIRTargetLLVMIRExport
MLIRNVVMToLLVMIRTranslation
MLIRTransforms
MLIRTranslation
${CUDA_RUNTIME_LIBRARY}
)
# Manually expand the target library, since our MLIR libraries
# aren't plugged into the LLVM dependency tracking. If we don't
# do this then we can't insert the CodeGen library after ourselves
llvm_expand_pseudo_components(TARGET_LIBS AllTargetsCodeGens)
# Prepend LLVM in front of every target, this is how the library
# are named with CMake
SET(targets_to_link)
FOREACH(t ${TARGET_LIBS})
LIST(APPEND targets_to_link "LLVM${t}")
ENDFOREACH(t)
add_llvm_tool(mlir-cuda-runner
mlir-cuda-runner.cpp
DEPENDS
mlir_cuda_runtime
)
target_include_directories(mlir-cuda-runner
PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
)
llvm_update_compile_flags(mlir-cuda-runner)
target_link_libraries(mlir-cuda-runner PRIVATE ${LIBS} ${targets_to_link})
endif()


@ -1,191 +0,0 @@
//===- mlir-cuda-runner.cpp - MLIR CUDA Execution Driver-------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a command line utility that executes an MLIR file on the GPU by
// translating MLIR to NVVM/LVVM IR before JIT-compiling and executing the
// latter.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/STLExtras.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Conversion/Passes.h"
#include "mlir/Dialect/Async/IR/Async.h"
#include "mlir/Dialect/Async/Passes.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/ExecutionEngine/JitRunner.h"
#include "mlir/ExecutionEngine/OptUtils.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/TargetSelect.h"
#include "cuda.h"
using namespace mlir;
static void emitCudaError(const llvm::Twine &expr, const char *buffer,
CUresult result, Location loc) {
const char *error;
cuGetErrorString(result, &error);
emitError(loc, expr.concat(" failed with error code ")
.concat(llvm::Twine{error})
.concat("[")
.concat(buffer)
.concat("]"));
}
#define RETURN_ON_CUDA_ERROR(expr) \
do { \
if (auto status = (expr)) { \
emitCudaError(#expr, jitErrorBuffer, status, loc); \
return {}; \
} \
} while (false)
OwnedBlob compilePtxToCubin(const std::string ptx, Location loc,
StringRef name) {
char jitErrorBuffer[4096] = {0};
// Initialize CUDA once in a thread-safe manner.
static CUresult cuInitResult = [] { return cuInit(/*flags=*/0); }();
RETURN_ON_CUDA_ERROR(cuInitResult);
// Linking requires a device context.
CUdevice device;
RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
CUcontext context;
RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device));
CUlinkState linkState;
CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
void *jitOptionsVals[] = {jitErrorBuffer,
reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */
jitOptions, /* jit options */
jitOptionsVals, /* jit option values */
&linkState));
RETURN_ON_CUDA_ERROR(
cuLinkAddData(linkState, CUjitInputType::CU_JIT_INPUT_PTX,
const_cast<void *>(static_cast<const void *>(ptx.c_str())),
ptx.length(), name.str().data(), /* kernel name */
0, /* number of jit options */
nullptr, /* jit options */
nullptr /* jit option values */
));
void *cubinData;
size_t cubinSize;
RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
char *cubinAsChar = static_cast<char *>(cubinData);
OwnedBlob result =
std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);
// This will also destroy the cubin data.
RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
RETURN_ON_CUDA_ERROR(cuCtxDestroy(context));
return result;
}
struct GpuToCubinPipelineOptions
: public mlir::PassPipelineOptions<GpuToCubinPipelineOptions> {
Option<std::string> gpuBinaryAnnotation{
*this, "gpu-binary-annotation",
llvm::cl::desc("Annotation attribute string for GPU binary"),
llvm::cl::init(gpu::getDefaultGpuBinaryAnnotation())};
};
// Register cuda-runner specific passes.
static void registerCudaRunnerPasses() {
PassPipelineRegistration<GpuToCubinPipelineOptions> registerGpuToCubin(
"gpu-to-cubin", "Generate CUBIN from gpu.launch regions",
[&](OpPassManager &pm, const GpuToCubinPipelineOptions &options) {
pm.addPass(createGpuKernelOutliningPass());
auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
kernelPm.addPass(createStripDebugInfoPass());
kernelPm.addPass(createLowerGpuOpsToNVVMOpsPass());
kernelPm.addPass(createConvertGPUKernelToBlobPass(
translateModuleToLLVMIR, compilePtxToCubin, "nvptx64-nvidia-cuda",
"sm_35", "+ptx60", options.gpuBinaryAnnotation));
});
registerGPUPasses();
registerGpuToLLVMConversionPassPass();
registerAsyncPasses();
registerConvertAsyncToLLVMPass();
registerConvertStandardToLLVMPass();
}
static LogicalResult runMLIRPasses(ModuleOp module,
PassPipelineCLParser &passPipeline) {
PassManager pm(module.getContext(), PassManager::Nesting::Implicit);
applyPassManagerCLOptions(pm);
auto errorHandler = [&](const Twine &msg) {
emitError(UnknownLoc::get(module.getContext())) << msg;
return failure();
};
// Build the provided pipeline.
if (failed(passPipeline.addToPipeline(pm, errorHandler)))
return failure();
// Run the pipeline.
return pm.run(module);
}
int main(int argc, char **argv) {
llvm::InitLLVM y(argc, argv);
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
// Initialize LLVM NVPTX backend.
LLVMInitializeNVPTXTarget();
LLVMInitializeNVPTXTargetInfo();
LLVMInitializeNVPTXTargetMC();
LLVMInitializeNVPTXAsmPrinter();
mlir::initializeLLVMPasses();
registerCudaRunnerPasses();
PassPipelineCLParser passPipeline("", "Compiler passes to run");
registerPassManagerCLOptions();
auto mlirTransformer = [&](ModuleOp module) {
return runMLIRPasses(module, passPipeline);
};
mlir::JitRunnerConfig jitRunnerConfig;
jitRunnerConfig.mlirTransformer = mlirTransformer;
mlir::DialectRegistry registry;
registry.insert<mlir::LLVM::LLVMDialect, mlir::NVVM::NVVMDialect,
mlir::async::AsyncDialect, mlir::gpu::GPUDialect,
mlir::StandardOpsDialect>();
mlir::registerLLVMDialectTranslation(registry);
mlir::registerNVVMDialectTranslation(registry);
return mlir::JitRunnerMain(argc, argv, registry, jitRunnerConfig);
}
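
For readers tracking where the PTX-to-cubin logic went: the same CUDA driver
JIT-link flow as compilePtxToCubin above lives on in the gpu-to-cubin pass the
tests now use (added in D98203). A minimal standalone sketch of that flow,
assuming only the CUDA driver API ("cuda.h", linked against libcuda) and with
the error handling collapsed to early returns; linkPtxToCubin and the "kernel"
blob name are hypothetical:

  #include "cuda.h"

  #include <string>
  #include <vector>

  // Links one PTX module into a cubin with the driver's JIT linker.
  // Returns an empty vector on any failure.
  static std::vector<char> linkPtxToCubin(const std::string &ptx) {
    std::vector<char> cubin;
    if (cuInit(/*flags=*/0) != CUDA_SUCCESS)
      return cubin;

    // Linking requires a device context, as in the deleted code above.
    CUdevice device;
    CUcontext context;
    if (cuDeviceGet(&device, /*ordinal=*/0) != CUDA_SUCCESS ||
        cuCtxCreate(&context, /*flags=*/0, device) != CUDA_SUCCESS)
      return cubin;

    CUlinkState linkState;
    if (cuLinkCreate(/*numOptions=*/0, nullptr, nullptr, &linkState) ==
        CUDA_SUCCESS) {
      void *cubinData;
      size_t cubinSize;
      if (cuLinkAddData(linkState, CU_JIT_INPUT_PTX,
                        const_cast<char *>(ptx.c_str()), ptx.length(),
                        /*name=*/"kernel", /*numOptions=*/0, nullptr,
                        nullptr) == CUDA_SUCCESS &&
          cuLinkComplete(linkState, &cubinData, &cubinSize) == CUDA_SUCCESS)
        // cuLinkDestroy frees cubinData, so copy the blob out first.
        cubin.assign(static_cast<char *>(cubinData),
                     static_cast<char *>(cubinData) + cubinSize);
      cuLinkDestroy(linkState);
    }
    cuCtxDestroy(context);
    return cubin;
  }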