[StreamExecutor] Add KernelLoaderSpec

Summary:
Add definitions for the KernelLoaderSpec and MultiKernelLoaderSpec
classes to StreamExecutor. Instances of these classes are generated by the
compiler in order to provide host code with a handle to device code.

Reviewers: jlebar, tra

Subscribers: parallel_libs-commits

Differential Revision: https://reviews.llvm.org/D23038

llvm-svn: 277615
This commit is contained in:
Jason Henline 2016-08-03 18:04:13 +00:00
parent fa8ef91748
commit 8c04cbf882
6 changed files with 508 additions and 0 deletions

View File

@ -32,6 +32,7 @@ if(STREAM_EXECUTOR_STANDALONE)
enable_testing()
find_package(GTest REQUIRED)
include_directories(${GTEST_INCLUDE_DIRS})
find_package(Threads REQUIRED)
endif()
else(NOT STREAM_EXECUTOR_STANDALONE)
if(STREAM_EXECUTOR_UNIT_TESTS)

View File

@ -0,0 +1,263 @@
//===-- KernelSpec.h - Kernel loader spec types -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// KernelLoaderSpec is the base class for types that know where to find the
/// code for a data-parallel kernel in a particular format on a particular
/// platform. So, for example, there will be one subclass that deals with CUDA
/// PTX code, another subclass that deals with CUDA fatbin code, and yet another
/// subclass that deals with OpenCL text code.
///
/// A MultiKernelLoaderSpec is basically a collection of KernelLoaderSpec
/// instances. This is useful when code is available for the same kernel in
/// several different formats or targeted for several different platforms. All
/// the various KernelLoaderSpec instances for this kernel can be combined
/// together in one MultiKernelLoaderSpec and the specific platform consumer can
/// decide which instance of the code it wants to use.
///
/// MultiKernelLoaderSpec provides several helper functions to build and
/// register KernelLoaderSpec instances all in a single operation. For example,
/// MultiKernelLoaderSpec::addCUDAPTXInMemory can be used to construct and
/// register a CUDAPTXInMemorySpec KernelLoaderSpec.
///
/// The loader spec classes declared here are designed primarily to be
/// instantiated by the compiler, but they can also be instantiated directly by
/// the user. A simplified example workflow which a compiler might follow in the
/// case of a CUDA kernel that is compiled to CUDA fatbin code is as follows:
///
/// 1. The user defines a kernel function called UserKernel.
/// 2. The compiler compiles the kernel code into CUDA fatbin data and embeds
/// that data into the host code at address __UserKernelFatbinAddress.
/// 3. The compiler adds code at the beginning of the host code to instantiate a
/// MultiKernelLoaderSpec:
/// \code
/// namespace compiler_cuda_namespace {
/// MultiKernelLoaderSpec UserKernelLoaderSpec;
/// } // namespace compiler_cuda_namespace
/// \endcode
/// 4. The compiler then adds code to the host code to add the fatbin data to
/// the new MultiKernelLoaderSpec, and to associate that data with the kernel
/// name "UserKernel":
/// \code
/// namespace compiler_cuda_namespace {
/// UserKernelLoaderSpec.addCUDAFatbinInMemory(
/// "UserKernel", __UserKernelFatbinAddress);
/// } // namespace compiler_cuda_namespace
/// \endcode
/// 5. The host code, having known beforehand that the compiler would initialize
/// a MultiKernelLoaderSpec based on the name of the CUDA kernel, makes use
/// of the symbol compiler_cuda_namespace::UserKernelLoaderSpec without
/// defining it.
///
/// In the example above, the MultiKernelLoaderSpec instance created by the
/// compiler can be used by the host code to create StreamExecutor kernel
/// objects. In turn, those StreamExecutor kernel objects can be used by the
/// host code to launch the kernel on the device as desired.
///
//===----------------------------------------------------------------------===//
#ifndef STREAMEXECUTOR_KERNELSPEC_H
#define STREAMEXECUTOR_KERNELSPEC_H
#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <utility>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
namespace streamexecutor {
/// An object that knows how to find the code for a device kernel.
///
/// This is the base class for the hierarchy of loader specs. The different
/// subclasses know how to find code in different formats (e.g. CUDA PTX, OpenCL
/// binary).
///
/// This base class has functionality for storing and getting the name of the
/// kernel as a string. It is not copyable, and it can only be created and
/// destroyed as part of a subclass object.
class KernelLoaderSpec {
public:
  /// Returns the name of the kernel this spec loads.
  const std::string &getKernelName() const { return KernelName; }

protected:
  /// Creates a spec for the kernel named KernelName.
  explicit KernelLoaderSpec(llvm::StringRef KernelName);

  /// Protected and non-virtual on purpose: the hierarchy has no virtual
  /// functions, so destroying a subclass object through a KernelLoaderSpec
  /// pointer would be undefined behavior. Keeping the destructor protected
  /// turns that mistake into a compile error without adding a vtable.
  ~KernelLoaderSpec() = default;

private:
  std::string KernelName;

  KernelLoaderSpec(const KernelLoaderSpec &) = delete;
  KernelLoaderSpec &operator=(const KernelLoaderSpec &) = delete;
};
/// A KernelLoaderSpec for CUDA PTX code that resides in memory as a
/// null-terminated string.
///
/// PTX code is stored per compute capability, so one spec can hold several
/// PTX variants of the same kernel.
class CUDAPTXInMemorySpec : public KernelLoaderSpec {
public:
  /// First component is major version, second component is minor version.
  using ComputeCapability = std::pair<int, int>;

  /// PTX code combined with its compute capability.
  struct PTXSpec {
    ComputeCapability TheComputeCapability;
    const char *PTXCode;
  };

  /// Creates a CUDAPTXInMemorySpec from an array of PTXSpec objects.
  ///
  /// Adds each item in SpecList to this object. If SpecList contains several
  /// entries with the same compute capability, only the first one is kept.
  ///
  /// Does not take ownership of the PTXCode pointers in the SpecList elements.
  CUDAPTXInMemorySpec(llvm::StringRef KernelName,
                      llvm::ArrayRef<PTXSpec> SpecList);

  /// Returns a pointer to the PTX code for the requested compute capability.
  ///
  /// Returns nullptr on failed lookup (if the requested compute capability is
  /// not available). Matches exactly the specified compute capability. Doesn't
  /// try to do anything smart like finding the next best compute capability if
  /// the specified capability cannot be found.
  const char *getCode(int ComputeCapabilityMajor,
                      int ComputeCapabilityMinor) const;

private:
  /// PTX code contents in memory.
  ///
  /// The key is a pair (cc_major, cc_minor), e.g., (2, 0), (3, 0), (3, 5).
  std::map<ComputeCapability, const char *> PTXByComputeCapability;

  CUDAPTXInMemorySpec(const CUDAPTXInMemorySpec &) = delete;
  CUDAPTXInMemorySpec &operator=(const CUDAPTXInMemorySpec &) = delete;
};
/// A KernelLoaderSpec for CUDA fatbin code that resides in memory.
///
/// The spec only remembers where the fatbin data lives; it never copies or
/// frees that data.
class CUDAFatbinInMemorySpec : public KernelLoaderSpec {
public:
  /// Creates a CUDAFatbinInMemorySpec that refers to the fatbin data at Bytes.
  ///
  /// Does not take ownership of the Bytes pointer.
  CUDAFatbinInMemorySpec(llvm::StringRef KernelName, const void *Bytes);

  /// Returns the pointer to the fatbin data that was given at construction.
  const void *getBytes() const { return Bytes; }

private:
  CUDAFatbinInMemorySpec(const CUDAFatbinInMemorySpec &) = delete;
  CUDAFatbinInMemorySpec &operator=(const CUDAFatbinInMemorySpec &) = delete;

  /// Non-owning pointer to the fatbin data.
  const void *Bytes;
};
/// A KernelLoaderSpec for OpenCL text that resides in memory as a
/// null-terminated string.
///
/// The spec only remembers where the text lives; it never copies or frees it.
class OpenCLTextInMemorySpec : public KernelLoaderSpec {
public:
  /// Creates an OpenCLTextInMemorySpec that refers to the OpenCL text at Text.
  ///
  /// Does not take ownership of the Text pointer.
  OpenCLTextInMemorySpec(llvm::StringRef KernelName, const char *Text);

  /// Returns the pointer to the OpenCL text that was given at construction.
  const char *getText() const { return Text; }

private:
  OpenCLTextInMemorySpec(const OpenCLTextInMemorySpec &) = delete;
  OpenCLTextInMemorySpec &operator=(const OpenCLTextInMemorySpec &) = delete;

  /// Non-owning pointer to the OpenCL text.
  const char *Text;
};
/// A container for the different KernelLoaderSpecs that describe one kernel.
///
/// Code for a single kernel may exist in several formats and for several
/// platforms; this class keeps all of those variants together.
///
/// The add* methods register a new KernelLoaderSpec, the has* methods report
/// which variants have been registered so far, and the get* methods return the
/// registered KernelLoaderSpec for a given variant.
///
/// Every stored KernelLoaderSpec is supposed to refer to the same kernel, so
/// they are all assumed to take the same number and type of parameters, but
/// nothing checks this. In debug mode the kernel names of all added specs are
/// checked for equality, so adding specs with different kernel names can abort
/// the program.
///
/// Because this interface is error prone, creating and initializing a
/// MultiKernelLoaderSpec is best left to the compiler rather than done by hand.
class MultiKernelLoaderSpec {
public:
  /// \name Presence queries
  /// Each returns true if a loader spec of that variant has been added.
  /// @{
  bool hasCUDAPTXInMemory() const {
    return static_cast<bool>(TheCUDAPTXInMemorySpec);
  }
  bool hasCUDAFatbinInMemory() const {
    return static_cast<bool>(TheCUDAFatbinInMemorySpec);
  }
  bool hasOpenCLTextInMemory() const {
    return static_cast<bool>(TheOpenCLTextInMemorySpec);
  }
  /// @}

  /// \name Accessors
  /// Each returns the loader spec of that variant.
  ///
  /// Precondition: the corresponding has* method returns true. The
  /// precondition is checked with assert only.
  /// @{
  const CUDAPTXInMemorySpec &getCUDAPTXInMemory() const {
    assert(hasCUDAPTXInMemory() && "getting spec that is not present");
    return *TheCUDAPTXInMemorySpec;
  }
  const CUDAFatbinInMemorySpec &getCUDAFatbinInMemory() const {
    assert(hasCUDAFatbinInMemory() && "getting spec that is not present");
    return *TheCUDAFatbinInMemorySpec;
  }
  const OpenCLTextInMemorySpec &getOpenCLTextInMemory() const {
    assert(hasOpenCLTextInMemory() && "getting spec that is not present");
    return *TheOpenCLTextInMemorySpec;
  }
  /// @}

  /// \name Builders
  /// Builder-pattern-like methods for initializing a MultiKernelLoaderSpec.
  /// Each returns *this so that calls can be chained.
  ///
  /// Each of these should be used at most once for a single
  /// MultiKernelLoaderSpec object. See the file comment for example usage.
  ///
  /// Note that the KernelName parameter must be consistent with the kernel in
  /// the PTX or OpenCL being loaded. Also be aware that in CUDA C++ the kernel
  /// name may be mangled by the compiler if it is not declared extern "C".
  /// @{

  /// Does not take ownership of the PTXCode pointers in the SpecList elements.
  MultiKernelLoaderSpec &
  addCUDAPTXInMemory(llvm::StringRef KernelName,
                     llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList);

  /// Does not take ownership of the FatbinBytes pointer.
  MultiKernelLoaderSpec &addCUDAFatbinInMemory(llvm::StringRef KernelName,
                                               const void *FatbinBytes);

  /// Does not take ownership of the OpenCLText pointer.
  MultiKernelLoaderSpec &addOpenCLTextInMemory(llvm::StringRef KernelName,
                                               const char *OpenCLText);
  /// @}

private:
  /// Records KernelName on the first call and, on later calls, asserts that
  /// KernelName equals the recorded name.
  void setKernelName(llvm::StringRef KernelName);

  /// Kernel name shared by all added specs; null until the first spec is added.
  std::unique_ptr<std::string> TheKernelName;

  /// One slot per supported variant; null until that variant is added.
  std::unique_ptr<CUDAPTXInMemorySpec> TheCUDAPTXInMemorySpec;
  std::unique_ptr<CUDAFatbinInMemorySpec> TheCUDAFatbinInMemorySpec;
  std::unique_ptr<OpenCLTextInMemorySpec> TheOpenCLTextInMemorySpec;
};
} // namespace streamexecutor
#endif // STREAMEXECUTOR_KERNELSPEC_H

View File

@ -2,3 +2,12 @@ add_library(
utils
OBJECT
Utils/Error.cpp)
# Build the streamexecutor library from the object files of the utils OBJECT
# library plus the kernel loader spec implementation.
add_library(
streamexecutor
$<TARGET_OBJECTS:utils>
KernelSpec.cpp)
# The unit tests are only configured when STREAM_EXECUTOR_UNIT_TESTS is set.
if(STREAM_EXECUTOR_UNIT_TESTS)
add_subdirectory(unittests)
endif()

View File

@ -0,0 +1,94 @@
//===-- KernelSpec.cpp - General kernel spec implementation ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation details for kernel loader specs.
///
//===----------------------------------------------------------------------===//
#include "streamexecutor/KernelSpec.h"
#include "llvm/ADT/STLExtras.h"
namespace streamexecutor {
/// Stores a copy of the kernel name in this spec.
KernelLoaderSpec::KernelLoaderSpec(llvm::StringRef KernelName)
    : KernelName(KernelName) {
}
/// Registers the PTX code of every SpecList entry under its compute
/// capability. std::map::emplace leaves an existing key untouched, so the
/// first entry for a given compute capability is the one that is kept.
CUDAPTXInMemorySpec::CUDAPTXInMemorySpec(
    llvm::StringRef KernelName,
    const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList)
    : KernelLoaderSpec(KernelName) {
  for (const PTXSpec &Entry : SpecList)
    PTXByComputeCapability.emplace(Entry.TheComputeCapability, Entry.PTXCode);
}
const char *CUDAPTXInMemorySpec::getCode(int ComputeCapabilityMajor,
int ComputeCapabilityMinor) const {
auto PTXIter =
PTXByComputeCapability.find(CUDAPTXInMemorySpec::ComputeCapability{
ComputeCapabilityMajor, ComputeCapabilityMinor});
if (PTXIter == PTXByComputeCapability.end()) {
return nullptr;
}
return PTXIter->second;
}
/// Remembers the fatbin pointer as given; the data is neither copied nor owned.
CUDAFatbinInMemorySpec::CUDAFatbinInMemorySpec(llvm::StringRef KernelName,
                                               const void *Bytes)
    : KernelLoaderSpec(KernelName), Bytes(Bytes) {
}
/// Remembers the text pointer as given; the text is neither copied nor owned.
OpenCLTextInMemorySpec::OpenCLTextInMemorySpec(llvm::StringRef KernelName,
                                               const char *Text)
    : KernelLoaderSpec(KernelName), Text(Text) {
}
/// Records the kernel name the first time it is called. On every later call
/// it only asserts that KernelName equals the recorded name, so a mismatch is
/// detected only in builds with assertions enabled.
void MultiKernelLoaderSpec::setKernelName(llvm::StringRef KernelName) {
  if (!TheKernelName) {
    // First spec added: remember its kernel name.
    TheKernelName = llvm::make_unique<std::string>(KernelName);
    return;
  }
  assert(KernelName.equals(*TheKernelName) &&
         "different kernel names in one MultiKernelLoaderSpec");
}
/// Adds the CUDA PTX variant of the kernel and returns *this for chaining.
///
/// Asserts that no CUDA PTX spec has been added before and that KernelName
/// matches the name used by previously added specs.
MultiKernelLoaderSpec &MultiKernelLoaderSpec::addCUDAPTXInMemory(
    llvm::StringRef KernelName,
    llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList) {
  assert((TheCUDAPTXInMemorySpec == nullptr) &&
         "illegal loader spec overwrite");

  // Check the name before creating the spec.
  setKernelName(KernelName);

  TheCUDAPTXInMemorySpec =
      llvm::make_unique<CUDAPTXInMemorySpec>(KernelName, SpecList);
  return *this;
}
/// Adds the CUDA fatbin variant of the kernel and returns *this for chaining.
///
/// Asserts that no CUDA fatbin spec has been added before and that KernelName
/// matches the name used by previously added specs.
MultiKernelLoaderSpec &
MultiKernelLoaderSpec::addCUDAFatbinInMemory(llvm::StringRef KernelName,
                                             const void *Bytes) {
  assert((TheCUDAFatbinInMemorySpec == nullptr) &&
         "illegal loader spec overwrite");

  // Check the name before creating the spec.
  setKernelName(KernelName);

  TheCUDAFatbinInMemorySpec =
      llvm::make_unique<CUDAFatbinInMemorySpec>(KernelName, Bytes);
  return *this;
}
/// Adds the OpenCL text variant of the kernel and returns *this for chaining.
///
/// Asserts that no OpenCL text spec has been added before and that KernelName
/// matches the name used by previously added specs.
MultiKernelLoaderSpec &
MultiKernelLoaderSpec::addOpenCLTextInMemory(llvm::StringRef KernelName,
                                             const char *OpenCLText) {
  assert((TheOpenCLTextInMemorySpec == nullptr) &&
         "illegal loader spec overwrite");

  // Check the name before creating the spec.
  setKernelName(KernelName);

  TheOpenCLTextInMemorySpec =
      llvm::make_unique<OpenCLTextInMemorySpec>(KernelName, OpenCLText);
  return *this;
}
} // namespace streamexecutor

View File

@ -0,0 +1,9 @@
# Unit-test executable for the kernel loader spec classes.
add_executable(
  kernel_spec_test
  KernelSpecTest.cpp)

# Link the library under test, GoogleTest (GTEST_BOTH_LIBRARIES includes
# gtest_main), and the thread library found by find_package(Threads).
target_link_libraries(
  kernel_spec_test
  streamexecutor
  ${GTEST_BOTH_LIBRARIES}
  ${CMAKE_THREAD_LIBS_INIT})

# Use the NAME/COMMAND signature so that CTest replaces the target name with
# the path of the built executable. The legacy add_test(<name> <command>)
# signature performs no such substitution.
add_test(NAME KernelSpecTest COMMAND kernel_spec_test)

View File

@ -0,0 +1,132 @@
//===-- KernelSpecTest.cpp - Tests for KernelSpec -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the unit tests for the code in KernelSpec.
///
//===----------------------------------------------------------------------===//
#include "streamexecutor/KernelSpec.h"
#include "gtest/gtest.h"
namespace {
namespace se = ::streamexecutor;
// A spec built from an empty list keeps its kernel name and has no PTX code.
TEST(CUDAPTXInMemorySpec, NoCode) {
  se::CUDAPTXInMemorySpec EmptySpec("KernelName", {});

  EXPECT_EQ("KernelName", EmptySpec.getKernelName());
  EXPECT_EQ(nullptr, EmptySpec.getCode(1, 0));
}
// With one registered compute capability, only that exact capability returns
// the registered pointer; any other capability returns nullptr.
TEST(CUDAPTXInMemorySpec, SingleComputeCapability) {
  const char *PTX10 = "Dummy PTX code";
  se::CUDAPTXInMemorySpec PTXSpec("KernelName", {{{1, 0}, PTX10}});

  EXPECT_EQ("KernelName", PTXSpec.getKernelName());
  // Pointer identity is expected: the spec stores the pointer, not a copy.
  EXPECT_EQ(PTX10, PTXSpec.getCode(1, 0));
  EXPECT_EQ(nullptr, PTXSpec.getCode(2, 0));
}
// Two registered compute capabilities are looked up independently, and a
// capability between them is not matched.
TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) {
  const char *PTX10 = "Dummy PTX code 10";
  const char *PTX30 = "Dummy PTX code 30";
  se::CUDAPTXInMemorySpec PTXSpec("KernelName",
                                  {{{1, 0}, PTX10}, {{3, 0}, PTX30}});

  EXPECT_EQ("KernelName", PTXSpec.getKernelName());
  EXPECT_EQ(PTX10, PTXSpec.getCode(1, 0));
  EXPECT_EQ(PTX30, PTXSpec.getCode(3, 0));
  // No "next best" matching: (2, 0) was never registered.
  EXPECT_EQ(nullptr, PTXSpec.getCode(2, 0));
}
// The fatbin spec reports its kernel name and hands back the same pointer it
// was constructed with.
TEST(CUDAFatbinInMemorySpec, BasicUsage) {
  const char *Fatbin = "Dummy fatbin bytes";
  se::CUDAFatbinInMemorySpec FatbinSpec("KernelName", Fatbin);

  EXPECT_EQ("KernelName", FatbinSpec.getKernelName());
  EXPECT_EQ(Fatbin, FatbinSpec.getBytes());
}
// The OpenCL spec reports its kernel name and hands back the same pointer it
// was constructed with.
TEST(OpenCLTextInMemorySpec, BasicUsage) {
  const char *Source = "Dummy OpenCL text";
  se::OpenCLTextInMemorySpec TextSpec("KernelName", Source);

  EXPECT_EQ("KernelName", TextSpec.getKernelName());
  EXPECT_EQ(Source, TextSpec.getText());
}
// A freshly constructed MultiKernelLoaderSpec has no variants, and asking for
// a missing variant trips the getter's assertion.
TEST(MultiKernelLoaderSpec, NoCode) {
  se::MultiKernelLoaderSpec MultiSpec;
  EXPECT_FALSE(MultiSpec.hasCUDAPTXInMemory());
  EXPECT_FALSE(MultiSpec.hasCUDAFatbinInMemory());
  EXPECT_FALSE(MultiSpec.hasOpenCLTextInMemory());

  // The getters enforce their precondition with assert() only. In NDEBUG
  // builds EXPECT_DEBUG_DEATH would just execute the statement, which would
  // dereference a null std::unique_ptr (undefined behavior). Run the death
  // checks only when assertions are compiled in.
#ifndef NDEBUG
  EXPECT_DEATH(MultiSpec.getCUDAPTXInMemory(),
               "getting spec that is not present");
  EXPECT_DEATH(MultiSpec.getCUDAFatbinInMemory(),
               "getting spec that is not present");
  EXPECT_DEATH(MultiSpec.getOpenCLTextInMemory(),
               "getting spec that is not present");
#endif
}
// Adding all three variants through the chained builder calls makes each one
// visible through its has*/get* pair with the data that was registered.
TEST(MultiKernelLoaderSpec, Registration) {
  const char *Name = "KernelName";
  const char *PTX = "Dummy PTX code";
  const char *Fatbin = "Dummy fatbin bytes";
  const char *Source = "Dummy OpenCL text";

  se::MultiKernelLoaderSpec MultiSpec;
  MultiSpec.addCUDAPTXInMemory(Name, {{{1, 0}, PTX}})
      .addCUDAFatbinInMemory(Name, Fatbin)
      .addOpenCLTextInMemory(Name, Source);

  EXPECT_TRUE(MultiSpec.hasCUDAPTXInMemory());
  EXPECT_TRUE(MultiSpec.hasCUDAFatbinInMemory());
  EXPECT_TRUE(MultiSpec.hasOpenCLTextInMemory());

  // CUDA PTX variant.
  EXPECT_EQ(Name, MultiSpec.getCUDAPTXInMemory().getKernelName());
  EXPECT_EQ(PTX, MultiSpec.getCUDAPTXInMemory().getCode(1, 0));
  EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));

  // CUDA fatbin variant.
  EXPECT_EQ(Name, MultiSpec.getCUDAFatbinInMemory().getKernelName());
  EXPECT_EQ(Fatbin, MultiSpec.getCUDAFatbinInMemory().getBytes());

  // OpenCL text variant.
  EXPECT_EQ(Name, MultiSpec.getOpenCLTextInMemory().getKernelName());
  EXPECT_EQ(Source, MultiSpec.getOpenCLTextInMemory().getText());
}
// Adding the same variant a second time trips the overwrite assertion in
// builds with assertions enabled.
TEST(MultiKernelLoaderSpec, RegisterTwice) {
  const char *Name = "KernelName";
  const char *Fatbin = "Dummy fatbin bytes";

  se::MultiKernelLoaderSpec MultiSpec;
  MultiSpec.addCUDAFatbinInMemory(Name, Fatbin);

  EXPECT_DEBUG_DEATH(MultiSpec.addCUDAFatbinInMemory(Name, Fatbin),
                     "illegal loader spec overwrite");
}
// Kernel names are compared by content: equal strings at different addresses
// are accepted, while a different name trips the assertion in builds with
// assertions enabled.
TEST(MultiKernelLoaderSpec, ConflictingKernelNames) {
  const char *NameLiteral = "KernelName";
  // Same characters as NameLiteral, but held in separate storage.
  std::string NameCopy = NameLiteral;
  const char *PTX = "Dummy PTX code";
  const char *Fatbin = "Dummy fatbin bytes";

  se::MultiKernelLoaderSpec MultiSpec;
  // Equivalent strings in different locations must not be seen as a conflict.
  MultiSpec.addCUDAPTXInMemory(NameLiteral, {{{1, 0}, PTX}})
      .addCUDAFatbinInMemory(NameCopy, Fatbin);

  const char *DifferentName = "OtherKernelName";
  const char *Source = "Dummy OpenCL text";
  EXPECT_DEBUG_DEATH(MultiSpec.addOpenCLTextInMemory(DifferentName, Source),
                     "different kernel names in one MultiKernelLoaderSpec");
}
} // namespace