mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-15 20:51:35 +00:00
[StreamExecutor] Add KernelLoaderSpec
Summary: Add definitions for the KernelLoaderSpec and MultiKernelLoaderSpec classes to StreamExecutor. Instances of these classes are generated by the compiler in order to provide host code with a handle to device code. Reviewers: jlebar, tra Subscribers: parallel_libs-commits Differential Revision: https://reviews.llvm.org/D23038 llvm-svn: 277615
This commit is contained in:
parent
fa8ef91748
commit
8c04cbf882
@ -32,6 +32,7 @@ if(STREAM_EXECUTOR_STANDALONE)
|
||||
enable_testing()
|
||||
find_package(GTest REQUIRED)
|
||||
include_directories(${GTEST_INCLUDE_DIRS})
|
||||
find_package(Threads REQUIRED)
|
||||
endif()
|
||||
else(NOT STREAM_EXECUTOR_STANDALONE)
|
||||
if(STREAM_EXECUTOR_UNIT_TESTS)
|
||||
|
263
parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h
Normal file
263
parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h
Normal file
@ -0,0 +1,263 @@
|
||||
//===-- KernelSpec.h - Kernel loader spec types -----------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// KernelLoaderSpec is the base class for types that know where to find the
|
||||
/// code for a data-parallel kernel in a particular format on a particular
|
||||
/// platform. So, for example, there will be one subclass that deals with CUDA
|
||||
/// PTX code, another subclass that deals with CUDA fatbin code, and yet another
|
||||
/// subclass that deals with OpenCL text code.
|
||||
///
|
||||
/// A MultiKernelLoaderSpec is basically a collection of KernelLoaderSpec
|
||||
/// instances. This is useful when code is available for the same kernel in
|
||||
/// several different formats or targeted for several different platforms. All
|
||||
/// the various KernelLoaderSpec instances for this kernel can be combined
|
||||
/// together in one MultiKernelLoaderSpec and the specific platform consumer can
|
||||
/// decide which instance of the code it wants to use.
|
||||
///
|
||||
/// MultiKernelLoaderSpec provides several helper functions to build and
|
||||
/// register KernelLoaderSpec instances all in a single operation. For example,
|
||||
/// MultiKernelLoaderSpec::addCUDAPTXInMemory can be used to construct and
|
||||
/// register a CUDAPTXInMemorySpec KernelLoaderSpec.
|
||||
///
|
||||
/// The loader spec classes declared here are designed primarily to be
|
||||
/// instantiated by the compiler, but they can also be instantiated directly by
|
||||
/// the user. A simplified example workflow which a compiler might follow in the
|
||||
/// case of a CUDA kernel that is compiled to CUDA fatbin code is as follows:
|
||||
///
|
||||
/// 1. The user defines a kernel function called UserKernel.
|
||||
/// 2. The compiler compiles the kernel code into CUDA fatbin data and embeds
|
||||
/// that data into the host code at address __UserKernelFatbinAddress.
|
||||
/// 3. The compiler adds code at the beginning of the host code to instantiate a
|
||||
/// MultiKernelLoaderSpec:
|
||||
/// \code
|
||||
/// namespace compiler_cuda_namespace {
|
||||
/// MultiKernelLoaderSpec UserKernelLoaderSpec;
|
||||
/// } // namespace compiler_cuda_namespace
|
||||
/// \endcode
|
||||
/// 4. The compiler then adds code to the host code to add the fatbin data to
|
||||
/// the new MultiKernelLoaderSpec, and to associate that data with the kernel
|
||||
/// name "UserKernel":
|
||||
/// \code
|
||||
/// namespace compiler_cuda_namespace {
|
||||
/// UserKernelLoaderSpec.addCUDAFatbinInMemory(
|
||||
/// "UserKernel", __UserKernelFatbinAddress);
|
||||
/// } // namespace compiler_cuda_namespace
|
||||
/// \endcode
|
||||
/// 5. The host code, having known beforehand that the compiler would initialize
|
||||
/// a MultiKernelLoaderSpec based on the name of the CUDA kernel, makes use
|
||||
/// of the symbol compiler_cuda_namespace::UserKernelLoaderSpec without
|
||||
/// defining it.
|
||||
///
|
||||
/// In the example above, the MultiKernelLoaderSpec instance created by the
|
||||
/// compiler can be used by the host code to create StreamExecutor kernel
|
||||
/// objects. In turn, those StreamExecutor kernel objects can be used by the
|
||||
/// host code to launch the kernel on the device as desired.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef STREAMEXECUTOR_KERNELSPEC_H
|
||||
#define STREAMEXECUTOR_KERNELSPEC_H
|
||||
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
|
||||
namespace streamexecutor {
|
||||
|
||||
/// An object that knows how to find the code for a device kernel.
|
||||
///
|
||||
/// This is the base class for the hierarchy of loader specs. The different
|
||||
/// subclasses know how to find code in different formats (e.g. CUDA PTX, OpenCL
|
||||
/// binary).
|
||||
///
|
||||
/// This base class has functionality for storing and getting the name of the
|
||||
/// kernel as a string.
|
||||
class KernelLoaderSpec {
|
||||
public:
|
||||
/// Returns the name of the kernel this spec loads.
|
||||
const std::string &getKernelName() const { return KernelName; }
|
||||
|
||||
protected:
|
||||
explicit KernelLoaderSpec(llvm::StringRef KernelName);
|
||||
|
||||
private:
|
||||
std::string KernelName;
|
||||
|
||||
KernelLoaderSpec(const KernelLoaderSpec &) = delete;
|
||||
KernelLoaderSpec &operator=(const KernelLoaderSpec &) = delete;
|
||||
};
|
||||
|
||||
/// A KernelLoaderSpec for CUDA PTX code that resides in memory as a
|
||||
/// null-terminated string.
|
||||
class CUDAPTXInMemorySpec : public KernelLoaderSpec {
|
||||
public:
|
||||
/// First component is major version, second component is minor version.
|
||||
using ComputeCapability = std::pair<int, int>;
|
||||
|
||||
/// PTX code combined with its compute capability.
|
||||
struct PTXSpec {
|
||||
ComputeCapability TheComputeCapability;
|
||||
const char *PTXCode;
|
||||
};
|
||||
|
||||
/// Creates a CUDAPTXInMemorySpec from an array of PTXSpec objects.
|
||||
///
|
||||
/// Adds each item in SpecList to this object.
|
||||
///
|
||||
/// Does not take ownership of the PTXCode pointers in the SpecList elements.
|
||||
CUDAPTXInMemorySpec(llvm::StringRef KernelName,
|
||||
const llvm::ArrayRef<PTXSpec> SpecList);
|
||||
|
||||
/// Returns a pointer to the PTX code for the requested compute capability.
|
||||
///
|
||||
/// Returns nullptr on failed lookup (if the requested compute capability is
|
||||
/// not available). Matches exactly the specified compute capability. Doesn't
|
||||
/// try to do anything smart like finding the next best compute capability if
|
||||
/// the specified capability cannot be found.
|
||||
const char *getCode(int ComputeCapabilityMajor,
|
||||
int ComputeCapabilityMinor) const;
|
||||
|
||||
private:
|
||||
/// PTX code contents in memory.
|
||||
///
|
||||
/// The key is a pair (cc_major, cc_minor), i.e., (2, 0), (3, 0), (3, 5).
|
||||
std::map<ComputeCapability, const char *> PTXByComputeCapability;
|
||||
|
||||
CUDAPTXInMemorySpec(const CUDAPTXInMemorySpec &) = delete;
|
||||
CUDAPTXInMemorySpec &operator=(const CUDAPTXInMemorySpec &) = delete;
|
||||
};
|
||||
|
||||
/// A KernelLoaderSpec for CUDA fatbin code that resides in memory.
|
||||
class CUDAFatbinInMemorySpec : public KernelLoaderSpec {
|
||||
public:
|
||||
/// Creates a CUDAFatbinInMemorySpec with a reference to the given fatbin
|
||||
/// bytes.
|
||||
///
|
||||
/// Does not take ownership of the Bytes pointer.
|
||||
CUDAFatbinInMemorySpec(llvm::StringRef KernelName, const void *Bytes);
|
||||
|
||||
/// Gets the fatbin data bytes.
|
||||
const void *getBytes() const { return Bytes; }
|
||||
|
||||
private:
|
||||
const void *Bytes;
|
||||
|
||||
CUDAFatbinInMemorySpec(const CUDAFatbinInMemorySpec &) = delete;
|
||||
CUDAFatbinInMemorySpec &operator=(const CUDAFatbinInMemorySpec &) = delete;
|
||||
};
|
||||
|
||||
/// A KernelLoaderSpec for OpenCL text that resides in memory as a
|
||||
/// null-terminated string.
|
||||
class OpenCLTextInMemorySpec : public KernelLoaderSpec {
|
||||
public:
|
||||
/// Creates a OpenCLTextInMemorySpec with a reference to the given OpenCL text
|
||||
/// code bytes.
|
||||
///
|
||||
/// Does not take ownership of the Text pointer.
|
||||
OpenCLTextInMemorySpec(llvm::StringRef KernelName, const char *Text);
|
||||
|
||||
/// Returns the OpenCL text contents.
|
||||
const char *getText() const { return Text; }
|
||||
|
||||
private:
|
||||
const char *Text;
|
||||
|
||||
OpenCLTextInMemorySpec(const OpenCLTextInMemorySpec &) = delete;
|
||||
OpenCLTextInMemorySpec &operator=(const OpenCLTextInMemorySpec &) = delete;
|
||||
};
|
||||
|
||||
/// An object to store several different KernelLoaderSpecs for the same kernel.
|
||||
///
|
||||
/// This allows code in different formats and for different platforms to be
|
||||
/// stored all together for a single kernel.
|
||||
///
|
||||
/// Various methods are available to add a new KernelLoaderSpec to a
|
||||
/// MultiKernelLoaderSpec. There are also methods to query which formats and
|
||||
/// platforms are supported by the currently added KernelLoaderSpec objects, and
|
||||
/// methods to get the KernelLoaderSpec objects for each format and platform.
|
||||
///
|
||||
/// Since all stored KernelLoaderSpecs are supposed to reference the same
|
||||
/// kernel, they are all assumed to take the same number and type of parameters,
|
||||
/// but no checking is done to enforce this. In debug mode, all
|
||||
/// KernelLoaderSpecs are checked to make sure they have the same kernel name,
|
||||
/// so passing in specs with different kernel names can cause the program to
|
||||
/// abort.
|
||||
///
|
||||
/// This interface is prone to errors, so it is better to leave
|
||||
/// MultiKernelLoaderSpec creation and initialization to the compiler rather
|
||||
/// than doing it by hand.
|
||||
class MultiKernelLoaderSpec {
|
||||
public:
|
||||
// Convenience getters for testing whether these platform variants have
|
||||
// kernel loader specifications available.
|
||||
|
||||
bool hasCUDAPTXInMemory() const { return TheCUDAPTXInMemorySpec != nullptr; }
|
||||
bool hasCUDAFatbinInMemory() const {
|
||||
return TheCUDAFatbinInMemorySpec != nullptr;
|
||||
}
|
||||
bool hasOpenCLTextInMemory() const {
|
||||
return TheOpenCLTextInMemorySpec != nullptr;
|
||||
}
|
||||
|
||||
// Accessors for platform variant kernel load specifications.
|
||||
//
|
||||
// Precondition: corresponding has* method returns true.
|
||||
|
||||
const CUDAPTXInMemorySpec &getCUDAPTXInMemory() const {
|
||||
assert(hasCUDAPTXInMemory() && "getting spec that is not present");
|
||||
return *TheCUDAPTXInMemorySpec;
|
||||
}
|
||||
const CUDAFatbinInMemorySpec &getCUDAFatbinInMemory() const {
|
||||
assert(hasCUDAFatbinInMemory() && "getting spec that is not present");
|
||||
return *TheCUDAFatbinInMemorySpec;
|
||||
}
|
||||
const OpenCLTextInMemorySpec &getOpenCLTextInMemory() const {
|
||||
assert(hasOpenCLTextInMemory() && "getting spec that is not present");
|
||||
return *TheOpenCLTextInMemorySpec;
|
||||
}
|
||||
|
||||
// Builder-pattern-like methods for use in initializing a
|
||||
// MultiKernelLoaderSpec.
|
||||
//
|
||||
// Each of these should be used at most once for a single
|
||||
// MultiKernelLoaderSpec object. See file comment for example usage.
|
||||
//
|
||||
// Note that the KernelName parameter must be consistent with the kernel in
|
||||
// the PTX or OpenCL being loaded. Also be aware that in CUDA C++ the kernel
|
||||
// name may be mangled by the compiler if it is not declared extern "C".
|
||||
|
||||
/// Does not take ownership of the PTXCode pointers in the SpecList elements.
|
||||
MultiKernelLoaderSpec &
|
||||
addCUDAPTXInMemory(llvm::StringRef KernelName,
|
||||
llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList);
|
||||
|
||||
/// Does not take ownership of the FatbinBytes pointer.
|
||||
MultiKernelLoaderSpec &addCUDAFatbinInMemory(llvm::StringRef KernelName,
|
||||
const void *FatbinBytes);
|
||||
|
||||
/// Does not take ownership of the OpenCLText pointer.
|
||||
MultiKernelLoaderSpec &addOpenCLTextInMemory(llvm::StringRef KernelName,
|
||||
const char *OpenCLText);
|
||||
|
||||
private:
|
||||
void setKernelName(llvm::StringRef KernelName);
|
||||
|
||||
std::unique_ptr<std::string> TheKernelName;
|
||||
std::unique_ptr<CUDAPTXInMemorySpec> TheCUDAPTXInMemorySpec;
|
||||
std::unique_ptr<CUDAFatbinInMemorySpec> TheCUDAFatbinInMemorySpec;
|
||||
std::unique_ptr<OpenCLTextInMemorySpec> TheOpenCLTextInMemorySpec;
|
||||
};
|
||||
|
||||
} // namespace streamexecutor
|
||||
|
||||
#endif // STREAMEXECUTOR_KERNELSPEC_H
|
@ -2,3 +2,12 @@ add_library(
|
||||
utils
|
||||
OBJECT
|
||||
Utils/Error.cpp)
|
||||
|
||||
# Build the streamexecutor library from the object files of the "utils"
# target plus the kernel loader spec implementation.
add_library(
|
||||
streamexecutor
|
||||
$<TARGET_OBJECTS:utils>
|
||||
KernelSpec.cpp)
|
||||
|
||||
# Descend into the unit test directory only when unit tests are enabled.
if(STREAM_EXECUTOR_UNIT_TESTS)
|
||||
add_subdirectory(unittests)
|
||||
endif()
|
||||
|
94
parallel-libs/streamexecutor/lib/KernelSpec.cpp
Normal file
94
parallel-libs/streamexecutor/lib/KernelSpec.cpp
Normal file
@ -0,0 +1,94 @@
|
||||
//===-- KernelSpec.cpp - General kernel spec implementation ---------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// This file contains the implementation details for kernel loader specs.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "streamexecutor/KernelSpec.h"
|
||||
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
|
||||
namespace streamexecutor {
|
||||
|
||||
/// Stores a std::string copy of \p KernelName for getKernelName() to return.
KernelLoaderSpec::KernelLoaderSpec(llvm::StringRef KernelName)
    : KernelName(KernelName.str()) {}
|
||||
|
||||
/// Fills the capability-to-code map from \p SpecList.
///
/// Entries are inserted in list order with emplace, so an entry whose compute
/// capability is already present does not replace the earlier one.
CUDAPTXInMemorySpec::CUDAPTXInMemorySpec(
    llvm::StringRef KernelName,
    const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList)
    : KernelLoaderSpec(KernelName) {
  for (auto Entry = SpecList.begin(), End = SpecList.end(); Entry != End;
       ++Entry)
    PTXByComputeCapability.emplace(Entry->TheComputeCapability,
                                   Entry->PTXCode);
}
|
||||
|
||||
const char *CUDAPTXInMemorySpec::getCode(int ComputeCapabilityMajor,
|
||||
int ComputeCapabilityMinor) const {
|
||||
auto PTXIter =
|
||||
PTXByComputeCapability.find(CUDAPTXInMemorySpec::ComputeCapability{
|
||||
ComputeCapabilityMajor, ComputeCapabilityMinor});
|
||||
if (PTXIter == PTXByComputeCapability.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return PTXIter->second;
|
||||
}
|
||||
|
||||
CUDAFatbinInMemorySpec::CUDAFatbinInMemorySpec(llvm::StringRef KernelName,
|
||||
const void *Bytes)
|
||||
: KernelLoaderSpec(KernelName), Bytes(Bytes) {}
|
||||
|
||||
OpenCLTextInMemorySpec::OpenCLTextInMemorySpec(llvm::StringRef KernelName,
|
||||
const char *Text)
|
||||
: KernelLoaderSpec(KernelName), Text(Text) {}
|
||||
|
||||
/// Records the kernel name on first use and checks it on every later use.
///
/// The first call stores a copy of \p KernelName. Later calls only assert that
/// the name equals the stored one; with assertions disabled a mismatch goes
/// unnoticed and the stored name is left unchanged.
void MultiKernelLoaderSpec::setKernelName(llvm::StringRef KernelName) {
  if (!TheKernelName) {
    TheKernelName = llvm::make_unique<std::string>(KernelName.str());
    return;
  }
  assert(KernelName.equals(*TheKernelName) &&
         "different kernel names in one MultiKernelLoaderSpec");
}
|
||||
|
||||
/// Installs a CUDAPTXInMemorySpec built from \p KernelName and \p SpecList.
///
/// Asserts that no PTX spec was installed before, then validates or records
/// the kernel name. Returns *this to allow chaining.
MultiKernelLoaderSpec &MultiKernelLoaderSpec::addCUDAPTXInMemory(
    llvm::StringRef KernelName,
    llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList) {
  assert(!TheCUDAPTXInMemorySpec && "illegal loader spec overwrite");
  setKernelName(KernelName);
  auto NewSpec = llvm::make_unique<CUDAPTXInMemorySpec>(KernelName, SpecList);
  TheCUDAPTXInMemorySpec = std::move(NewSpec);
  return *this;
}
|
||||
|
||||
/// Installs a CUDAFatbinInMemorySpec built from \p KernelName and \p Bytes.
///
/// Asserts that no fatbin spec was installed before, then validates or records
/// the kernel name. Returns *this to allow chaining.
MultiKernelLoaderSpec &
MultiKernelLoaderSpec::addCUDAFatbinInMemory(llvm::StringRef KernelName,
                                             const void *Bytes) {
  assert(!TheCUDAFatbinInMemorySpec && "illegal loader spec overwrite");
  setKernelName(KernelName);
  auto NewSpec = llvm::make_unique<CUDAFatbinInMemorySpec>(KernelName, Bytes);
  TheCUDAFatbinInMemorySpec = std::move(NewSpec);
  return *this;
}
|
||||
|
||||
/// Installs an OpenCLTextInMemorySpec built from \p KernelName and
/// \p OpenCLText.
///
/// Asserts that no OpenCL text spec was installed before, then validates or
/// records the kernel name. Returns *this to allow chaining.
MultiKernelLoaderSpec &
MultiKernelLoaderSpec::addOpenCLTextInMemory(llvm::StringRef KernelName,
                                             const char *OpenCLText) {
  assert(!TheOpenCLTextInMemorySpec && "illegal loader spec overwrite");
  setKernelName(KernelName);
  auto NewSpec =
      llvm::make_unique<OpenCLTextInMemorySpec>(KernelName, OpenCLText);
  TheOpenCLTextInMemorySpec = std::move(NewSpec);
  return *this;
}
|
||||
|
||||
} // namespace streamexecutor
|
@ -0,0 +1,9 @@
|
||||
# Unit test executable for the kernel loader spec classes.
add_executable(
|
||||
kernel_spec_test
|
||||
KernelSpecTest.cpp)
|
||||
# Link against the library under test, GoogleTest and the thread library.
target_link_libraries(
|
||||
kernel_spec_test
|
||||
streamexecutor
|
||||
${GTEST_BOTH_LIBRARIES}
|
||||
${CMAKE_THREAD_LIBS_INIT})
|
||||
# Register the executable with CTest under the name KernelSpecTest.
add_test(KernelSpecTest kernel_spec_test)
|
132
parallel-libs/streamexecutor/lib/unittests/KernelSpecTest.cpp
Normal file
132
parallel-libs/streamexecutor/lib/unittests/KernelSpecTest.cpp
Normal file
@ -0,0 +1,132 @@
|
||||
//===-- KernelSpecTest.cpp - Tests for KernelSpec -------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// This file contains the unit tests for the code in KernelSpec.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "streamexecutor/KernelSpec.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
namespace se = ::streamexecutor;
|
||||
|
||||
// A spec built from an empty list keeps its name and finds no code.
TEST(CUDAPTXInMemorySpec, NoCode) {
  const se::CUDAPTXInMemorySpec EmptySpec("KernelName", {});

  EXPECT_EQ("KernelName", EmptySpec.getKernelName());
  EXPECT_EQ(nullptr, EmptySpec.getCode(1, 0));
}
|
||||
|
||||
// With one registered capability, only that exact capability resolves.
TEST(CUDAPTXInMemorySpec, SingleComputeCapability) {
  const char *DummyPTX = "Dummy PTX code";
  const se::CUDAPTXInMemorySpec OneEntrySpec("KernelName",
                                             {{{1, 0}, DummyPTX}});

  EXPECT_EQ("KernelName", OneEntrySpec.getKernelName());
  EXPECT_EQ(DummyPTX, OneEntrySpec.getCode(1, 0));
  EXPECT_EQ(nullptr, OneEntrySpec.getCode(2, 0));
}
|
||||
|
||||
// Two registered capabilities resolve independently; a capability that lies
// between them is still a miss.
TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) {
  const char *DummyPTX10 = "Dummy PTX code 10";
  const char *DummyPTX30 = "Dummy PTX code 30";
  const se::CUDAPTXInMemorySpec TwoEntrySpec(
      "KernelName", {{{1, 0}, DummyPTX10}, {{3, 0}, DummyPTX30}});

  EXPECT_EQ("KernelName", TwoEntrySpec.getKernelName());
  EXPECT_EQ(DummyPTX10, TwoEntrySpec.getCode(1, 0));
  EXPECT_EQ(DummyPTX30, TwoEntrySpec.getCode(3, 0));
  EXPECT_EQ(nullptr, TwoEntrySpec.getCode(2, 0));
}
|
||||
|
||||
// The fatbin spec returns the name and the very pointer it was given.
TEST(CUDAFatbinInMemorySpec, BasicUsage) {
  const char *DummyFatbin = "Dummy fatbin bytes";
  const se::CUDAFatbinInMemorySpec FatbinSpec("KernelName", DummyFatbin);

  EXPECT_EQ("KernelName", FatbinSpec.getKernelName());
  EXPECT_EQ(DummyFatbin, FatbinSpec.getBytes());
}
|
||||
|
||||
// The OpenCL text spec returns the name and the very pointer it was given.
TEST(OpenCLTextInMemorySpec, BasicUsage) {
  const char *DummyText = "Dummy OpenCL text";
  const se::OpenCLTextInMemorySpec TextSpec("KernelName", DummyText);

  EXPECT_EQ("KernelName", TextSpec.getKernelName());
  EXPECT_EQ(DummyText, TextSpec.getText());
}
|
||||
|
||||
// A freshly constructed MultiKernelLoaderSpec has no specs, and asking for a
// missing spec trips the getter's assertion.
TEST(MultiKernelLoaderSpec, NoCode) {
  se::MultiKernelLoaderSpec MultiSpec;
  EXPECT_FALSE(MultiSpec.hasCUDAPTXInMemory());
  EXPECT_FALSE(MultiSpec.hasCUDAFatbinInMemory());
  EXPECT_FALSE(MultiSpec.hasOpenCLTextInMemory());

#ifndef NDEBUG
  // The getters guard their precondition with assert() only. When assertions
  // are compiled out, EXPECT_DEBUG_DEATH simply executes its statement, which
  // here would dereference an empty unique_ptr. Restrict the death checks to
  // builds where the assert is active.
  EXPECT_DEBUG_DEATH(MultiSpec.getCUDAPTXInMemory(),
                     "getting spec that is not present");
  EXPECT_DEBUG_DEATH(MultiSpec.getCUDAFatbinInMemory(),
                     "getting spec that is not present");
  EXPECT_DEBUG_DEATH(MultiSpec.getOpenCLTextInMemory(),
                     "getting spec that is not present");
#endif
}
|
||||
|
||||
// Registers all three formats through one chained expression and checks that
// each stored spec reports the name and data it was registered with.
TEST(MultiKernelLoaderSpec, Registration) {
  const char *Name = "KernelName";
  const char *DummyPTX = "Dummy PTX code";
  const char *DummyFatbin = "Dummy fatbin bytes";
  const char *DummyText = "Dummy OpenCL text";

  se::MultiKernelLoaderSpec MultiSpec;
  MultiSpec.addCUDAPTXInMemory(Name, {{{1, 0}, DummyPTX}})
      .addCUDAFatbinInMemory(Name, DummyFatbin)
      .addOpenCLTextInMemory(Name, DummyText);

  EXPECT_TRUE(MultiSpec.hasCUDAPTXInMemory());
  EXPECT_TRUE(MultiSpec.hasCUDAFatbinInMemory());
  EXPECT_TRUE(MultiSpec.hasOpenCLTextInMemory());

  const se::CUDAPTXInMemorySpec &PTXSpec = MultiSpec.getCUDAPTXInMemory();
  EXPECT_EQ(Name, PTXSpec.getKernelName());
  EXPECT_EQ(DummyPTX, PTXSpec.getCode(1, 0));
  EXPECT_EQ(nullptr, PTXSpec.getCode(2, 0));

  const se::CUDAFatbinInMemorySpec &FatbinSpec =
      MultiSpec.getCUDAFatbinInMemory();
  EXPECT_EQ(Name, FatbinSpec.getKernelName());
  EXPECT_EQ(DummyFatbin, FatbinSpec.getBytes());

  const se::OpenCLTextInMemorySpec &TextSpec =
      MultiSpec.getOpenCLTextInMemory();
  EXPECT_EQ(Name, TextSpec.getKernelName());
  EXPECT_EQ(DummyText, TextSpec.getText());
}
|
||||
|
||||
// Adding the same format a second time must trip the overwrite assertion.
TEST(MultiKernelLoaderSpec, RegisterTwice) {
  const char *Name = "KernelName";
  const char *DummyFatbin = "Dummy fatbin bytes";

  se::MultiKernelLoaderSpec MultiSpec;
  MultiSpec.addCUDAFatbinInMemory(Name, DummyFatbin);

  EXPECT_DEBUG_DEATH(MultiSpec.addCUDAFatbinInMemory(Name, DummyFatbin),
                     "illegal loader spec overwrite");
}
|
||||
|
||||
// Kernel names are compared by content: equal strings at different addresses
// are accepted, while a genuinely different name trips the assertion.
TEST(MultiKernelLoaderSpec, ConflictingKernelNames) {
  const char *NameLiteral = "KernelName";
  const std::string NameCopy(NameLiteral);
  const char *DummyPTX = "Dummy PTX code";
  const char *DummyFatbin = "Dummy fatbin bytes";

  se::MultiKernelLoaderSpec MultiSpec;

  // Same characters, different storage: this must not count as a conflict.
  MultiSpec.addCUDAPTXInMemory(NameLiteral, {{{1, 0}, DummyPTX}})
      .addCUDAFatbinInMemory(NameCopy, DummyFatbin);

  const char *DifferentName = "OtherKernelName";
  const char *DummyText = "Dummy OpenCL text";
  EXPECT_DEBUG_DEATH(
      MultiSpec.addOpenCLTextInMemory(DifferentName, DummyText),
      "different kernel names in one MultiKernelLoaderSpec");
}
|
||||
|
||||
} // namespace
|
Loading…
x
Reference in New Issue
Block a user