mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-24 06:10:12 +00:00
[libomptarget] Build cuda plugin without cuda installed locally
[libomptarget] Build cuda plugin without cuda installed locally Compiles a new file, `plugins/cuda/dynamic_cuda/cuda.cpp`, to an object file that exposes the same symbols that the plugin presently uses from libcuda. The object file contains dlopen of libcuda and cached dlsym calls. Also provides a cuda.h containing the subset that is used. This lets the cmake file choose between the system cuda and a dlopen shim, with no changes to rtl.cpp. The corresponding change to amdgpu is postponed until after a refactor of the plugin to reduce the size of the hsa.h stub required Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95155
This commit is contained in:
parent
ad25bdcb8e
commit
47e95e87a3
277
openmp/libomptarget/include/dlwrap.h
Normal file
277
openmp/libomptarget/include/dlwrap.h
Normal file
@ -0,0 +1,277 @@
|
||||
//===------- dlwrap.h - Convenience wrapper around dlopen/dlsym -- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The openmp plugins depend on extern libraries. These can be used via:
|
||||
// - bitcode file statically linked
|
||||
// - (relocatable) object file statically linked
|
||||
// - static library
|
||||
// - dynamic library, linked at build time
|
||||
// - dynamic library, loaded at application run time by dlopen
|
||||
//
|
||||
// This file factors out most boilerplate for using a dlopened library.
|
||||
// - Function symbols are generated that are statically linked against
|
||||
// - The dlopen can be done implicitly when initializing the library
|
||||
// - dlsym lookups are done once and cached
|
||||
// - The abstraction is very thin to permit varied uses of the library
|
||||
//
|
||||
// Given int foo(char, double, void*);, writing DLWRAP(foo, 3) will expand to:
|
||||
// int foo(char x0, double x1, void* x2) {
|
||||
// constexpr size_t index = id();
|
||||
// void * dlsymResult = *pointer(index);
|
||||
// return ((int (*)(char, double, void*))dlsymResult)(x0, x1, x2);
|
||||
// }
|
||||
//
|
||||
// Multiple calls to DLWRAP(symbol_name, arity) with bespoke
|
||||
// initialization code that can use the thin abstraction:
|
||||
// namespace dlwrap {
|
||||
// static size_t size();
|
||||
// static const char *symbol(size_t);
|
||||
// static void **pointer(size_t);
|
||||
// }
|
||||
// will compile to an object file that only exposes the symbols that the
|
||||
// dynamic library would do, with the right function types.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef DLWRAP_H_INCLUDED
|
||||
#define DLWRAP_H_INCLUDED
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
|
||||
// Public entry points. SYMBOL is the name of an already declared function and
// ARITY is the number of parameters it takes. Both macros emit some book
// keeping plus an implementation of the function that calls through a cached
// function pointer.
//
// Each one simply forwards to its *_IMPL counterpart. The extra hop makes the
// preprocessor expand the arguments first, so a SYMBOL that is itself a macro
// is replaced by its expansion before it gets stringified or token-pasted.
#define DLWRAP(SYMBOL, ARITY) DLWRAP_IMPL(SYMBOL, ARITY)
#define DLWRAP_INTERNAL(SYMBOL, ARITY) DLWRAP_INTERNAL_IMPL(SYMBOL, ARITY)
|
||||
|
||||
// For example, given a prototype:
|
||||
// int foo(char, double);
|
||||
//
|
||||
// DLWRAP(foo, 2) expands to:
|
||||
//
|
||||
// namespace dlwrap {
|
||||
// struct foo_Trait : public dlwrap::trait<decltype(&foo)> {
|
||||
// using T = dlwrap::trait<decltype(&foo)>;
|
||||
// static T::FunctionType get() {
|
||||
// constexpr size_t Index = getIndex();
|
||||
// void *P = *dlwrap::pointer(Index);
|
||||
// return reinterpret_cast<T::FunctionType>(P);
|
||||
// }
|
||||
// };
|
||||
// }
|
||||
// int foo(char x0, double x1) { return dlwrap::foo_Trait::get()(x0, x1); }
|
||||
//
|
||||
// DLWRAP_INTERNAL is similar, except the function it expands to is:
|
||||
// static int dlwrap_foo(char x0, double x1) { ... }
|
||||
// so that the function pointer call can be wrapped in library-specific code
|
||||
|
||||
// DLWRAP_FINALIZE() is written once, after the last DLWRAP / DLWRAP_INTERNAL
// line of the translation unit. It expands to the definitions of the three
// accessors declared below; a wrapper listed after it would not be counted by
// size().
#define DLWRAP_FINALIZE() DLWRAP_FINALIZE_IMPL()

namespace dlwrap {
// Number of wrapped symbols
static size_t size();
// Name of the symbol at Index, which must be in [0, size())
static const char *symbol(size_t Index);
// Address of the slot holding the function pointer for the symbol at Index,
// which must be in [0, size())
static void **pointer(size_t Index);
} // namespace dlwrap
|
||||
|
||||
// Implementation details follow.
|
||||
|
||||
namespace dlwrap {

// Extracts return / argument types from the address of a function symbol.
// The primary template is left undefined: only plain function pointer types
// are handled, by the partial specialization that follows.
template <typename F> struct trait;

template <typename R, typename... Ts> struct trait<R (*)(Ts...)> {
  using ReturnType = R;
  using FunctionType = R (*)(Ts...);

  // Number of parameters the function takes
  static constexpr size_t nargs = sizeof...(Ts);

  // arg<i>::type is the type of the i-th parameter, counting from zero
  template <size_t i> struct arg {
    using type = typename std::tuple_element<i, std::tuple<Ts...>>::type;
  };
};

namespace type {
// Book keeping is by type specialization, keyed on the source line number.
//
// count<S>::N is the number of DLWRAP_INC() uses on lines up to and including
// line S. An unspecialized line forwards to the line before it, so the
// instantiation recurses one level per source line until it reaches either a
// DLWRAP_INC() specialization or line zero.
template <size_t S> struct count {
  static constexpr size_t N = count<S - 1>::N;
};

// End of the recursion: nothing has been counted before the first line
template <> struct count<0> {
  static constexpr size_t N = 0;
};

// Get a constexpr size_t ID, starts at zero
#define DLWRAP_ID() (dlwrap::type::count<__LINE__>::N)

// Increment value returned by DLWRAP_ID. The specialization is for the current
// line, so at most one DLWRAP_INC() can appear per source line.
#define DLWRAP_INC() \
  template <> struct dlwrap::type::count<__LINE__> { \
    static constexpr size_t N = 1 + dlwrap::type::count<__LINE__ - 1>::N; \
  }

// symbol<N>::call() returns the name of the N-th wrapped symbol. Only the
// specializations produced by DLWRAP_SYMBOL are defined.
template <size_t N> struct symbol;

#define DLWRAP_SYMBOL(SYMBOL, ID) \
  template <> struct dlwrap::type::symbol<ID> { \
    static constexpr const char *call() { return #SYMBOL; } \
  }
} // namespace type

// Builds the table of symbol names for the indices Is..., in that order. N is
// the size of the returned array.
template <size_t N, size_t... Is>
static constexpr std::array<const char *, N>
getSymbolArray(std::index_sequence<Is...>) {
  return {{dlwrap::type::symbol<Is>::call()...}};
}

} // namespace dlwrap
|
||||
|
||||
// Selects the DLWRAP_INSTANTIATE_<ARITY> macro that defines SYM_DEF with the
// signature of SYM_USE. ARITY is token-pasted, so it has to be an integer
// literal for which such a macro exists. The third argument handed on is the
// trait type describing SYM_USE.
#define DLWRAP_INSTANTIATE(SYM_USE, SYM_DEF, ARITY) \
  DLWRAP_INSTANTIATE_##ARITY(SYM_USE, SYM_DEF, \
                             dlwrap::trait<decltype(&SYM_USE)>)
|
||||
|
||||
// Defines the three accessors declared in namespace dlwrap. The whole macro
// expands on a single source line, so every DLWRAP_ID() below yields the same
// value: the number of symbols wrapped before the point of expansion.
//  - size() returns that count
//  - symbol(Index) reads from a constant table of the symbol names
//  - pointer(Index) returns the address of a slot in a function-local static
//    array of void *, one slot per symbol
// Neither symbol() nor pointer() range-checks Index.
#define DLWRAP_FINALIZE_IMPL() \
  static size_t dlwrap::size() { return DLWRAP_ID(); } \
  static const char *dlwrap::symbol(size_t Index) { \
    static constexpr auto Names = dlwrap::getSymbolArray<DLWRAP_ID()>( \
        std::make_index_sequence<DLWRAP_ID()>()); \
    return Names[Index]; \
  } \
  static void **dlwrap::pointer(size_t Index) { \
    static std::array<void *, DLWRAP_ID()> Slots; \
    return Slots.data() + Index; \
  }
|
||||
|
||||
// Book keeping shared by DLWRAP and DLWRAP_INTERNAL:
//  - bumps the ID counter and records the symbol name under the new ID
//  - defines dlwrap::SYMBOL_Trait, whose get() reads the cached pointer for
//    this symbol from dlwrap::pointer() and casts it to the function type
//
// ARITY is checked against the number of parameters in the declaration of
// SYMBOL, so a mismatch is reported directly rather than as an error from
// inside the generated wrapper.
//
// get() does not check whether the slot has been filled in; the caller is
// responsible for having stored the looked-up address beforehand.
#define DLWRAP_COMMON(SYMBOL, ARITY) \
  DLWRAP_INC(); \
  DLWRAP_SYMBOL(SYMBOL, DLWRAP_ID() - 1); \
  namespace dlwrap { \
  struct SYMBOL##_Trait : public dlwrap::trait<decltype(&SYMBOL)> { \
    using T = dlwrap::trait<decltype(&SYMBOL)>; \
    static_assert(T::nargs == ARITY, \
                  "DLWRAP arity does not match the declaration of " #SYMBOL); \
    static T::FunctionType get() { \
      constexpr size_t Index = DLWRAP_ID() - 1; \
      void *P = *dlwrap::pointer(Index); \
      return reinterpret_cast<T::FunctionType>(P); \
    } \
  }; \
  }
|
||||
|
||||
// DLWRAP: book keeping, then a definition of SYMBOL itself that forwards to
// the cached function pointer.
#define DLWRAP_IMPL(SYMBOL, ARITY) \
  DLWRAP_COMMON(SYMBOL, ARITY); \
  DLWRAP_INSTANTIATE(SYMBOL, SYMBOL, ARITY)

// DLWRAP_INTERNAL: same book keeping, but the forwarding function is a static
// one named dlwrap_SYMBOL. SYMBOL itself is left for the user to define.
#define DLWRAP_INTERNAL_IMPL(SYMBOL, ARITY) \
  DLWRAP_COMMON(SYMBOL, ARITY); \
  static DLWRAP_INSTANTIATE(SYMBOL, dlwrap_##SYMBOL, ARITY)
|
||||
|
||||
// One macro per supported arity, 0 through 11. Each defines
//   ReturnType SYM_DEF(arg0 x0, ..., argN-1 xN-1)
// with the types taken from the trait T, and a body that passes the arguments
// on to the function pointer returned by dlwrap::SYM_USE_Trait::get().

// Spells the I-th parameter: its type as given by the trait T, named xI
#define DLWRAP_ARG(T, I) typename T::template arg<I>::type x##I

#define DLWRAP_INSTANTIATE_0(SYM_USE, SYM_DEF, T) \
  T::ReturnType SYM_DEF() { return dlwrap::SYM_USE##_Trait::get()(); }

#define DLWRAP_INSTANTIATE_1(SYM_USE, SYM_DEF, T) \
  T::ReturnType SYM_DEF(DLWRAP_ARG(T, 0)) { \
    return dlwrap::SYM_USE##_Trait::get()(x0); \
  }

#define DLWRAP_INSTANTIATE_2(SYM_USE, SYM_DEF, T) \
  T::ReturnType SYM_DEF(DLWRAP_ARG(T, 0), DLWRAP_ARG(T, 1)) { \
    return dlwrap::SYM_USE##_Trait::get()(x0, x1); \
  }

#define DLWRAP_INSTANTIATE_3(SYM_USE, SYM_DEF, T) \
  T::ReturnType SYM_DEF(DLWRAP_ARG(T, 0), DLWRAP_ARG(T, 1), \
                        DLWRAP_ARG(T, 2)) { \
    return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2); \
  }

#define DLWRAP_INSTANTIATE_4(SYM_USE, SYM_DEF, T) \
  T::ReturnType SYM_DEF(DLWRAP_ARG(T, 0), DLWRAP_ARG(T, 1), \
                        DLWRAP_ARG(T, 2), DLWRAP_ARG(T, 3)) { \
    return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2, x3); \
  }

#define DLWRAP_INSTANTIATE_5(SYM_USE, SYM_DEF, T) \
  T::ReturnType SYM_DEF(DLWRAP_ARG(T, 0), DLWRAP_ARG(T, 1), \
                        DLWRAP_ARG(T, 2), DLWRAP_ARG(T, 3), \
                        DLWRAP_ARG(T, 4)) { \
    return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2, x3, x4); \
  }

#define DLWRAP_INSTANTIATE_6(SYM_USE, SYM_DEF, T) \
  T::ReturnType SYM_DEF(DLWRAP_ARG(T, 0), DLWRAP_ARG(T, 1), \
                        DLWRAP_ARG(T, 2), DLWRAP_ARG(T, 3), \
                        DLWRAP_ARG(T, 4), DLWRAP_ARG(T, 5)) { \
    return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2, x3, x4, x5); \
  }

#define DLWRAP_INSTANTIATE_7(SYM_USE, SYM_DEF, T) \
  T::ReturnType SYM_DEF(DLWRAP_ARG(T, 0), DLWRAP_ARG(T, 1), \
                        DLWRAP_ARG(T, 2), DLWRAP_ARG(T, 3), \
                        DLWRAP_ARG(T, 4), DLWRAP_ARG(T, 5), \
                        DLWRAP_ARG(T, 6)) { \
    return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2, x3, x4, x5, x6); \
  }

#define DLWRAP_INSTANTIATE_8(SYM_USE, SYM_DEF, T) \
  T::ReturnType SYM_DEF(DLWRAP_ARG(T, 0), DLWRAP_ARG(T, 1), \
                        DLWRAP_ARG(T, 2), DLWRAP_ARG(T, 3), \
                        DLWRAP_ARG(T, 4), DLWRAP_ARG(T, 5), \
                        DLWRAP_ARG(T, 6), DLWRAP_ARG(T, 7)) { \
    return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2, x3, x4, x5, x6, x7); \
  }

#define DLWRAP_INSTANTIATE_9(SYM_USE, SYM_DEF, T) \
  T::ReturnType SYM_DEF(DLWRAP_ARG(T, 0), DLWRAP_ARG(T, 1), \
                        DLWRAP_ARG(T, 2), DLWRAP_ARG(T, 3), \
                        DLWRAP_ARG(T, 4), DLWRAP_ARG(T, 5), \
                        DLWRAP_ARG(T, 6), DLWRAP_ARG(T, 7), \
                        DLWRAP_ARG(T, 8)) { \
    return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2, x3, x4, x5, x6, x7, x8); \
  }

#define DLWRAP_INSTANTIATE_10(SYM_USE, SYM_DEF, T) \
  T::ReturnType SYM_DEF(DLWRAP_ARG(T, 0), DLWRAP_ARG(T, 1), \
                        DLWRAP_ARG(T, 2), DLWRAP_ARG(T, 3), \
                        DLWRAP_ARG(T, 4), DLWRAP_ARG(T, 5), \
                        DLWRAP_ARG(T, 6), DLWRAP_ARG(T, 7), \
                        DLWRAP_ARG(T, 8), DLWRAP_ARG(T, 9)) { \
    return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2, x3, x4, x5, x6, x7, x8, \
                                          x9); \
  }

#define DLWRAP_INSTANTIATE_11(SYM_USE, SYM_DEF, T) \
  T::ReturnType SYM_DEF(DLWRAP_ARG(T, 0), DLWRAP_ARG(T, 1), \
                        DLWRAP_ARG(T, 2), DLWRAP_ARG(T, 3), \
                        DLWRAP_ARG(T, 4), DLWRAP_ARG(T, 5), \
                        DLWRAP_ARG(T, 6), DLWRAP_ARG(T, 7), \
                        DLWRAP_ARG(T, 8), DLWRAP_ARG(T, 9), \
                        DLWRAP_ARG(T, 10)) { \
    return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2, x3, x4, x5, x6, x7, x8, \
                                          x9, x10); \
  }
|
||||
|
||||
#endif
|
@ -15,12 +15,6 @@ if (NOT(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE
|
||||
elseif (NOT LIBOMPTARGET_DEP_LIBELF_FOUND)
|
||||
libomptarget_say("Not building CUDA offloading plugin: libelf dependency not found.")
|
||||
return()
|
||||
elseif(NOT LIBOMPTARGET_DEP_CUDA_FOUND)
|
||||
libomptarget_say("Not building CUDA offloading plugin: CUDA not found in system.")
|
||||
return()
|
||||
elseif(NOT LIBOMPTARGET_DEP_CUDA_DRIVER_FOUND)
|
||||
libomptarget_say("Not building CUDA offloading plugin: CUDA Driver API not found in system.")
|
||||
return()
|
||||
endif()
|
||||
|
||||
libomptarget_say("Building CUDA offloading plugin.")
|
||||
@ -28,10 +22,22 @@ libomptarget_say("Building CUDA offloading plugin.")
|
||||
# Define the suffix for the runtime messaging dumps.
|
||||
add_definitions(-DTARGET_NAME=CUDA)
|
||||
|
||||
include_directories(${LIBOMPTARGET_DEP_CUDA_INCLUDE_DIRS})
|
||||
include_directories(${LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS})
|
||||
|
||||
add_library(omptarget.rtl.cuda SHARED src/rtl.cpp)
|
||||
option(LIBOMPTARGET_DLOPEN_LIBCUDA "Build with dlopened libcuda" OFF)
|
||||
|
||||
if (LIBOMPTARGET_DEP_CUDA_FOUND AND LIBOMPTARGET_DEP_CUDA_DRIVER_FOUND
|
||||
AND NOT LIBOMPTARGET_DLOPEN_LIBCUDA)
|
||||
libomptarget_say("Building CUDA plugin linked against libcuda")
|
||||
include_directories(${LIBOMPTARGET_DEP_CUDA_INCLUDE_DIRS})
|
||||
add_library(omptarget.rtl.cuda SHARED src/rtl.cpp)
|
||||
set (LIBOMPTARGET_DEP_LIBRARIES ${LIBOMPTARGET_DEP_CUDA_DRIVER_LIBRARIES})
|
||||
else()
|
||||
libomptarget_say("Building CUDA plugin for dlopened libcuda")
|
||||
include_directories(dynamic_cuda)
|
||||
add_library(omptarget.rtl.cuda SHARED src/rtl.cpp dynamic_cuda/cuda.cpp)
|
||||
set (LIBOMPTARGET_DEP_LIBRARIES ${CMAKE_DL_LIBS})
|
||||
endif()
|
||||
|
||||
# Install plugin under the lib destination folder.
|
||||
install(TARGETS omptarget.rtl.cuda LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
|
||||
@ -39,7 +45,7 @@ install(TARGETS omptarget.rtl.cuda LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}
|
||||
target_link_libraries(omptarget.rtl.cuda
|
||||
elf_common
|
||||
MemoryManager
|
||||
${LIBOMPTARGET_DEP_CUDA_DRIVER_LIBRARIES}
|
||||
${LIBOMPTARGET_DEP_LIBRARIES}
|
||||
${LIBOMPTARGET_DEP_LIBELF_LIBRARIES}
|
||||
"-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports"
|
||||
"-Wl,-z,defs")
|
||||
|
99
openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
Normal file
99
openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
Normal file
@ -0,0 +1,99 @@
|
||||
//===--- cuda/dynamic_cuda/cuda.cpp ------------------------------ C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Implement subset of cuda api by calling into cuda library via dlopen
|
||||
// Does the dlopen/dlsym calls as part of the call to cuInit
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "cuda.h"
|
||||
#include "Debug.h"
|
||||
#include "dlwrap.h"
|
||||
|
||||
#include <dlfcn.h>
|
||||
|
||||
DLWRAP_INTERNAL(cuInit, 1);
|
||||
|
||||
DLWRAP(cuCtxGetDevice, 1);
|
||||
DLWRAP(cuDeviceGet, 2);
|
||||
DLWRAP(cuDeviceGetAttribute, 3);
|
||||
DLWRAP(cuDeviceGetCount, 1);
|
||||
DLWRAP(cuFuncGetAttribute, 3);
|
||||
|
||||
DLWRAP(cuGetErrorString, 2);
|
||||
DLWRAP(cuLaunchKernel, 11);
|
||||
|
||||
DLWRAP(cuMemAlloc, 2);
|
||||
DLWRAP(cuMemcpyDtoDAsync, 4);
|
||||
|
||||
DLWRAP(cuMemcpyDtoH, 3);
|
||||
DLWRAP(cuMemcpyDtoHAsync, 4);
|
||||
DLWRAP(cuMemcpyHtoD, 3);
|
||||
DLWRAP(cuMemcpyHtoDAsync, 4);
|
||||
|
||||
DLWRAP(cuMemFree, 1);
|
||||
DLWRAP(cuModuleGetFunction, 3);
|
||||
DLWRAP(cuModuleGetGlobal, 4);
|
||||
|
||||
DLWRAP(cuModuleUnload, 1);
|
||||
DLWRAP(cuStreamCreate, 2);
|
||||
DLWRAP(cuStreamDestroy, 1);
|
||||
DLWRAP(cuStreamSynchronize, 1);
|
||||
DLWRAP(cuCtxSetCurrent, 1);
|
||||
DLWRAP(cuDevicePrimaryCtxRelease, 1);
|
||||
DLWRAP(cuDevicePrimaryCtxGetState, 3);
|
||||
DLWRAP(cuDevicePrimaryCtxSetFlags, 2);
|
||||
DLWRAP(cuDevicePrimaryCtxRetain, 2);
|
||||
DLWRAP(cuModuleLoadDataEx, 5);
|
||||
|
||||
DLWRAP(cuDeviceCanAccessPeer, 3);
|
||||
DLWRAP(cuCtxEnablePeerAccess, 2);
|
||||
DLWRAP(cuMemcpyPeerAsync, 6);
|
||||
|
||||
DLWRAP_FINALIZE();
|
||||
|
||||
// Name handed to dlopen. The build may predefine DYNAMIC_CUDA_PATH to point at
// a different library; otherwise the plain soname is used.
#if !defined(DYNAMIC_CUDA_PATH)
#define DYNAMIC_CUDA_PATH "libcuda.so"
#endif

// Prefix configuration for the DP() debug messages below.
// NOTE(review): GETNAME and DP are presumably provided by Debug.h - confirm.
#define TARGET_NAME CUDA
#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
|
||||
|
||||
static bool checkForCUDA() {
|
||||
// return true if dlopen succeeded and all functions found
|
||||
|
||||
const char *CudaLib = DYNAMIC_CUDA_PATH;
|
||||
void *DynlibHandle = dlopen(CudaLib, RTLD_NOW);
|
||||
if (!DynlibHandle) {
|
||||
DP("Unable to load library '%s': %s!\n", CudaLib, dlerror());
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t I = 0; I < dlwrap::size(); I++) {
|
||||
const char *Sym = dlwrap::symbol(I);
|
||||
|
||||
void *P = dlsym(DynlibHandle, Sym);
|
||||
if (P == nullptr) {
|
||||
DP("Unable to find '%s' in '%s'!\n", Sym, CudaLib);
|
||||
return false;
|
||||
}
|
||||
|
||||
*dlwrap::pointer(I) = P;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Replacement for the library's cuInit: loads the library and resolves all
// wrapped symbols first, then forwards X to the real cuInit. If loading or
// symbol lookup fails, CUDA_ERROR_INVALID_VALUE is returned and the real
// cuInit is not called.
CUresult cuInit(unsigned X) {
  // Note: Called exactly once from cuda rtl.cpp in a global constructor so
  // does not need to handle being called repeatedly or concurrently
  return checkForCUDA() ? dlwrap_cuInit(X) : CUDA_ERROR_INVALID_VALUE;
}
|
104
openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
Normal file
104
openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
Normal file
@ -0,0 +1,104 @@
|
||||
//===--- cuda/dynamic_cuda/cuda.h --------------------------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The parts of the cuda api that are presently in use by the openmp cuda plugin
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef DYNAMIC_CUDA_CUDA_H_INCLUDED
|
||||
#define DYNAMIC_CUDA_CUDA_H_INCLUDED
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
// Device ordinal and device address types
using CUdevice = int;
using CUdeviceptr = uintptr_t;

// Opaque handles: the structs are only ever declared, never defined here
struct CUmod_st;
struct CUctx_st;
struct CUfunc_st;
struct CUstream_st;

using CUmodule = CUmod_st *;
using CUcontext = CUctx_st *;
using CUfunction = CUfunc_st *;
using CUstream = CUstream_st *;
|
||||
|
||||
// Result codes; only the two values referenced in this directory are listed
enum cudaError_enum {
  CUDA_SUCCESS = 0,
  CUDA_ERROR_INVALID_VALUE = 1,
};
using CUresult = cudaError_enum;
|
||||
|
||||
// Stream creation flags
enum CUstream_flags_enum {
  CU_STREAM_DEFAULT = 0x0,
  CU_STREAM_NON_BLOCKING = 0x1,
};
using CUstream_flags = CUstream_flags_enum;
|
||||
|
||||
// Device attribute selectors; a sparse subset, hence the explicit values
enum CUdevice_attribute_enum {
  CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
  CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
  CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
};
using CUdevice_attribute = CUdevice_attribute_enum;
|
||||
|
||||
// Function attribute selectors
enum CUfunction_attribute_enum {
  CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
};
using CUfunction_attribute = CUfunction_attribute_enum;
|
||||
|
||||
// Context flags
enum CUctx_flags_enum {
  CU_CTX_SCHED_BLOCKING_SYNC = 0x04,
  CU_CTX_SCHED_MASK = 0x07,
};
using CUctx_flags = CUctx_flags_enum;
|
||||
|
||||
// Unversioned names that map onto their _v2 entry points. Every later use of
// one of these names, including the prototypes in this header and the DLWRAP
// lines in cuda.cpp, refers to the _v2 symbol, so that is also the name that
// ends up being looked up with dlsym.
#define cuDevicePrimaryCtxRelease cuDevicePrimaryCtxRelease_v2
#define cuDevicePrimaryCtxSetFlags cuDevicePrimaryCtxSetFlags_v2
#define cuMemAlloc cuMemAlloc_v2
#define cuMemFree cuMemFree_v2
#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
#define cuMemcpyDtoH cuMemcpyDtoH_v2
#define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
#define cuMemcpyHtoD cuMemcpyHtoD_v2
#define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
#define cuModuleGetGlobal cuModuleGetGlobal_v2
#define cuStreamDestroy cuStreamDestroy_v2
|
||||
|
||||
// Prototypes for the functions wrapped in cuda.cpp. Names covered by the _v2
// macros earlier in this header are declared under their _v2 name.
CUresult cuCtxGetDevice(CUdevice *);
CUresult cuDeviceGet(CUdevice *, int);
CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice);
CUresult cuDeviceGetCount(int *);
CUresult cuFuncGetAttribute(int *, CUfunction_attribute, CUfunction);

CUresult cuGetErrorString(CUresult, const char **);
CUresult cuInit(unsigned int);
CUresult cuLaunchKernel(CUfunction, unsigned int, unsigned int, unsigned int,
                        unsigned int, unsigned int, unsigned int, unsigned int,
                        CUstream, void **, void **);

CUresult cuMemAlloc(CUdeviceptr *, size_t);
CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream);

CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t);
CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream);
CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t);
CUresult cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream);

CUresult cuMemFree(CUdeviceptr);
CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *);
CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *);

CUresult cuModuleUnload(CUmodule);
CUresult cuStreamCreate(CUstream *, unsigned int);
CUresult cuStreamDestroy(CUstream);
CUresult cuStreamSynchronize(CUstream);
CUresult cuCtxSetCurrent(CUcontext);
CUresult cuDevicePrimaryCtxRelease(CUdevice);
CUresult cuDevicePrimaryCtxGetState(CUdevice, unsigned int *, int *);
CUresult cuDevicePrimaryCtxSetFlags(CUdevice, unsigned int);
CUresult cuDevicePrimaryCtxRetain(CUcontext *, CUdevice);
CUresult cuModuleLoadDataEx(CUmodule *, const void *, unsigned int, void *,
                            void **);

CUresult cuDeviceCanAccessPeer(int *, CUdevice, CUdevice);
CUresult cuCtxEnablePeerAccess(CUcontext, unsigned int);
CUresult cuMemcpyPeerAsync(CUdeviceptr, CUcontext, CUdeviceptr, CUcontext,
                           size_t, CUstream);
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user