mirror of
https://github.com/reactos/CMake.git
synced 2025-02-23 21:40:34 +00:00
CUDA: Honor CUDA_RESOLVE_DEVICE_SYMBOLS for more target types
`CUDA_RESOLVE_DEVICE_SYMBOLS` can be used with shared, module, and executable target types. This relaxation is to allow for better interoperability with linkers that automatically do CUDA device symbol resolution and have no way to disable it.
This commit is contained in:
parent
6e91f5d620
commit
850ef90a66
@ -1,12 +1,18 @@
|
||||
CUDA_RESOLVE_DEVICE_SYMBOLS
|
||||
---------------------------
|
||||
|
||||
CUDA only: Enables device linking for the specific static library target
|
||||
CUDA only: Enables device linking for the specific library target
|
||||
|
||||
If set this will enable device linking on this static library target. Normally
|
||||
If set this will enable device linking on the library target. Normally
|
||||
device linking is deferred until a shared library or executable is generated,
|
||||
allowing for multiple static libraries to resolve device symbols at the same
|
||||
time.
|
||||
time when they are used by a shared library or executable.
|
||||
|
||||
By default static library targets have this property is disabled,
|
||||
while shared, module, and executable targets have this property enabled.
|
||||
|
||||
Note that device linking is not supported for :ref:`Object Libraries`.
|
||||
|
||||
|
||||
For instance:
|
||||
|
||||
|
6
Help/release/dev/CUDA_RESOLVE_DEVICE_SYMBOLS.rst
Normal file
6
Help/release/dev/CUDA_RESOLVE_DEVICE_SYMBOLS.rst
Normal file
@ -0,0 +1,6 @@
|
||||
CUDA_RESOLVE_DEVICE_SYMBOLS
|
||||
---------------------------
|
||||
|
||||
* The :prop_tgt:`CUDA_RESOLVE_DEVICE_SYMBOLS` target property is now supported
|
||||
on shared library, module library, and executable targets. Previously it was
|
||||
only honored on static libraries.
|
@ -95,7 +95,13 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
|
||||
const bool hasCUDA =
|
||||
(std::find(closure->Languages.begin(), closure->Languages.end(),
|
||||
cuda_lang) != closure->Languages.end());
|
||||
if (!hasCUDA) {
|
||||
|
||||
bool doDeviceLinking = true;
|
||||
if (const char* resolveDeviceSymbols =
|
||||
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
|
||||
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
|
||||
}
|
||||
if (!hasCUDA || !doDeviceLinking) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -133,9 +133,12 @@ void cmMakefileLibraryTargetGenerator::WriteStaticLibraryRules()
|
||||
(std::find(closure->Languages.begin(), closure->Languages.end(),
|
||||
cuda_lang) != closure->Languages.end());
|
||||
|
||||
const bool resolveDeviceSymbols =
|
||||
this->GeneratorTarget->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
|
||||
if (hasCUDA && resolveDeviceSymbols) {
|
||||
bool doDeviceLinking = false;
|
||||
if (const char* resolveDeviceSymbols =
|
||||
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
|
||||
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
|
||||
}
|
||||
if (hasCUDA && doDeviceLinking) {
|
||||
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
|
||||
this->WriteDeviceLibraryRules(linkRuleVar, false);
|
||||
}
|
||||
@ -168,7 +171,12 @@ void cmMakefileLibraryTargetGenerator::WriteSharedLibraryRules(bool relink)
|
||||
const bool hasCUDA =
|
||||
(std::find(closure->Languages.begin(), closure->Languages.end(),
|
||||
cuda_lang) != closure->Languages.end());
|
||||
if (hasCUDA) {
|
||||
bool doDeviceLinking = true;
|
||||
if (const char* resolveDeviceSymbols =
|
||||
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
|
||||
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
|
||||
}
|
||||
if (hasCUDA && doDeviceLinking) {
|
||||
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
|
||||
this->WriteDeviceLibraryRules(linkRuleVar, relink);
|
||||
}
|
||||
@ -209,7 +217,12 @@ void cmMakefileLibraryTargetGenerator::WriteModuleLibraryRules(bool relink)
|
||||
const bool hasCUDA =
|
||||
(std::find(closure->Languages.begin(), closure->Languages.end(),
|
||||
cuda_lang) != closure->Languages.end());
|
||||
if (hasCUDA) {
|
||||
bool doDeviceLinking = true;
|
||||
if (const char* resolveDeviceSymbols =
|
||||
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
|
||||
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
|
||||
}
|
||||
if (hasCUDA && doDeviceLinking) {
|
||||
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
|
||||
this->WriteDeviceLibraryRules(linkRuleVar, relink);
|
||||
}
|
||||
|
@ -566,22 +566,23 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement()
|
||||
(std::find(closure->Languages.begin(), closure->Languages.end(),
|
||||
cuda_lang) != closure->Languages.end());
|
||||
|
||||
bool shouldHaveDeviceLinking = false;
|
||||
switch (genTarget.GetType()) {
|
||||
case cmStateEnums::SHARED_LIBRARY:
|
||||
case cmStateEnums::MODULE_LIBRARY:
|
||||
case cmStateEnums::EXECUTABLE:
|
||||
shouldHaveDeviceLinking = true;
|
||||
break;
|
||||
case cmStateEnums::STATIC_LIBRARY:
|
||||
shouldHaveDeviceLinking =
|
||||
genTarget.GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
bool doDeviceLinking = false;
|
||||
if (const char* resolveDeviceSymbols =
|
||||
genTarget.GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
|
||||
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
|
||||
} else {
|
||||
switch (genTarget.GetType()) {
|
||||
case cmStateEnums::SHARED_LIBRARY:
|
||||
case cmStateEnums::MODULE_LIBRARY:
|
||||
case cmStateEnums::EXECUTABLE:
|
||||
doDeviceLinking = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(shouldHaveDeviceLinking && hasCUDA)) {
|
||||
if (!(doDeviceLinking && hasCUDA)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2998,18 +2998,19 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions(
|
||||
|
||||
// Determine if we need to do a device link
|
||||
bool doDeviceLinking = false;
|
||||
switch (this->GeneratorTarget->GetType()) {
|
||||
case cmStateEnums::SHARED_LIBRARY:
|
||||
case cmStateEnums::MODULE_LIBRARY:
|
||||
case cmStateEnums::EXECUTABLE:
|
||||
doDeviceLinking = true;
|
||||
break;
|
||||
case cmStateEnums::STATIC_LIBRARY:
|
||||
doDeviceLinking = this->GeneratorTarget->GetPropertyAsBool(
|
||||
"CUDA_RESOLVE_DEVICE_SYMBOLS");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
if (const char* resolveDeviceSymbols =
|
||||
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
|
||||
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
|
||||
} else {
|
||||
switch (this->GeneratorTarget->GetType()) {
|
||||
case cmStateEnums::SHARED_LIBRARY:
|
||||
case cmStateEnums::MODULE_LIBRARY:
|
||||
case cmStateEnums::EXECUTABLE:
|
||||
doDeviceLinking = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cudaLinkOptions.AddFlag("PerformDeviceLink",
|
||||
|
@ -7,6 +7,17 @@ ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
|
||||
ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
|
||||
ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
|
||||
|
||||
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
|
||||
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
|
||||
--build-and-test
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/"
|
||||
${build_generator_args}
|
||||
--build-project DontResolveDeviceSymbols
|
||||
--build-options ${build_options}
|
||||
--test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
|
||||
)
|
||||
|
||||
if(MSVC)
|
||||
ADD_TEST_MACRO(CudaOnly.PDB CudaOnlyPDB)
|
||||
endif()
|
||||
|
50
Tests/CudaOnly/DontResolveDeviceSymbols/CMakeLists.txt
Normal file
50
Tests/CudaOnly/DontResolveDeviceSymbols/CMakeLists.txt
Normal file
@ -0,0 +1,50 @@
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
project (DontResolveDeviceSymbols CUDA)
|
||||
|
||||
# Find nm and dumpbin
|
||||
if(CMAKE_NM)
|
||||
set(dump_command ${CMAKE_NM})
|
||||
set(dump_args --defined-only)
|
||||
set(symbol_name cudaRegisterLinkedBinary)
|
||||
else()
|
||||
include(GetPrerequisites)
|
||||
message(STATUS "calling list_prerequisites to find dumpbin")
|
||||
list_prerequisites("${CMAKE_COMMAND}" 0 0 0)
|
||||
if(gp_dumpbin)
|
||||
set(dump_command ${gp_dumpbin})
|
||||
set(dump_args /SYMBOLS)
|
||||
set(symbol_name nv_fatb)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
#Goal for this example:
|
||||
# Build a static library that defines multiple methods and kernels that
|
||||
# use each other.
|
||||
# Don't resolve the device symbols in the static library
|
||||
# Don't resolve the device symbols in the executable library
|
||||
# Verify that we can't use those device symbols from anything
|
||||
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[compute_30] -gencode arch=compute_50,code=\\\"compute_50\\\"")
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(CMAKE_CUDA_STANDARD 11)
|
||||
|
||||
add_library(CUDANoDeviceResolve SHARED file1.cu)
|
||||
set_target_properties(CUDANoDeviceResolve
|
||||
PROPERTIES
|
||||
CUDA_SEPARABLE_COMPILATION ON
|
||||
CUDA_RESOLVE_DEVICE_SYMBOLS OFF
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
if(MSVC)
|
||||
target_link_options(CUDANoDeviceResolve PRIVATE "/FORCE:UNRESOLVED")
|
||||
endif()
|
||||
|
||||
if(dump_command)
|
||||
add_custom_command(TARGET CUDANoDeviceResolve POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND}
|
||||
-DDUMP_COMMAND=${dump_command}
|
||||
-DDUMP_ARGS=${dump_args}
|
||||
-DSYMBOL_NAME=${symbol_name}
|
||||
-DTEST_LIBRARY_PATH=$<TARGET_FILE:CUDANoDeviceResolve>
|
||||
-P ${CMAKE_CURRENT_SOURCE_DIR}/verify.cmake
|
||||
)
|
||||
endif()
|
69
Tests/CudaOnly/DontResolveDeviceSymbols/file1.cu
Normal file
69
Tests/CudaOnly/DontResolveDeviceSymbols/file1.cu
Normal file
@ -0,0 +1,69 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
static __global__ void file1_kernel(int in, int* out)
|
||||
{
|
||||
*out = in * in;
|
||||
}
|
||||
|
||||
int choose_cuda_device()
|
||||
{
|
||||
int nDevices = 0;
|
||||
cudaError_t err = cudaGetDeviceCount(&nDevices);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to retrieve the number of CUDA enabled devices"
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
for (int i = 0; i < nDevices; ++i) {
|
||||
cudaDeviceProp prop;
|
||||
cudaError_t err = cudaGetDeviceProperties(&prop, i);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Could not retrieve properties from CUDA device " << i
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::cout << "prop.major: " << prop.major << std::endl;
|
||||
if (prop.major >= 3) {
|
||||
err = cudaSetDevice(i);
|
||||
if (err != cudaSuccess) {
|
||||
std::cout << "Could not select CUDA device " << i << std::endl;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
|
||||
<< std::endl;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int file1_launch_kernel()
|
||||
{
|
||||
int ret = choose_cuda_device();
|
||||
if (ret) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int input = 4;
|
||||
|
||||
int* output;
|
||||
cudaError_t err = cudaMallocManaged(&output, sizeof(int));
|
||||
cudaDeviceSynchronize();
|
||||
if (err != cudaSuccess) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
file1_kernel<<<1, 1>>>(input, output);
|
||||
cudaDeviceSynchronize();
|
||||
err = cudaGetLastError();
|
||||
std::cout << err << " " << cudaGetErrorString(err) << std::endl;
|
||||
if (err == cudaSuccess) {
|
||||
// This kernel launch should failed as the device linking never occured
|
||||
std::cerr << "file1_kernel: kernel launch should have failed" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
7
Tests/CudaOnly/DontResolveDeviceSymbols/main.cu
Normal file
7
Tests/CudaOnly/DontResolveDeviceSymbols/main.cu
Normal file
@ -0,0 +1,7 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
return 0;
|
||||
}
|
14
Tests/CudaOnly/DontResolveDeviceSymbols/verify.cmake
Normal file
14
Tests/CudaOnly/DontResolveDeviceSymbols/verify.cmake
Normal file
@ -0,0 +1,14 @@
|
||||
execute_process(COMMAND ${DUMP_COMMAND} ${DUMP_ARGS} ${TEST_LIBRARY_PATH}
|
||||
RESULT_VARIABLE RESULT
|
||||
OUTPUT_VARIABLE OUTPUT
|
||||
ERROR_VARIABLE ERROR
|
||||
)
|
||||
|
||||
if(NOT "${RESULT}" STREQUAL "0")
|
||||
message(FATAL_ERROR "${DUMP_COMMAND} failed [${RESULT}] [${OUTPUT}] [${ERROR}]")
|
||||
endif()
|
||||
|
||||
if("${OUTPUT}" MATCHES "${SYMBOL_NAME}")
|
||||
message(FATAL_ERROR
|
||||
"The '${SYMBOL_NAME}' symbol is defined; device linking occurred!")
|
||||
endif()
|
Loading…
x
Reference in New Issue
Block a user