CUDA: Honor CUDA_RESOLVE_DEVICE_SYMBOLS for more target types

`CUDA_RESOLVE_DEVICE_SYMBOLS` can be used with shared, module, and
executable target types.  This relaxation is to allow for better
interoperability with linkers that automatically do CUDA device symbol
resolution and have no way to disable it.
This commit is contained in:
Robert Maynard 2019-01-31 17:34:41 -05:00 committed by Brad King
parent 6e91f5d620
commit 850ef90a66
11 changed files with 219 additions and 35 deletions

View File

@ -1,12 +1,18 @@
CUDA_RESOLVE_DEVICE_SYMBOLS
---------------------------
CUDA only: Enables device linking for the specific static library target
CUDA only: Enables device linking for the specific library target
If set this will enable device linking on this static library target. Normally
If set this will enable device linking on the library target. Normally
device linking is deferred until a shared library or executable is generated,
allowing for multiple static libraries to resolve device symbols at the same
time.
time when they are used by a shared library or executable.
By default static library targets have this property is disabled,
while shared, module, and executable targets have this property enabled.
Note that device linking is not supported for :ref:`Object Libraries`.
For instance:

View File

@ -0,0 +1,6 @@
CUDA_RESOLVE_DEVICE_SYMBOLS
---------------------------
* The :prop_tgt:`CUDA_RESOLVE_DEVICE_SYMBOLS` target property is now supported
on shared library, module library, and executable targets. Previously it was
only honored on static libraries.

View File

@ -95,7 +95,13 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
const bool hasCUDA =
(std::find(closure->Languages.begin(), closure->Languages.end(),
cuda_lang) != closure->Languages.end());
if (!hasCUDA) {
bool doDeviceLinking = true;
if (const char* resolveDeviceSymbols =
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
}
if (!hasCUDA || !doDeviceLinking) {
return;
}

View File

@ -133,9 +133,12 @@ void cmMakefileLibraryTargetGenerator::WriteStaticLibraryRules()
(std::find(closure->Languages.begin(), closure->Languages.end(),
cuda_lang) != closure->Languages.end());
const bool resolveDeviceSymbols =
this->GeneratorTarget->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
if (hasCUDA && resolveDeviceSymbols) {
bool doDeviceLinking = false;
if (const char* resolveDeviceSymbols =
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
}
if (hasCUDA && doDeviceLinking) {
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
this->WriteDeviceLibraryRules(linkRuleVar, false);
}
@ -168,7 +171,12 @@ void cmMakefileLibraryTargetGenerator::WriteSharedLibraryRules(bool relink)
const bool hasCUDA =
(std::find(closure->Languages.begin(), closure->Languages.end(),
cuda_lang) != closure->Languages.end());
if (hasCUDA) {
bool doDeviceLinking = true;
if (const char* resolveDeviceSymbols =
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
}
if (hasCUDA && doDeviceLinking) {
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
this->WriteDeviceLibraryRules(linkRuleVar, relink);
}
@ -209,7 +217,12 @@ void cmMakefileLibraryTargetGenerator::WriteModuleLibraryRules(bool relink)
const bool hasCUDA =
(std::find(closure->Languages.begin(), closure->Languages.end(),
cuda_lang) != closure->Languages.end());
if (hasCUDA) {
bool doDeviceLinking = true;
if (const char* resolveDeviceSymbols =
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
}
if (hasCUDA && doDeviceLinking) {
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
this->WriteDeviceLibraryRules(linkRuleVar, relink);
}

View File

@ -566,22 +566,23 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement()
(std::find(closure->Languages.begin(), closure->Languages.end(),
cuda_lang) != closure->Languages.end());
bool shouldHaveDeviceLinking = false;
switch (genTarget.GetType()) {
case cmStateEnums::SHARED_LIBRARY:
case cmStateEnums::MODULE_LIBRARY:
case cmStateEnums::EXECUTABLE:
shouldHaveDeviceLinking = true;
break;
case cmStateEnums::STATIC_LIBRARY:
shouldHaveDeviceLinking =
genTarget.GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
break;
default:
break;
bool doDeviceLinking = false;
if (const char* resolveDeviceSymbols =
genTarget.GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
} else {
switch (genTarget.GetType()) {
case cmStateEnums::SHARED_LIBRARY:
case cmStateEnums::MODULE_LIBRARY:
case cmStateEnums::EXECUTABLE:
doDeviceLinking = true;
break;
default:
break;
}
}
if (!(shouldHaveDeviceLinking && hasCUDA)) {
if (!(doDeviceLinking && hasCUDA)) {
return;
}

View File

@ -2998,18 +2998,19 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions(
// Determine if we need to do a device link
bool doDeviceLinking = false;
switch (this->GeneratorTarget->GetType()) {
case cmStateEnums::SHARED_LIBRARY:
case cmStateEnums::MODULE_LIBRARY:
case cmStateEnums::EXECUTABLE:
doDeviceLinking = true;
break;
case cmStateEnums::STATIC_LIBRARY:
doDeviceLinking = this->GeneratorTarget->GetPropertyAsBool(
"CUDA_RESOLVE_DEVICE_SYMBOLS");
break;
default:
break;
if (const char* resolveDeviceSymbols =
this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) {
doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols);
} else {
switch (this->GeneratorTarget->GetType()) {
case cmStateEnums::SHARED_LIBRARY:
case cmStateEnums::MODULE_LIBRARY:
case cmStateEnums::EXECUTABLE:
doDeviceLinking = true;
break;
default:
break;
}
}
cudaLinkOptions.AddFlag("PerformDeviceLink",

View File

@ -7,6 +7,17 @@ ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
--build-and-test
"${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/"
"${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/"
${build_generator_args}
--build-project DontResolveDeviceSymbols
--build-options ${build_options}
--test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
)
if(MSVC)
ADD_TEST_MACRO(CudaOnly.PDB CudaOnlyPDB)
endif()

View File

@ -0,0 +1,50 @@
cmake_minimum_required(VERSION 3.13)
project (DontResolveDeviceSymbols CUDA)
# Find nm and dumpbin
if(CMAKE_NM)
set(dump_command ${CMAKE_NM})
set(dump_args --defined-only)
set(symbol_name cudaRegisterLinkedBinary)
else()
include(GetPrerequisites)
message(STATUS "calling list_prerequisites to find dumpbin")
list_prerequisites("${CMAKE_COMMAND}" 0 0 0)
if(gp_dumpbin)
set(dump_command ${gp_dumpbin})
set(dump_args /SYMBOLS)
set(symbol_name nv_fatb)
endif()
endif()
#Goal for this example:
# Build a static library that defines multiple methods and kernels that
# use each other.
# Don't resolve the device symbols in the static library
# Don't resolve the device symbols in the executable library
# Verify that we can't use those device symbols from anything
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[compute_30] -gencode arch=compute_50,code=\\\"compute_50\\\"")
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CUDA_STANDARD 11)
add_library(CUDANoDeviceResolve SHARED file1.cu)
set_target_properties(CUDANoDeviceResolve
PROPERTIES
CUDA_SEPARABLE_COMPILATION ON
CUDA_RESOLVE_DEVICE_SYMBOLS OFF
POSITION_INDEPENDENT_CODE ON)
if(MSVC)
target_link_options(CUDANoDeviceResolve PRIVATE "/FORCE:UNRESOLVED")
endif()
if(dump_command)
add_custom_command(TARGET CUDANoDeviceResolve POST_BUILD
COMMAND ${CMAKE_COMMAND}
-DDUMP_COMMAND=${dump_command}
-DDUMP_ARGS=${dump_args}
-DSYMBOL_NAME=${symbol_name}
-DTEST_LIBRARY_PATH=$<TARGET_FILE:CUDANoDeviceResolve>
-P ${CMAKE_CURRENT_SOURCE_DIR}/verify.cmake
)
endif()

View File

@ -0,0 +1,69 @@
#include <iostream>
static __global__ void file1_kernel(int in, int* out)
{
*out = in * in;
}
int choose_cuda_device()
{
int nDevices = 0;
cudaError_t err = cudaGetDeviceCount(&nDevices);
if (err != cudaSuccess) {
std::cerr << "Failed to retrieve the number of CUDA enabled devices"
<< std::endl;
return 1;
}
for (int i = 0; i < nDevices; ++i) {
cudaDeviceProp prop;
cudaError_t err = cudaGetDeviceProperties(&prop, i);
if (err != cudaSuccess) {
std::cerr << "Could not retrieve properties from CUDA device " << i
<< std::endl;
return 1;
}
std::cout << "prop.major: " << prop.major << std::endl;
if (prop.major >= 3) {
err = cudaSetDevice(i);
if (err != cudaSuccess) {
std::cout << "Could not select CUDA device " << i << std::endl;
} else {
return 0;
}
}
}
std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
<< std::endl;
return 1;
}
int file1_launch_kernel()
{
int ret = choose_cuda_device();
if (ret) {
return 0;
}
int input = 4;
int* output;
cudaError_t err = cudaMallocManaged(&output, sizeof(int));
cudaDeviceSynchronize();
if (err != cudaSuccess) {
return 1;
}
file1_kernel<<<1, 1>>>(input, output);
cudaDeviceSynchronize();
err = cudaGetLastError();
std::cout << err << " " << cudaGetErrorString(err) << std::endl;
if (err == cudaSuccess) {
// This kernel launch should failed as the device linking never occured
std::cerr << "file1_kernel: kernel launch should have failed" << std::endl;
return 1;
}
return 0;
}

View File

@ -0,0 +1,7 @@
#include <iostream>
int main(int argc, char** argv)
{
return 0;
}

View File

@ -0,0 +1,14 @@
execute_process(COMMAND ${DUMP_COMMAND} ${DUMP_ARGS} ${TEST_LIBRARY_PATH}
RESULT_VARIABLE RESULT
OUTPUT_VARIABLE OUTPUT
ERROR_VARIABLE ERROR
)
if(NOT "${RESULT}" STREQUAL "0")
message(FATAL_ERROR "${DUMP_COMMAND} failed [${RESULT}] [${OUTPUT}] [${ERROR}]")
endif()
if("${OUTPUT}" MATCHES "${SYMBOL_NAME}")
message(FATAL_ERROR
"The '${SYMBOL_NAME}' symbol is defined; device linking occurred!")
endif()