mirror of
https://github.com/reactos/CMake.git
synced 2024-11-23 11:39:48 +00:00
Merge topic 'cuda-clang'
a653ca9504 Tests: Update CUDA tests to work with Clang
5df21adf46 CUDA: Add support for Clang compiler
dc2eae1f91 FindCUDAToolkit: Factor out discovery code into a separate file
70be10cbf4 CUDA: Remove toolkit include dirs from implicit include dirs only with NVIDIA

Acked-by: Kitware Robot <kwrobot@kitware.com>
Acked-by: Artem Belevich <tra@google.com>
Acked-by: Robert Maynard <robert.maynard@kitware.com>
Acked-by: Axel Huebl <axel.huebl@plasma.ninja>
Acked-by: friendnick <ikoval67@gmail.com>
Acked-by: Patrik Huber <patrikhuber@gmail.com>
Merge-request: !4442
This commit is contained in:
commit
b246dee7db
@ -7,7 +7,7 @@ This property specifies the CUDA/C++ standard whose features are requested
|
||||
to build this target. For some compilers, this results in adding a
|
||||
flag such as ``-std=gnu++11`` to the compile line.
|
||||
|
||||
Supported values are ``98``, ``11``, ``14``.
|
||||
Supported values are ``98``, ``03``, ``11``, ``14``, ``17``, ``20``.
|
||||
|
||||
If the value requested does not result in a compile flag being added for
|
||||
the compiler in use, a previous standard flag will be added instead. This
|
||||
|
4
Help/release/dev/cuda-clang.rst
Normal file
4
Help/release/dev/cuda-clang.rst
Normal file
@ -0,0 +1,4 @@
|
||||
cuda-clang
|
||||
----------
|
||||
|
||||
* The ``CUDA`` language now supports Clang as a compiler.
|
@ -8,6 +8,19 @@ else()
|
||||
endif()
|
||||
set(CMAKE_INCLUDE_FLAG_CUDA "-I")
|
||||
|
||||
# Set implicit links early so compiler-specific modules can use them.
|
||||
set(__IMPLICT_LINKS )
|
||||
foreach(dir ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
|
||||
string(APPEND __IMPLICT_LINKS " -L\"${dir}\"")
|
||||
endforeach()
|
||||
foreach(lib ${CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES})
|
||||
if(${lib} MATCHES "/")
|
||||
string(APPEND __IMPLICT_LINKS " \"${lib}\"")
|
||||
else()
|
||||
string(APPEND __IMPLICT_LINKS " -l${lib}")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# Load compiler-specific information.
|
||||
if(CMAKE_CUDA_COMPILER_ID)
|
||||
include(Compiler/${CMAKE_CUDA_COMPILER_ID}-CUDA OPTIONAL)
|
||||
@ -97,22 +110,10 @@ include(CMakeCommonLanguageInclude)
|
||||
# CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION
|
||||
# CMAKE_CUDA_LINK_EXECUTABLE
|
||||
|
||||
if(CMAKE_CUDA_HOST_COMPILER)
|
||||
if(CMAKE_CUDA_HOST_COMPILER AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
|
||||
string(APPEND _CMAKE_CUDA_EXTRA_FLAGS " -ccbin=<CMAKE_CUDA_HOST_COMPILER>")
|
||||
endif()
|
||||
|
||||
set(__IMPLICT_LINKS )
|
||||
foreach(dir ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
|
||||
string(APPEND __IMPLICT_LINKS " -L\"${dir}\"")
|
||||
endforeach()
|
||||
foreach(lib ${CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES})
|
||||
if(${lib} MATCHES "/")
|
||||
string(APPEND __IMPLICT_LINKS " \"${lib}\"")
|
||||
else()
|
||||
string(APPEND __IMPLICT_LINKS " -l${lib}")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# create a shared library
|
||||
if(NOT CMAKE_CUDA_CREATE_SHARED_LIBRARY)
|
||||
set(CMAKE_CUDA_CREATE_SHARED_LIBRARY
|
||||
|
@ -89,9 +89,8 @@ function(compiler_id_detection outvar lang)
|
||||
)
|
||||
endif()
|
||||
|
||||
#Currently the only CUDA compilers are NVIDIA
|
||||
if(lang STREQUAL CUDA)
|
||||
set(ordered_compilers NVIDIA)
|
||||
set(ordered_compilers NVIDIA Clang)
|
||||
endif()
|
||||
|
||||
if(CID_ID_DEFINE)
|
||||
|
@ -2,7 +2,7 @@
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
include(${CMAKE_ROOT}/Modules/CMakeDetermineCompiler.cmake)
|
||||
include(${CMAKE_ROOT}/Modules//CMakeParseImplicitLinkInfo.cmake)
|
||||
include(${CMAKE_ROOT}/Modules/CMakeParseImplicitLinkInfo.cmake)
|
||||
|
||||
if( NOT ( ("${CMAKE_GENERATOR}" MATCHES "Make") OR
|
||||
("${CMAKE_GENERATOR}" MATCHES "Ninja") OR
|
||||
@ -57,16 +57,39 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
|
||||
file(READ ${CMAKE_ROOT}/Modules/CMakePlatformId.h.in
|
||||
CMAKE_CUDA_COMPILER_ID_PLATFORM_CONTENT)
|
||||
|
||||
list(APPEND CMAKE_CUDA_COMPILER_ID_MATCH_VENDORS NVIDIA)
|
||||
set(CMAKE_CUDA_COMPILER_ID_MATCH_VENDOR_REGEX_NVIDIA "nvcc: NVIDIA \(R\) Cuda compiler driver")
|
||||
list(APPEND CMAKE_CUDA_COMPILER_ID_VENDORS NVIDIA Clang)
|
||||
set(CMAKE_CUDA_COMPILER_ID_VENDOR_REGEX_NVIDIA "nvcc: NVIDIA \\(R\\) Cuda compiler driver")
|
||||
set(CMAKE_CUDA_COMPILER_ID_VENDOR_REGEX_Clang "(clang version)")
|
||||
|
||||
set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_REGEX "\nLd[^\n]*(\n[ \t]+[^\n]*)*\n[ \t]+([^ \t\r\n]+)[^\r\n]*-o[^\r\n]*CompilerIdCUDA/(\\./)?(CompilerIdCUDA.xctest/)?CompilerIdCUDA[ \t\n\\\"]")
|
||||
set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_INDEX 2)
|
||||
set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v")
|
||||
|
||||
set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS -v --keep --keep-dir tmp)
|
||||
# nvcc
|
||||
set(nvcc_test_flags "--keep --keep-dir tmp")
|
||||
if(CMAKE_CUDA_HOST_COMPILER)
|
||||
list(APPEND CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-ccbin=${CMAKE_CUDA_HOST_COMPILER}")
|
||||
string(APPEND nvcc_test_flags " -ccbin=${CMAKE_CUDA_HOST_COMPILER}")
|
||||
endif()
|
||||
list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST ${nvcc_test_flags})
|
||||
|
||||
# Clang
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
# Need to pass the host target and include directories if we're crosscompiling.
|
||||
set(clang_test_flags "--sysroot=\"${CMAKE_SYSROOT}\" --target=${CMAKE_CUDA_COMPILER_TARGET}")
|
||||
else()
|
||||
set(clang_test_flags)
|
||||
endif()
|
||||
|
||||
# Clang doesn't automatically select an architecture supported by the SDK.
|
||||
# Try in reverse order of deprecation with the most recent at front (i.e. the most likely to work for new setups).
|
||||
foreach(arch ${CMAKE_CUDA_ARCHITECTURES} "20" "30" "52")
|
||||
# Strip specifiers.
|
||||
string(REGEX MATCH "[0-9]+" arch_name "${arch}")
|
||||
list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags} --cuda-gpu-arch=sm_${arch_name}")
|
||||
endforeach()
|
||||
|
||||
# Finally also try the default.
|
||||
list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags}")
|
||||
|
||||
include(${CMAKE_ROOT}/Modules/CMakeDetermineCompilerId.cmake)
|
||||
CMAKE_DETERMINE_COMPILER_ID(CUDA CUDAFLAGS CMakeCUDACompilerId.cu)
|
||||
@ -89,6 +112,33 @@ if(${CMAKE_GENERATOR} MATCHES "Visual Studio")
|
||||
set(CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES "")
|
||||
set(CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES "")
|
||||
set(CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
|
||||
elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
|
||||
# Parse default CUDA architecture.
|
||||
if(NOT CMAKE_CUDA_ARCHITECTURES)
|
||||
string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
|
||||
set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_MATCH_1}" CACHE STRING "CUDA architectures")
|
||||
|
||||
if(NOT CMAKE_CUDA_ARCHITECTURES)
|
||||
message(FATAL_ERROR "Failed to find default CUDA architecture.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Parsing implicit host linker info is as simple as for regular Clang.
|
||||
CMAKE_PARSE_IMPLICIT_LINK_INFO("${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}"
|
||||
CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES
|
||||
CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES
|
||||
CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES
|
||||
log
|
||||
"${CMAKE_CUDA_IMPLICIT_OBJECT_REGEX}")
|
||||
|
||||
# Get SDK directory.
|
||||
string(REGEX MATCH "Found CUDA installation: (.+), version" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
|
||||
set(__cuda_directory "${CMAKE_MATCH_1}")
|
||||
|
||||
# Clang doesn't add the SDK library directory to the implicit link path. Do it ourselves, so stuff works.
|
||||
include(Internal/CUDAToolkit)
|
||||
set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "${CUDAToolkit_INCLUDE_DIR}")
|
||||
list(APPEND CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES "${CUDAToolkit_LIBRARY_DIR}")
|
||||
elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
|
||||
set(_nvcc_log "")
|
||||
string(REPLACE "\r" "" _nvcc_output_orig "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
|
||||
|
@ -74,20 +74,27 @@ else()
|
||||
# - cudart_static
|
||||
# - cudadevrt
|
||||
#
|
||||
# Additionally on Linux:
|
||||
# - rt
|
||||
# - pthread
|
||||
# - dl
|
||||
#
|
||||
# These are controlled by CMAKE_CUDA_RUNTIME_LIBRARY
|
||||
list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt)
|
||||
list(REMOVE_ITEM CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt)
|
||||
list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt rt pthread dl)
|
||||
list(REMOVE_ITEM CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt rt pthread dl)
|
||||
|
||||
# Remove the CUDA Toolkit include directories from the set of
|
||||
# implicit system include directories.
|
||||
# This resolves the issue that NVCC doesn't specify these
|
||||
# includes as SYSTEM includes when compiling device code, and sometimes
|
||||
# they contain headers that generate warnings, so let users mark them
|
||||
# as SYSTEM explicitly
|
||||
if(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES)
|
||||
list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_INCLUDE_DIRECTORIES
|
||||
${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
|
||||
)
|
||||
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
|
||||
# Remove the CUDA Toolkit include directories from the set of
|
||||
# implicit system include directories.
|
||||
# This resolves the issue that NVCC doesn't specify these
|
||||
# includes as SYSTEM includes when compiling device code, and sometimes
|
||||
# they contain headers that generate warnings, so let users mark them
|
||||
# as SYSTEM explicitly
|
||||
if(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES)
|
||||
list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_INCLUDE_DIRECTORIES
|
||||
${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Re-configure to save learned information.
|
||||
|
25
Modules/Compiler/Clang-CUDA.cmake
Normal file
25
Modules/Compiler/Clang-CUDA.cmake
Normal file
@ -0,0 +1,25 @@
|
||||
include(Compiler/Clang)
|
||||
__compiler_clang(CUDA)
|
||||
|
||||
# C++03 isn't supported for CXX, but is for CUDA, so we need to set these manually.
|
||||
# Do this before __compiler_clang_cxx_standards() since that adds the feature.
|
||||
set(CMAKE_CUDA03_STANDARD_COMPILE_OPTION "-std=c++03")
|
||||
set(CMAKE_CUDA03_EXTENSION_COMPILE_OPTION "-std=gnu++03")
|
||||
__compiler_clang_cxx_standards(CUDA)
|
||||
|
||||
set(CMAKE_CUDA_COMPILER_HAS_DEVICE_LINK_PHASE TRUE)
|
||||
set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cuda")
|
||||
set(_CMAKE_CUDA_PTX_FLAG "--cuda-device-only -S")
|
||||
|
||||
# RulePlaceholderExpander expands crosscompile variables like sysroot and target only for CMAKE_<LANG>_COMPILER. Override the default.
|
||||
set(CMAKE_CUDA_LINK_EXECUTABLE "<CMAKE_CUDA_COMPILER> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
|
||||
set(CMAKE_CUDA_CREATE_SHARED_LIBRARY "<CMAKE_CUDA_COMPILER> <CMAKE_SHARED_LIBRARY_CUDA_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
|
||||
|
||||
set(CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT "STATIC")
|
||||
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static")
|
||||
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart")
|
||||
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE "")
|
||||
|
||||
if(UNIX)
|
||||
list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl")
|
||||
endif()
|
@ -64,6 +64,10 @@ set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static")
|
||||
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart")
|
||||
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE "")
|
||||
|
||||
if(UNIX)
|
||||
list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl")
|
||||
endif()
|
||||
|
||||
if("x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC")
|
||||
set(CMAKE_CUDA03_STANDARD_COMPILE_OPTION "")
|
||||
set(CMAKE_CUDA03_EXTENSION_COMPILE_OPTION "")
|
||||
|
@ -473,168 +473,8 @@ Result variables
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
# For NVCC we can easily deduce the SDK binary directory from the compiler path.
|
||||
if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
|
||||
get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY)
|
||||
set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "")
|
||||
mark_as_advanced(CUDAToolkit_BIN_DIR)
|
||||
unset(cuda_dir)
|
||||
endif()
|
||||
|
||||
# Try language- or user-provided path first.
|
||||
if(CUDAToolkit_BIN_DIR)
|
||||
find_program(CUDAToolkit_NVCC_EXECUTABLE
|
||||
NAMES nvcc nvcc.exe
|
||||
PATHS ${CUDAToolkit_BIN_DIR}
|
||||
NO_DEFAULT_PATH
|
||||
)
|
||||
endif()
|
||||
|
||||
# Search using CUDAToolkit_ROOT
|
||||
find_program(CUDAToolkit_NVCC_EXECUTABLE
|
||||
NAMES nvcc nvcc.exe
|
||||
PATHS ENV CUDA_PATH
|
||||
PATH_SUFFIXES bin
|
||||
)
|
||||
|
||||
# If the user specified CUDAToolkit_ROOT but nvcc could not be found, this is an error.
|
||||
if (NOT CUDAToolkit_NVCC_EXECUTABLE AND (DEFINED CUDAToolkit_ROOT OR DEFINED ENV{CUDAToolkit_ROOT}))
|
||||
# Declare error messages now, print later depending on find_package args.
|
||||
set(fail_base "Could not find nvcc executable in path specified by")
|
||||
set(cuda_root_fail "${fail_base} CUDAToolkit_ROOT=${CUDAToolkit_ROOT}")
|
||||
set(env_cuda_root_fail "${fail_base} environment variable CUDAToolkit_ROOT=$ENV{CUDAToolkit_ROOT}")
|
||||
|
||||
if (CUDAToolkit_FIND_REQUIRED)
|
||||
if (DEFINED CUDAToolkit_ROOT)
|
||||
message(FATAL_ERROR ${cuda_root_fail})
|
||||
elseif (DEFINED ENV{CUDAToolkit_ROOT})
|
||||
message(FATAL_ERROR ${env_cuda_root_fail})
|
||||
endif()
|
||||
else()
|
||||
if (NOT CUDAToolkit_FIND_QUIETLY)
|
||||
if (DEFINED CUDAToolkit_ROOT)
|
||||
message(STATUS ${cuda_root_fail})
|
||||
elseif (DEFINED ENV{CUDAToolkit_ROOT})
|
||||
message(STATUS ${env_cuda_root_fail})
|
||||
endif()
|
||||
endif()
|
||||
set(CUDAToolkit_FOUND FALSE)
|
||||
unset(fail_base)
|
||||
unset(cuda_root_fail)
|
||||
unset(env_cuda_root_fail)
|
||||
return()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# CUDAToolkit_ROOT cmake / env variable not specified, try platform defaults.
|
||||
#
|
||||
# - Linux: /usr/local/cuda-X.Y
|
||||
# - macOS: /Developer/NVIDIA/CUDA-X.Y
|
||||
# - Windows: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y
|
||||
#
|
||||
# We will also search the default symlink location /usr/local/cuda first since
|
||||
# if CUDAToolkit_ROOT is not specified, it is assumed that the symlinked
|
||||
# directory is the desired location.
|
||||
if (NOT CUDAToolkit_NVCC_EXECUTABLE)
|
||||
if (UNIX)
|
||||
if (NOT APPLE)
|
||||
set(platform_base "/usr/local/cuda-")
|
||||
else()
|
||||
set(platform_base "/Developer/NVIDIA/CUDA-")
|
||||
endif()
|
||||
else()
|
||||
set(platform_base "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v")
|
||||
endif()
|
||||
|
||||
# Build out a descending list of possible CUDA installations, e.g. /usr/local/cuda-10.1.
|
||||
file(GLOB possible_paths "${platform_base}*")
|
||||
# Iterate the glob results and create a descending list.
|
||||
set(possible_versions)
|
||||
foreach (p ${possible_paths})
|
||||
# Extract version number from end of string
|
||||
string(REGEX MATCH "[0-9][0-9]?\\.[0-9]$" p_version ${p})
|
||||
if (IS_DIRECTORY ${p} AND p_version)
|
||||
list(APPEND possible_versions ${p_version})
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# Cannot use list(SORT) because that is alphabetical, we need numerical.
|
||||
# NOTE: this is not an efficient sorting strategy. But even if a user had
|
||||
# every possible version of CUDA installed, this wouldn't create any
|
||||
# significant overhead.
|
||||
set(versions)
|
||||
foreach (v ${possible_versions})
|
||||
list(LENGTH versions num_versions)
|
||||
# First version, nothing to compare with so just append.
|
||||
if (num_versions EQUAL 0)
|
||||
list(APPEND versions ${v})
|
||||
else()
|
||||
# Loop through list. Insert at an index when comparison is
|
||||
# VERSION_GREATER since we want a descending list. Duplicates will not
|
||||
# happen since this came from a glob list of directories.
|
||||
set(i 0)
|
||||
set(early_terminate FALSE)
|
||||
while (i LESS num_versions)
|
||||
list(GET versions ${i} curr)
|
||||
if (v VERSION_GREATER curr)
|
||||
list(INSERT versions ${i} ${v})
|
||||
set(early_terminate TRUE)
|
||||
break()
|
||||
endif()
|
||||
math(EXPR i "${i} + 1")
|
||||
endwhile()
|
||||
# If it did not get inserted, place it at the end.
|
||||
if (NOT early_terminate)
|
||||
list(APPEND versions ${v})
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# With a descending list of versions, populate possible paths to search.
|
||||
set(search_paths)
|
||||
foreach (v ${versions})
|
||||
list(APPEND search_paths "${platform_base}${v}")
|
||||
endforeach()
|
||||
|
||||
# Force the global default /usr/local/cuda to the front on Unix.
|
||||
if (UNIX)
|
||||
list(INSERT search_paths 0 "/usr/local/cuda")
|
||||
endif()
|
||||
|
||||
# Now search for nvcc again using the platform default search paths.
|
||||
find_program(CUDAToolkit_NVCC_EXECUTABLE
|
||||
NAMES nvcc nvcc.exe
|
||||
PATHS ${search_paths}
|
||||
PATH_SUFFIXES bin
|
||||
)
|
||||
|
||||
# We are done with these variables now, cleanup for caller.
|
||||
unset(platform_base)
|
||||
unset(possible_paths)
|
||||
unset(possible_versions)
|
||||
unset(versions)
|
||||
unset(i)
|
||||
unset(early_terminate)
|
||||
unset(search_paths)
|
||||
|
||||
if (NOT CUDAToolkit_NVCC_EXECUTABLE)
|
||||
if (CUDAToolkit_FIND_REQUIRED)
|
||||
message(FATAL_ERROR "Could not find nvcc, please set CUDAToolkit_ROOT.")
|
||||
elseif(NOT CUDAToolkit_FIND_QUIETLY)
|
||||
message(STATUS "Could not find nvcc, please set CUDAToolkit_ROOT.")
|
||||
endif()
|
||||
|
||||
set(CUDAToolkit_FOUND FALSE)
|
||||
return()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT CUDAToolkit_BIN_DIR AND CUDAToolkit_NVCC_EXECUTABLE)
|
||||
get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY)
|
||||
set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE)
|
||||
mark_as_advanced(CUDAToolkit_BIN_DIR)
|
||||
unset(cuda_dir)
|
||||
endif()
|
||||
# Include shared CUDA toolkit location code.
|
||||
include(Internal/CUDAToolkit)
|
||||
|
||||
if(CUDAToolkit_NVCC_EXECUTABLE AND
|
||||
CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER)
|
||||
@ -658,72 +498,22 @@ else()
|
||||
unset(NVCC_OUT)
|
||||
endif()
|
||||
|
||||
|
||||
get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE)
|
||||
|
||||
# Handle cross compilation
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a")
|
||||
# Support for NVPACK
|
||||
set (CUDAToolkit_TARGET_NAME "armv7-linux-androideabi")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
|
||||
# Support for arm cross compilation
|
||||
set(CUDAToolkit_TARGET_NAME "armv7-linux-gnueabihf")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
|
||||
# Support for aarch64 cross compilation
|
||||
if (ANDROID_ARCH_NAME STREQUAL "arm64")
|
||||
set(CUDAToolkit_TARGET_NAME "aarch64-linux-androideabi")
|
||||
else()
|
||||
set(CUDAToolkit_TARGET_NAME "aarch64-linux")
|
||||
endif (ANDROID_ARCH_NAME STREQUAL "arm64")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
|
||||
set(CUDAToolkit_TARGET_NAME "x86_64-linux")
|
||||
endif()
|
||||
|
||||
if (EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
|
||||
set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
|
||||
# add known CUDA target root path to the set of directories we search for programs, libraries and headers
|
||||
list(PREPEND CMAKE_FIND_ROOT_PATH "${CUDAToolkit_TARGET_DIR}")
|
||||
|
||||
# Mark that we need to pop the root search path changes after we have
|
||||
# found all cuda libraries so that searches for our cross-compilation
|
||||
# libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or
|
||||
# PATH
|
||||
set(_CUDAToolkit_Pop_ROOT_PATH True)
|
||||
endif()
|
||||
else()
|
||||
# Not cross compiling
|
||||
set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}")
|
||||
# Now that we have the real ROOT_DIR, find components inside it.
|
||||
list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR})
|
||||
|
||||
# Mark that we need to pop the prefix path changes after we have
|
||||
# found the cudart library.
|
||||
set(_CUDAToolkit_Pop_Prefix True)
|
||||
if(NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY)
|
||||
message(STATUS "Unable to find cudart library.")
|
||||
endif()
|
||||
|
||||
|
||||
# Find the include/ directory
|
||||
find_path(CUDAToolkit_INCLUDE_DIR
|
||||
NAMES cuda_runtime.h
|
||||
)
|
||||
|
||||
# And find the CUDA Runtime Library libcudart
|
||||
# Find the CUDA Runtime Library libcudart
|
||||
find_library(CUDA_CUDART
|
||||
NAMES cudart
|
||||
PATH_SUFFIXES lib64 lib/x64
|
||||
)
|
||||
if (NOT CUDA_CUDART)
|
||||
if(NOT CUDA_CUDART)
|
||||
find_library(CUDA_CUDART
|
||||
NAMES cudart
|
||||
PATH_SUFFIXES lib64/stubs lib/x64/stubs
|
||||
)
|
||||
endif()
|
||||
|
||||
if (NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY)
|
||||
message(STATUS "Unable to find cudart library.")
|
||||
endif()
|
||||
|
||||
unset(CUDAToolkit_ROOT_DIR)
|
||||
if(_CUDAToolkit_Pop_Prefix)
|
||||
list(REMOVE_AT CMAKE_PREFIX_PATH -1)
|
||||
@ -749,8 +539,8 @@ mark_as_advanced(CUDA_CUDART
|
||||
#-----------------------------------------------------------------------------
|
||||
# Construct result variables
|
||||
if(CUDAToolkit_FOUND)
|
||||
set(CUDAToolkit_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIR})
|
||||
get_filename_component(CUDAToolkit_LIBRARY_DIR ${CUDA_CUDART} DIRECTORY ABSOLUTE)
|
||||
set(CUDAToolkit_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIR})
|
||||
get_filename_component(CUDAToolkit_LIBRARY_DIR ${CUDA_CUDART} DIRECTORY ABSOLUTE)
|
||||
endif()
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
225
Modules/Internal/CUDAToolkit.cmake
Normal file
225
Modules/Internal/CUDAToolkit.cmake
Normal file
@ -0,0 +1,225 @@
|
||||
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
|
||||
# file Copyright.txt or https://cmake.org/licensing for details.
|
||||
|
||||
# This file is for sharing code for finding basic CUDA toolkit information between
|
||||
# CMakeDetermineCUDACompiler.cmake and FindCUDAToolkit.cmake.
|
||||
|
||||
# For NVCC we can easily deduce the SDK binary directory from the compiler path.
|
||||
if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
|
||||
get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY)
|
||||
set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "")
|
||||
mark_as_advanced(CUDAToolkit_BIN_DIR)
|
||||
unset(cuda_dir)
|
||||
endif()
|
||||
|
||||
# Try language- or user-provided path first.
|
||||
if(CUDAToolkit_BIN_DIR)
|
||||
find_program(CUDAToolkit_NVCC_EXECUTABLE
|
||||
NAMES nvcc nvcc.exe
|
||||
PATHS ${CUDAToolkit_BIN_DIR}
|
||||
NO_DEFAULT_PATH
|
||||
)
|
||||
endif()
|
||||
|
||||
# Search using CUDAToolkit_ROOT
|
||||
find_program(CUDAToolkit_NVCC_EXECUTABLE
|
||||
NAMES nvcc nvcc.exe
|
||||
PATHS ENV CUDA_PATH
|
||||
PATH_SUFFIXES bin
|
||||
)
|
||||
|
||||
# If the user specified CUDAToolkit_ROOT but nvcc could not be found, this is an error.
|
||||
if(NOT CUDAToolkit_NVCC_EXECUTABLE AND (DEFINED CUDAToolkit_ROOT OR DEFINED ENV{CUDAToolkit_ROOT}))
|
||||
# Declare error messages now, print later depending on find_package args.
|
||||
set(fail_base "Could not find nvcc executable in path specified by")
|
||||
set(cuda_root_fail "${fail_base} CUDAToolkit_ROOT=${CUDAToolkit_ROOT}")
|
||||
set(env_cuda_root_fail "${fail_base} environment variable CUDAToolkit_ROOT=$ENV{CUDAToolkit_ROOT}")
|
||||
|
||||
if(CUDAToolkit_FIND_REQUIRED)
|
||||
if(DEFINED CUDAToolkit_ROOT)
|
||||
message(FATAL_ERROR ${cuda_root_fail})
|
||||
elseif(DEFINED ENV{CUDAToolkit_ROOT})
|
||||
message(FATAL_ERROR ${env_cuda_root_fail})
|
||||
endif()
|
||||
else()
|
||||
if(NOT CUDAToolkit_FIND_QUIETLY)
|
||||
if(DEFINED CUDAToolkit_ROOT)
|
||||
message(STATUS ${cuda_root_fail})
|
||||
elseif(DEFINED ENV{CUDAToolkit_ROOT})
|
||||
message(STATUS ${env_cuda_root_fail})
|
||||
endif()
|
||||
endif()
|
||||
set(CUDAToolkit_FOUND FALSE)
|
||||
unset(fail_base)
|
||||
unset(cuda_root_fail)
|
||||
unset(env_cuda_root_fail)
|
||||
return()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# CUDAToolkit_ROOT cmake / env variable not specified, try platform defaults.
|
||||
#
|
||||
# - Linux: /usr/local/cuda-X.Y
|
||||
# - macOS: /Developer/NVIDIA/CUDA-X.Y
|
||||
# - Windows: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y
|
||||
#
|
||||
# We will also search the default symlink location /usr/local/cuda first since
|
||||
# if CUDAToolkit_ROOT is not specified, it is assumed that the symlinked
|
||||
# directory is the desired location.
|
||||
if(NOT CUDAToolkit_NVCC_EXECUTABLE)
|
||||
if(UNIX)
|
||||
if(NOT APPLE)
|
||||
set(platform_base "/usr/local/cuda-")
|
||||
else()
|
||||
set(platform_base "/Developer/NVIDIA/CUDA-")
|
||||
endif()
|
||||
else()
|
||||
set(platform_base "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v")
|
||||
endif()
|
||||
|
||||
# Build out a descending list of possible CUDA installations, e.g. /usr/local/cuda-10.1.
|
||||
file(GLOB possible_paths "${platform_base}*")
|
||||
# Iterate the glob results and create a descending list.
|
||||
set(possible_versions)
|
||||
foreach (p ${possible_paths})
|
||||
# Extract version number from end of string
|
||||
string(REGEX MATCH "[0-9][0-9]?\\.[0-9]$" p_version ${p})
|
||||
if(IS_DIRECTORY ${p} AND p_version)
|
||||
list(APPEND possible_versions ${p_version})
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# Cannot use list(SORT) because that is alphabetical, we need numerical.
|
||||
# NOTE: this is not an efficient sorting strategy. But even if a user had
|
||||
# every possible version of CUDA installed, this wouldn't create any
|
||||
# significant overhead.
|
||||
set(versions)
|
||||
foreach (v ${possible_versions})
|
||||
list(LENGTH versions num_versions)
|
||||
# First version, nothing to compare with so just append.
|
||||
if(num_versions EQUAL 0)
|
||||
list(APPEND versions ${v})
|
||||
else()
|
||||
# Loop through list. Insert at an index when comparison is
|
||||
# VERSION_GREATER since we want a descending list. Duplicates will not
|
||||
# happen since this came from a glob list of directories.
|
||||
set(i 0)
|
||||
set(early_terminate FALSE)
|
||||
while (i LESS num_versions)
|
||||
list(GET versions ${i} curr)
|
||||
if(v VERSION_GREATER curr)
|
||||
list(INSERT versions ${i} ${v})
|
||||
set(early_terminate TRUE)
|
||||
break()
|
||||
endif()
|
||||
math(EXPR i "${i} + 1")
|
||||
endwhile()
|
||||
# If it did not get inserted, place it at the end.
|
||||
if(NOT early_terminate)
|
||||
list(APPEND versions ${v})
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# With a descending list of versions, populate possible paths to search.
|
||||
set(search_paths)
|
||||
foreach (v ${versions})
|
||||
list(APPEND search_paths "${platform_base}${v}")
|
||||
endforeach()
|
||||
|
||||
# Force the global default /usr/local/cuda to the front on Unix.
|
||||
if(UNIX)
|
||||
list(INSERT search_paths 0 "/usr/local/cuda")
|
||||
endif()
|
||||
|
||||
# Now search for nvcc again using the platform default search paths.
|
||||
find_program(CUDAToolkit_NVCC_EXECUTABLE
|
||||
NAMES nvcc nvcc.exe
|
||||
PATHS ${search_paths}
|
||||
PATH_SUFFIXES bin
|
||||
)
|
||||
|
||||
# We are done with these variables now, cleanup for caller.
|
||||
unset(platform_base)
|
||||
unset(possible_paths)
|
||||
unset(possible_versions)
|
||||
unset(versions)
|
||||
unset(i)
|
||||
unset(early_terminate)
|
||||
unset(search_paths)
|
||||
|
||||
if(NOT CUDAToolkit_NVCC_EXECUTABLE)
|
||||
if(CUDAToolkit_FIND_REQUIRED)
|
||||
message(FATAL_ERROR "Could not find nvcc, please set CUDAToolkit_ROOT.")
|
||||
elseif(NOT CUDAToolkit_FIND_QUIETLY)
|
||||
message(STATUS "Could not find nvcc, please set CUDAToolkit_ROOT.")
|
||||
endif()
|
||||
|
||||
set(CUDAToolkit_FOUND FALSE)
|
||||
return()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT CUDAToolkit_BIN_DIR AND CUDAToolkit_NVCC_EXECUTABLE)
|
||||
get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY)
|
||||
set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE)
|
||||
mark_as_advanced(CUDAToolkit_BIN_DIR)
|
||||
unset(cuda_dir)
|
||||
endif()
|
||||
|
||||
get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE)
|
||||
|
||||
# Handle cross compilation
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a")
|
||||
# Support for NVPACK
|
||||
set(CUDAToolkit_TARGET_NAME "armv7-linux-androideabi")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
|
||||
# Support for arm cross compilation
|
||||
set(CUDAToolkit_TARGET_NAME "armv7-linux-gnueabihf")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
|
||||
# Support for aarch64 cross compilation
|
||||
if(ANDROID_ARCH_NAME STREQUAL "arm64")
|
||||
set(CUDAToolkit_TARGET_NAME "aarch64-linux-androideabi")
|
||||
else()
|
||||
set(CUDAToolkit_TARGET_NAME "aarch64-linux")
|
||||
endif(ANDROID_ARCH_NAME STREQUAL "arm64")
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
|
||||
set(CUDAToolkit_TARGET_NAME "x86_64-linux")
|
||||
endif()
|
||||
|
||||
if(EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
|
||||
set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
|
||||
# add known CUDA target root path to the set of directories we search for programs, libraries and headers
|
||||
list(PREPEND CMAKE_FIND_ROOT_PATH "${CUDAToolkit_TARGET_DIR}")
|
||||
|
||||
# Mark that we need to pop the root search path changes after we have
|
||||
# found all cuda libraries so that searches for our cross-compilation
|
||||
# libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or
|
||||
# PATH
|
||||
set(_CUDAToolkit_Pop_ROOT_PATH True)
|
||||
endif()
|
||||
else()
|
||||
# Not cross compiling
|
||||
set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}")
|
||||
# Now that we have the real ROOT_DIR, find components inside it.
|
||||
list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR})
|
||||
|
||||
# Mark that we need to pop the prefix path changes after we have
|
||||
# found the cudart library.
|
||||
set(_CUDAToolkit_Pop_Prefix True)
|
||||
endif()
|
||||
|
||||
# Find the include/ directory
|
||||
find_path(CUDAToolkit_INCLUDE_DIR
|
||||
NAMES cuda_runtime.h
|
||||
)
|
||||
|
||||
# Find a tentative CUDAToolkit_LIBRARY_DIR. FindCUDAToolkit overrides it by searching for the CUDA runtime,
|
||||
# but we can't do that here, as CMakeDetermineCUDACompiler wants to use it before the variables necessary
|
||||
# for find_library() have been initialized.
|
||||
if(EXISTS "${CUDAToolkit_TARGET_DIR}/lib64")
|
||||
set(CUDAToolkit_LIBRARY_DIR "${CUDAToolkit_TARGET_DIR}/lib64")
|
||||
elseif(EXISTS "${CUDAToolkit_TARGET_DIR}/lib")
|
||||
set(CUDAToolkit_LIBRARY_DIR "${CUDAToolkit_TARGET_DIR}/lib")
|
||||
endif()
|
@ -74,6 +74,10 @@ set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static")
|
||||
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart")
|
||||
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE "")
|
||||
|
||||
if(UNIX)
|
||||
list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl")
|
||||
endif()
|
||||
|
||||
string(APPEND CMAKE_CUDA_FLAGS_INIT " ${PLATFORM_DEFINES_CUDA} -D_WINDOWS -Xcompiler=\"${_W3}${_FLAGS_CXX}\"")
|
||||
string(APPEND CMAKE_CUDA_FLAGS_DEBUG_INIT " -Xcompiler=\"${_MDd}-Zi -Ob0 -Od ${_RTC1}\"")
|
||||
string(APPEND CMAKE_CUDA_FLAGS_RELEASE_INIT " -Xcompiler=\"${_MD}-O2 -Ob2\" -DNDEBUG")
|
||||
|
@ -3180,6 +3180,20 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
|
||||
|
||||
flags += "]";
|
||||
}
|
||||
} else if (compiler == "Clang") {
|
||||
for (CudaArchitecture& architecture : architectures) {
|
||||
flags += " --cuda-gpu-arch=sm_" + architecture.name;
|
||||
|
||||
if (!architecture.real) {
|
||||
Makefile->IssueMessage(
|
||||
MessageType::WARNING,
|
||||
"Clang doesn't support disabling CUDA real code generation.");
|
||||
}
|
||||
|
||||
if (!architecture.virtual_) {
|
||||
flags += " --no-cuda-include-ptx=sm_" + architecture.name;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
|
||||
ADD_TEST_MACRO(Cuda.Complex CudaComplex)
|
||||
ADD_TEST_MACRO(Cuda.ConsumeCompileFeatures CudaConsumeCompileFeatures)
|
||||
ADD_TEST_MACRO(Cuda.CXXStandardSetTwice CXXStandardSetTwice)
|
||||
ADD_TEST_MACRO(Cuda.ObjectLibrary CudaObjectLibrary)
|
||||
@ -12,10 +11,16 @@ ADD_TEST_MACRO(Cuda.NotEnabled CudaNotEnabled)
|
||||
ADD_TEST_MACRO(Cuda.SeparableCompCXXOnly SeparableCompCXXOnly)
|
||||
ADD_TEST_MACRO(Cuda.Toolkit Toolkit)
|
||||
ADD_TEST_MACRO(Cuda.IncludePathNoToolkit IncludePathNoToolkit)
|
||||
ADD_TEST_MACRO(Cuda.ProperDeviceLibraries ProperDeviceLibraries)
|
||||
ADD_TEST_MACRO(Cuda.ProperLinkFlags ProperLinkFlags)
|
||||
ADD_TEST_MACRO(Cuda.SharedRuntimePlusToolkit SharedRuntimePlusToolkit)
|
||||
|
||||
# Separable compilation is currently only supported on NVCC. Disable tests
|
||||
# using it for other compilers.
|
||||
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
|
||||
ADD_TEST_MACRO(Cuda.Complex CudaComplex)
|
||||
ADD_TEST_MACRO(Cuda.ProperDeviceLibraries ProperDeviceLibraries)
|
||||
ADD_TEST_MACRO(Cuda.ProperLinkFlags ProperLinkFlags)
|
||||
endif()
|
||||
|
||||
# CUDA only ships the shared version of the toolkit libraries
|
||||
# on Windows
|
||||
if(NOT WIN32)
|
||||
|
@ -9,11 +9,17 @@ project (ProperLinkFlags CUDA CXX)
|
||||
|
||||
#Specify a set of valid CUDA flags and an invalid set of CXX flags ( for CUDA )
|
||||
#to make sure we don't use the CXX flags when linking CUDA executables
|
||||
string(APPEND CMAKE_CUDA_FLAGS " -arch=sm_35 --use_fast_math")
|
||||
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
|
||||
string(APPEND CMAKE_CUDA_FLAGS "--use_fast_math")
|
||||
elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
|
||||
string(APPEND CMAKE_CUDA_FLAGS "-ffast-math")
|
||||
endif()
|
||||
|
||||
set(CMAKE_CXX_FLAGS "-Wall")
|
||||
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(CMAKE_CUDA_STANDARD 11)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 35)
|
||||
add_executable(ProperLinkFlags file1.cu main.cxx)
|
||||
|
||||
set_target_properties( ProperLinkFlags
|
||||
|
@ -1,17 +1,35 @@
|
||||
|
||||
ADD_TEST_MACRO(CudaOnly.Architecture Architecture)
|
||||
ADD_TEST_MACRO(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
|
||||
ADD_TEST_MACRO(CudaOnly.CompileFlags CudaOnlyCompileFlags)
|
||||
ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
|
||||
ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)
|
||||
ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
|
||||
ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
|
||||
ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
|
||||
ADD_TEST_MACRO(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusToolkit)
|
||||
ADD_TEST_MACRO(CudaOnly.Standard98 CudaOnlyStandard98)
|
||||
ADD_TEST_MACRO(CudaOnly.Toolkit CudaOnlyToolkit)
|
||||
ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
|
||||
|
||||
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
|
||||
# Separable compilation is currently only supported on NVCC. Disable tests
|
||||
# using it for other compilers.
|
||||
ADD_TEST_MACRO(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
|
||||
ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
|
||||
ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
|
||||
|
||||
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
|
||||
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
|
||||
--build-and-test
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/"
|
||||
${build_generator_args}
|
||||
--build-project DontResolveDeviceSymbols
|
||||
--build-options ${build_options}
|
||||
--test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
|
||||
)
|
||||
|
||||
# Only NVCC defines __CUDACC_DEBUG__ when compiling in debug mode.
|
||||
ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
|
||||
endif()
|
||||
|
||||
# CUDA only ships the shared version of the toolkit libraries
|
||||
# on Windows
|
||||
if(NOT WIN32)
|
||||
@ -22,17 +40,6 @@ if(MSVC)
|
||||
ADD_TEST_MACRO(CudaOnly.PDB CudaOnlyPDB)
|
||||
endif()
|
||||
|
||||
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
|
||||
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
|
||||
--build-and-test
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/"
|
||||
${build_generator_args}
|
||||
--build-project DontResolveDeviceSymbols
|
||||
--build-options ${build_options}
|
||||
--test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
|
||||
)
|
||||
|
||||
add_test(NAME CudaOnly.RuntimeControls COMMAND
|
||||
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
|
||||
--build-and-test
|
||||
|
@ -1,16 +1,15 @@
|
||||
cmake_minimum_required(VERSION 3.17)
|
||||
cmake_policy(SET CMP0104 OLD)
|
||||
project(CompileFlags CUDA)
|
||||
|
||||
# Clear defaults.
|
||||
set(CMAKE_CUDA_ARCHITECTURES)
|
||||
|
||||
add_executable(CudaOnlyCompileFlags main.cu)
|
||||
|
||||
# Try passing CUDA architecture flags explicitly.
|
||||
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
|
||||
target_compile_options(CudaOnlyCompileFlags PRIVATE
|
||||
-gencode arch=compute_50,code=compute_50
|
||||
--compiler-options=-DHOST_DEFINE
|
||||
)
|
||||
else()
|
||||
set_property(TARGET CudaOnlyCompileFlags PROPERTY CUDA_ARCHITECTURES 50-real)
|
||||
endif()
|
||||
|
||||
target_compile_options(CudaOnlyCompileFlags PRIVATE -DALWAYS_DEFINE)
|
||||
|
@ -4,11 +4,8 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Check HOST_DEFINE only for nvcc
|
||||
#ifndef __CUDA__
|
||||
# ifndef HOST_DEFINE
|
||||
# error "HOST_DEFINE not defined!"
|
||||
# endif
|
||||
#ifndef ALWAYS_DEFINE
|
||||
# error "ALWAYS_DEFINE not defined!"
|
||||
#endif
|
||||
|
||||
int main()
|
||||
|
@ -34,16 +34,15 @@ static std::string ptx_paths = "$<TARGET_OBJECTS:CudaPTX>";
|
||||
# need to also pass the --name option
|
||||
set(output_file ${CMAKE_CURRENT_BINARY_DIR}/embedded_objs.h)
|
||||
|
||||
get_filename_component(cuda_compiler_bin "${CMAKE_CUDA_COMPILER}" DIRECTORY)
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
find_program(bin_to_c
|
||||
NAMES bin2c
|
||||
PATHS ${cuda_compiler_bin}
|
||||
PATHS ${CUDAToolkit_BIN_DIR}
|
||||
)
|
||||
if(NOT bin_to_c)
|
||||
message(FATAL_ERROR
|
||||
"bin2c not found:\n"
|
||||
" CMAKE_CUDA_COMPILER='${CMAKE_CUDA_COMPILER}'\n"
|
||||
" cuda_compiler_bin='${cuda_compiler_bin}'\n"
|
||||
" CUDAToolkit_BIN_DIR='${CUDAToolkit_BIN_DIR}'\n"
|
||||
)
|
||||
endif()
|
||||
|
||||
|
@ -2,18 +2,19 @@
|
||||
cmake_minimum_required(VERSION 3.7)
|
||||
project (GPUDebugFlag CUDA)
|
||||
|
||||
#Goal for this example:
|
||||
#verify that -G enables gpu debug flags
|
||||
string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_30,code=compute_30")
|
||||
string(APPEND CMAKE_CUDA_FLAGS " -G")
|
||||
set(CMAKE_CUDA_STANDARD 11)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 30)
|
||||
|
||||
# Goal for this example:
|
||||
# Verify that enabling device debug works.
|
||||
string(APPEND CMAKE_CUDA_FLAGS "-G")
|
||||
|
||||
add_executable(CudaOnlyGPUDebugFlag main.cu)
|
||||
|
||||
#CUDA's __CUDACC_DEBUG__ define was added in NVCC 9.0
|
||||
#so if we are below 9.0.0 we will manually add the define so that the test
|
||||
#passes
|
||||
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9.0.0)
|
||||
#CUDA's __CUDACC_DEBUG__ define was added in 9.0
|
||||
#so if we are below 9.0.0 we will manually add the define so that the test
|
||||
#passes
|
||||
target_compile_definitions(CudaOnlyGPUDebugFlag PRIVATE "__CUDACC_DEBUG__")
|
||||
endif()
|
||||
|
||||
|
@ -18,7 +18,7 @@ target_compile_options(CudaOnlyWithDefs
|
||||
PRIVATE
|
||||
-DFLAG_COMPILE_LANG_$<COMPILE_LANGUAGE>
|
||||
-DFLAG_LANG_IS_CUDA=$<COMPILE_LANGUAGE:CUDA>
|
||||
--compiler-options=-DHOST_DEFINE
|
||||
$<$<CUDA_COMPILER_ID:NVIDIA>:--compiler-options=-DHOST_DEFINE> # Host-only defines are possible only on NVCC.
|
||||
)
|
||||
|
||||
target_compile_definitions(CudaOnlyWithDefs
|
||||
|
@ -7,8 +7,10 @@
|
||||
# error "INC_CUDA not defined!"
|
||||
#endif
|
||||
|
||||
#ifndef HOST_DEFINE
|
||||
# error "HOST_DEFINE not defined!"
|
||||
#ifdef __NVCC__
|
||||
# ifndef HOST_DEFINE
|
||||
# error "HOST_DEFINE not defined!"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef PACKED_DEFINE
|
||||
|
@ -1 +1,2 @@
|
||||
include(CMP0104-Common.cmake)
|
||||
set_property(TARGET cuda PROPERTY CUDA_ARCHITECTURES)
|
||||
|
@ -285,7 +285,8 @@ run_cmake_command(NoUnusedVariables ${CMAKE_COMMAND} ${CMAKE_CURRENT_LIST_DIR}
|
||||
"-DCMAKE_DEFAULT_CONFIGS=all"
|
||||
)
|
||||
|
||||
if(CMake_TEST_CUDA)
|
||||
# CudaSimple uses separable compilation, which is currently only supported on NVCC.
|
||||
if(CMake_TEST_CUDA AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
|
||||
set(RunCMake_TEST_BINARY_DIR ${RunCMake_BINARY_DIR}/CudaSimple-build)
|
||||
run_cmake_configure(CudaSimple)
|
||||
include(${RunCMake_TEST_BINARY_DIR}/target_files.cmake)
|
||||
|
@ -50,10 +50,14 @@ if (NOT CMAKE_C_COMPILER_ID STREQUAL "Intel")
|
||||
run_cmake_target(genex_DEVICE_LINK interface LinkOptions_shared_interface --config Release)
|
||||
run_cmake_target(genex_DEVICE_LINK private LinkOptions_private --config Release)
|
||||
if (CMake_TEST_CUDA)
|
||||
run_cmake_target(genex_DEVICE_LINK CMP0105_UNSET LinkOptions_CMP0105_UNSET --config Release)
|
||||
run_cmake_target(genex_DEVICE_LINK CMP0105_OLD LinkOptions_CMP0105_OLD --config Release)
|
||||
run_cmake_target(genex_DEVICE_LINK CMP0105_NEW LinkOptions_CMP0105_NEW --config Release)
|
||||
run_cmake_target(genex_DEVICE_LINK device LinkOptions_device --config Release)
|
||||
# Separable compilation is only supported on NVCC.
|
||||
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
|
||||
run_cmake_target(genex_DEVICE_LINK CMP0105_UNSET LinkOptions_CMP0105_UNSET --config Release)
|
||||
run_cmake_target(genex_DEVICE_LINK CMP0105_OLD LinkOptions_CMP0105_OLD --config Release)
|
||||
run_cmake_target(genex_DEVICE_LINK CMP0105_NEW LinkOptions_CMP0105_NEW --config Release)
|
||||
run_cmake_target(genex_DEVICE_LINK device LinkOptions_device --config Release)
|
||||
endif()
|
||||
|
||||
run_cmake_target(genex_DEVICE_LINK no_device LinkOptions_no_device --config Release)
|
||||
endif()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user