[libc] Support add_object_library for the GPU build

This patch unifies the handling of generating the GPU build targets
between the `add_entrypoint_library` and the `add_object_library`
functions. The `_build_gpu_objects` function will create two targets.
One contains a single object file with several GPU binaries embedded in
it, a so-called fatbinary. The other is a direct compile of the
supported target to be used internally only. This patch pulls out some
of the properties logic so that we can handle both more easily. This
patch also required adding an ovverride  `NO_GPU_BUILD` for cases when
we only want to build the source file as normal.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D144214
This commit is contained in:
Joseph Huber 2023-02-16 14:46:39 -06:00
parent 287267c23a
commit d51d2b5909
3 changed files with 90 additions and 66 deletions

View File

@ -59,19 +59,23 @@ function(_get_common_compile_options output_var flags)
set(${output_var} ${compile_options} PARENT_SCOPE)
endfunction()
# Builds the entrypoint target for the GPU.
# Builds the object target for the GPU.
# This compiles the target for all supported architectures and embeds it into
# host binary for installing. The internal target contains the GPU code directly
# compiled for a single architecture used internally.
# Usage:
# _build_gpu_entrypoint_objects(
# _build_gpu_objects(
# <target_name>
# <internal_target_name>
# SRCS <list of .cpp files>
# HDRS <list of .h files>
# DEPENDS <list of dependencies>
# COMPILE_OPTIONS <optional list of special compile options for this target>
# FLAGS <optional list of flags>
# )
function(_build_gpu_entrypoint_objects fq_target_name)
function(_build_gpu_objects fq_target_name internal_target_name)
cmake_parse_arguments(
"ADD_GPU_ENTRYPOINT_OBJ"
"ADD_GPU_OBJ"
"" # No optional arguments
"NAME;CXX_STANDARD" # Single value arguments
"SRCS;HDRS;DEPENDS;COMPILE_OPTIONS;FLAGS" # Multi value arguments
@ -82,7 +86,7 @@ function(_build_gpu_entrypoint_objects fq_target_name)
# this so we can support multiple accelerators on the same machine.
foreach(gpu_arch ${all_gpu_architectures})
set(gpu_target_name ${fq_target_name}.${gpu_arch})
set(compile_options ${ADD_GPU_ENTRYPOINT_OBJ_COMPILE_OPTIONS})
set(compile_options ${ADD_GPU_OBJ_COMPILE_OPTIONS})
# Derive the triple from the specified architecture.
if("${gpu_arch}" IN_LIST all_amdgpu_architectures)
set(gpu_target_triple "amdgcn-amd-amdhsa")
@ -101,14 +105,16 @@ function(_build_gpu_entrypoint_objects fq_target_name)
add_library(${gpu_target_name}
EXCLUDE_FROM_ALL
OBJECT
${ADD_GPU_ENTRYPOINT_OBJ_SRCS}
${ADD_GPU_ENTRYPOINT_OBJ_HDRS}
${ADD_GPU_OBJ_SRCS}
${ADD_GPU_OBJ_HDRS}
)
target_compile_options(${gpu_target_name} PRIVATE ${compile_options})
target_include_directories(${gpu_target_name} PRIVATE ${include_dirs})
add_dependencies(${gpu_target_name} ${ADD_GPU_ENTRYPOINT_OBJ_DEPENDS})
target_compile_definitions(${gpu_target_name} PRIVATE LIBC_COPT_PUBLIC_PACKAGING)
if(ADD_GPU_OBJ_DEPENDS)
add_dependencies(${gpu_target_name} ${ADD_GPU_OBJ_DEPENDS})
endif()
# Append this target to a list of images to package into a single binary.
set(input_file $<TARGET_OBJECTS:${gpu_target_name}>)
@ -135,7 +141,7 @@ function(_build_gpu_entrypoint_objects fq_target_name)
# TODO: In the future we will want to combine every architecture for a target
# into a single bitcode file and use that. For now we simply build for
# every single one and let the offloading linker handle it.
get_filename_component(stub_filename ${ADD_GPU_ENTRYPOINT_OBJ_SRCS} NAME)
get_filename_component(stub_filename ${ADD_GPU_OBJ_SRCS} NAME)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stubs)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/stubs/${stub_filename} "// Empty file.\n")
add_library(
@ -151,19 +157,7 @@ function(_build_gpu_entrypoint_objects fq_target_name)
target_include_directories(${fq_target_name} PRIVATE ${include_dirs})
add_dependencies(${fq_target_name} ${full_deps_list} ${packaged_target_name})
set_target_properties(
${fq_target_name}
PROPERTIES
ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME}
TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE}
OBJECT_FILE "$<TARGET_OBJECTS:${fq_target_name}>"
CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
DEPS "${fq_deps_list}"
FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
)
# We only build the internal target for a single supported architecture.
set(internal_target_name ${fq_target_name}.__internal__)
set(include_dirs ${LIBC_BUILD_DIR}/include ${LIBC_SOURCE_DIR} ${LIBC_BUILD_DIR})
if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU OR
LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
@ -171,8 +165,8 @@ function(_build_gpu_entrypoint_objects fq_target_name)
${internal_target_name}
EXCLUDE_FROM_ALL
OBJECT
${ADD_ENTRYPOINT_OBJ_SRCS}
${ADD_ENTRYPOINT_OBJ_HDRS}
${ADD_GPU_OBJ_SRCS}
${ADD_GPU_OBJ_HDRS}
)
target_compile_options(${internal_target_name} BEFORE PRIVATE
${common_compile_options} --target=${LIBC_GPU_TARGET_TRIPLE})
@ -182,17 +176,9 @@ function(_build_gpu_entrypoint_objects fq_target_name)
target_compile_options(${internal_target_name} PRIVATE -march=${LIBC_GPU_TARGET_ARCHITECTURE})
endif()
target_include_directories(${internal_target_name} PRIVATE ${include_dirs})
add_dependencies(${internal_target_name} ${full_deps_list})
set_target_properties(
${internal_target_name}
PROPERTIES
CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
)
set_target_properties(
${fq_target_name}
PROPERTIES OBJECT_FILE_RAW "$<TARGET_OBJECTS:${internal_target_name}>"
)
if(full_deps_list)
add_dependencies(${internal_target_name} ${full_deps_list})
endif()
endif()
endfunction()
@ -209,7 +195,7 @@ endfunction()
function(create_object_library fq_target_name)
cmake_parse_arguments(
"ADD_OBJECT"
"" # No optional arguments
"NO_GPU_BUNDLE" # No optional arguments
"CXX_STANDARD" # Single value arguments
"SRCS;HDRS;COMPILE_OPTIONS;DEPENDS;FLAGS" # Multivalue arguments
${ARGN}
@ -219,28 +205,49 @@ function(create_object_library fq_target_name)
message(FATAL_ERROR "'add_object_library' rule requires SRCS to be specified.")
endif()
add_library(
${fq_target_name}
EXCLUDE_FROM_ALL
OBJECT
${ADD_OBJECT_SRCS}
${ADD_OBJECT_HDRS}
)
target_include_directories(
${fq_target_name}
PRIVATE
${LIBC_BUILD_DIR}/include
${LIBC_SOURCE_DIR}
${LIBC_BUILD_DIR}
)
# The GPU build uses a separate internal file.
if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND NOT ${ADD_OBJECT_NO_GPU_BUNDLE})
set(internal_target_name ${fq_target_name}.__internal__)
else()
set(internal_target_name ${fq_target_name})
endif()
get_fq_deps_list(fq_deps_list ${ADD_OBJECT_DEPENDS})
_get_common_compile_options(
compile_options
"${ADD_OBJECT_FLAGS}"
${ADD_OBJECT_COMPILE_OPTIONS}
)
target_compile_options(${fq_target_name} PRIVATE ${compile_options})
get_fq_deps_list(fq_deps_list ${ADD_OBJECT_DEPENDS})
# GPU builds require special handling for the objects because we want to
# export several different targets at once, e.g. for both Nvidia and AMD.
if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND NOT ${ADD_OBJECT_NO_GPU_BUNDLE})
_build_gpu_objects(
${fq_target_name}
${internal_target_name}
SRCS ${ADD_OBJECT_SRCS}
HDRS ${ADD_OBJECT_HDRS}
DEPENDS ${fq_deps_list}
COMPILE_OPTIONS ${common_compile_options}
FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
)
else()
add_library(
${fq_target_name}
EXCLUDE_FROM_ALL
OBJECT
${ADD_OBJECT_SRCS}
${ADD_OBJECT_HDRS}
)
target_include_directories(
${fq_target_name}
PRIVATE
${LIBC_BUILD_DIR}/include
${LIBC_SOURCE_DIR}
${LIBC_BUILD_DIR}
)
target_compile_options(${fq_target_name} PRIVATE ${compile_options})
endif()
if(SHOW_INTERMEDIATE_OBJECTS)
message(STATUS "Adding object library ${fq_target_name}")
@ -262,11 +269,18 @@ function(create_object_library fq_target_name)
${fq_target_name}
PROPERTIES
TARGET_TYPE ${OBJECT_LIBRARY_TARGET_TYPE}
OBJECT_FILES "$<TARGET_OBJECTS:${fq_target_name}>"
CXX_STANDARD ${ADD_OBJECT_CXX_STANDARD}
DEPS "${fq_deps_list}"
FLAGS "${ADD_OBJECT_FLAGS}"
)
if(TARGET ${internal_target_name})
set_target_properties(
${fq_target_name}
PROPERTIES
OBJECT_FILES "$<TARGET_OBJECTS:${internal_target_name}>"
)
endif()
endfunction(create_object_library)
# Internal function, used by `add_object_library`.
@ -483,13 +497,13 @@ function(create_entrypoint_object fq_target_name)
# GPU builds require special handling for the objects because we want to
# export several different targets at once, e.g. for both Nvidia and AMD.
if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
_build_gpu_entrypoint_objects(
_build_gpu_objects(
${fq_target_name}
${internal_target_name}
SRCS ${ADD_ENTRYPOINT_OBJ_SRCS}
HDRS ${ADD_ENTRYPOINT_OBJ_HDRS}
COMPILE_OPTIONS ${common_compile_options}
DEPENDS ${full_deps_list}
CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
)
else()
@ -505,12 +519,6 @@ function(create_entrypoint_object fq_target_name)
target_compile_options(${internal_target_name} BEFORE PRIVATE ${common_compile_options})
target_include_directories(${internal_target_name} PRIVATE ${include_dirs})
add_dependencies(${internal_target_name} ${full_deps_list})
set_target_properties(
${internal_target_name}
PROPERTIES
CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
)
add_library(
${fq_target_name}
@ -524,22 +532,36 @@ function(create_entrypoint_object fq_target_name)
target_compile_options(${fq_target_name} BEFORE PRIVATE ${common_compile_options} -DLIBC_COPT_PUBLIC_PACKAGING)
target_include_directories(${fq_target_name} PRIVATE ${include_dirs})
add_dependencies(${fq_target_name} ${full_deps_list})
endif()
set_target_properties(
${fq_target_name}
PROPERTIES
ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME}
TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE}
OBJECT_FILE "$<TARGET_OBJECTS:${fq_target_name}>"
CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
DEPS "${fq_deps_list}"
FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
)
if(TARGET ${internal_target_name})
set_target_properties(
${internal_target_name}
PROPERTIES
CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
)
set_target_properties(
${fq_target_name}
PROPERTIES
ENTRYPOINT_NAME ${ADD_ENTRYPOINT_OBJ_NAME}
TARGET_TYPE ${ENTRYPOINT_OBJ_TARGET_TYPE}
OBJECT_FILE "$<TARGET_OBJECTS:${fq_target_name}>"
# TODO: We don't need to list internal object files if the internal
# target is a normal static library.
OBJECT_FILE_RAW "$<TARGET_OBJECTS:${internal_target_name}>"
CXX_STANDARD ${ADD_ENTRYPOINT_OBJ_CXX_STANDARD}
DEPS "${fq_deps_list}"
FLAGS "${ADD_ENTRYPOINT_OBJ_FLAGS}"
)
endif()
if(LLVM_LIBC_ENABLE_LINTING AND TARGET ${internal_target_name})
if(NOT LLVM_LIBC_CLANG_TIDY)
message(FATAL_ERROR "Something is wrong! LLVM_LIBC_ENABLE_LINTING is "

View File

@ -10,6 +10,7 @@ add_startup_object(
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE}
-emit-llvm # AMDGPU's intermediate object file format is bitcode.
--target=${LIBC_GPU_TARGET_TRIPLE}
NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
)
get_fq_target_name(crt1 fq_name)

View File

@ -10,6 +10,7 @@ add_startup_object(
-x cuda # Use the CUDA toolchain to emit the `_start` kernel.
--offload-device-only
--offload-arch=${LIBC_GPU_TARGET_ARCHITECTURE}
NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
)
get_fq_target_name(crt1 fq_name)