[libc] Begin implementing a 'libmgpu.a' for math on the GPU

This patch adds an outline to begin adding a `libmgpu.a` file for
providing math on the GPU. Currently, this is most likely going to be
wrapping around existing vendor libraries and placing them in a more
usable format. Long term, we would like to provide our own
implementations of math functions that can be used instead.

This patch works by simply forwarding the calls to the standard C math
library calls like `sin` to the appropriate vendor call like `__nv_sin`.
Currently, we will use the vendor libraries directly and link them in
via `-mlink-builtin-bitcode`. This is necessary because of bizarre
interactions with the generic bitcode: `-mlink-builtin-bitcode`
internalizes and only links in the used symbols; furthermore, it
propagates the target's default attributes, and it's the only "truly"
correct way to pull in these vendor bitcode libraries without error.

If the vendor libraries are not available at build time, we will still
create the `libmgpu.a`, but we will expect that the vendor library
definitions will be provided by the user's compilation as is made
possible by https://reviews.llvm.org/D152442.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D152486
This commit is contained in:
Joseph Huber 2023-06-08 17:51:43 -05:00
parent 7d21f5714e
commit 8060d96aed
15 changed files with 393 additions and 2 deletions

View File

@ -81,7 +81,15 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.stdio.stderr
)
set(TARGET_LLVMLIBC_ENTRYPOINTS
${TARGET_LIBC_ENTRYPOINTS}
# Math entrypoints enabled for this target. They are kept in their own list
# so they can be referenced separately from the libc entrypoints.
set(TARGET_LIBM_ENTRYPOINTS
    # math.h entrypoints
    libc.src.math.sin
    libc.src.math.round
    libc.src.math.roundf
    libc.src.math.roundl
)
# Combined entrypoint list: the libc entrypoints followed by the libm ones.
set(TARGET_LLVMLIBC_ENTRYPOINTS
    ${TARGET_LIBC_ENTRYPOINTS}
    ${TARGET_LIBM_ENTRYPOINTS}
)

View File

@ -1,6 +1,7 @@
set(TARGET_PUBLIC_HEADERS
libc.include.ctype
libc.include.string
libc.include.math
libc.include.fenv
libc.include.errno
libc.include.stdlib

View File

@ -18,6 +18,19 @@ function(add_math_entrypoint_object name)
return()
endif()
# The GPU optionally depends on vendor libraries. If we emitted one of these
# entrypoints it means the user requested it and we should use it instead.
get_fq_target_name("${LIBC_TARGET_ARCHITECTURE}.vendor.${name}" fq_vendor_specific_target_name)
if(TARGET ${fq_vendor_specific_target_name})
add_entrypoint_object(
${name}
ALIAS
DEPENDS
.${LIBC_TARGET_ARCHITECTURE}.vendor.${name}
)
return()
endif()
get_fq_target_name("generic.${name}" fq_generic_target_name)
if(TARGET ${fq_generic_target_name})
add_entrypoint_object(

View File

@ -0,0 +1,34 @@
# Math functions not yet available in the libc project, or those not yet tuned
# for GPU workloads are provided as wrappers over vendor libraries. If we find
# them ahead of time we will import them statically. Otherwise, we will keep
# them as external references and expect them to be resolved by the user when
# they compile. In the future, we will use implementations from the 'libc'
# project and not provide these wrappers.
add_subdirectory(vendor)
# For the GPU we want to be able to optionally depend on the vendor libraries
# until we have a suitable replacement inside `libc`.
#
# LIBC_GPU_VENDOR_MATH is a boolean cache option that defaults to ON. It is
# consulted by add_math_entrypoint_gpu_object() to decide whether an existing
# vendor wrapper takes precedence over the implementation in this directory.
#
# TODO: We should have an option to enable or disable these on a per-function
# basis.
option(LIBC_GPU_VENDOR_MATH "Use vendor wrappers for GPU math" ON)
# Registers the GPU implementation of the math entrypoint `name`, unless a
# vendor wrapper for the same function should be used instead.
#
# Arguments:
#   name - Entrypoint name (for example `round`).
#   ARGN - All remaining arguments, forwarded unchanged to
#          add_entrypoint_object().
#
# Nothing is registered when the fully qualified `vendor.<name>` target already
# exists and LIBC_GPU_VENDOR_MATH is enabled.
function(add_math_entrypoint_gpu_object name)
  get_fq_target_name("vendor.${name}" fq_vendor_specific_target_name)
  # Refer to the option by name instead of expanding it: if() dereferences the
  # variable itself, so the condition stays well-formed even if the value is
  # empty and is never re-interpreted as the name of another variable.
  if(TARGET ${fq_vendor_specific_target_name} AND LIBC_GPU_VENDOR_MATH)
    return()
  endif()

  add_entrypoint_object(
    ${name}
    ${ARGN}
  )
endfunction()
# GPU implementation of `round`, built from round.cpp with -O2.
add_math_entrypoint_gpu_object(
  round
  SRCS
    round.cpp
  HDRS
    ../round.h
  COMPILE_OPTIONS
    -O2
)

View File

@ -0,0 +1,16 @@
//===-- Implementation of the GPU round function --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/math/round.h"
#include "src/__support/common.h"
namespace __llvm_libc {

// GPU entrypoint for round(): defers entirely to the compiler builtin.
LLVM_LIBC_FUNCTION(double, round, (double x)) {
  return __builtin_round(x);
}

} // namespace __llvm_libc

View File

@ -0,0 +1,16 @@
//===-- Implementation of the GPU roundf function -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/math/roundf.h"
#include "src/__support/common.h"
namespace __llvm_libc {

// GPU entrypoint for roundf(): defers entirely to the compiler builtin.
LLVM_LIBC_FUNCTION(float, roundf, (float x)) {
  return __builtin_roundf(x);
}

} // namespace __llvm_libc

View File

@ -0,0 +1,23 @@
//===-- Implementation of the GPU roundl function -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/math/roundl.h"
#include "src/__support/FPUtil/PlatformDefs.h"
#include "src/__support/common.h"
namespace __llvm_libc {

// This file is only valid when `long double` has the same representation as
// `double`; refuse to build otherwise.
#ifndef LONG_DOUBLE_IS_DOUBLE
#error "GPU targets do not support long doubles"
#endif

// GPU entrypoint for roundl(). Uses the `long double` builtin so the call
// matches the declared type instead of relying on implicit conversions to and
// from `double`, mirroring how round() and roundf() use their own builtins.
LLVM_LIBC_FUNCTION(long double, roundl, (long double x)) {
  return __builtin_roundl(x);
}

} // namespace __llvm_libc

41
libc/src/math/gpu/vendor/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,41 @@
# Look for the ROCm device libraries, hinting at the install prefix and also
# searching /opt/rocm. When they are found, the location of the imported `ocml`
# target is added to the flags used to link vendor bitcode into the wrappers.
find_package(AMDDeviceLibs QUIET HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
if(NOT AMDDeviceLibs_FOUND)
  message(STATUS "Could not find the ROCm device library. Unimplemented "
                 "functions will be an external reference to the vendor libraries.")
else()
  message(STATUS "Found the ROCm device library. Implementations falling back "
                 "to the vendor libraries will be resolved statically.")
  get_target_property(ocml_path ocml IMPORTED_LOCATION)
  list(APPEND bitcode_link_flags
       "SHELL:-Xclang -mlink-builtin-bitcode -Xclang ${ocml_path}")
endif()
# Look for the CUDA toolkit and the `libdevice` bitcode shipped alongside it.
# When the bitcode file exists, it is added to the flags used to link vendor
# bitcode into the wrappers. Otherwise the vendor functions stay external
# references, and that is reported in both failure cases: no toolkit at all, or
# a toolkit without the expected bitcode file.
find_package(CUDAToolkit QUIET)
if(CUDAToolkit_FOUND)
  # Quoted so a toolkit path containing list separators stays a single value.
  set(libdevice_path "${CUDAToolkit_BIN_DIR}/../nvvm/libdevice/libdevice.10.bc")
  if(EXISTS "${libdevice_path}")
    message(STATUS "Found the CUDA device library. Implementations falling back "
                   "to the vendor libraries will be resolved statically.")
    list(APPEND bitcode_link_flags
         "SHELL:-Xclang -mlink-builtin-bitcode -Xclang ${libdevice_path}")
  else()
    message(STATUS "Could not find the CUDA device library. Unimplemented "
                   "functions will be an external reference to the vendor libraries.")
  endif()
else()
  message(STATUS "Could not find the CUDA device library. Unimplemented "
                 "functions will be an external reference to the vendor libraries.")
endif()
# FIXME: We need a way to pass the library to only the NVPTX / AMDGPU build.
# This shouldn't cause issues because we only link in needed symbols, but it
# will link in identity metadata from both libraries. This silences the warning.
list(APPEND bitcode_link_flags "-Wno-linker-warnings")
# Vendor-backed `sin` wrapper, compiled with the flags accumulated in
# `bitcode_link_flags` plus -O2.
add_entrypoint_object(
  sin
  SRCS
    sin.cpp
  HDRS
    ../../sin.h
  COMPILE_OPTIONS
    ${bitcode_link_flags}
    -O2
)

View File

@ -0,0 +1,25 @@
//===-- AMDGPU specific definitions for math support ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_MATH_GPU_AMDGPU_H
#define LLVM_LIBC_SRC_MATH_GPU_AMDGPU_H
#include "declarations.h"
#include "platform.h"
#include "src/__support/macros/attributes.h"
namespace __llvm_libc {
namespace internal {

// Double-precision sine, forwarded to the `__ocml_sin_f64` vendor routine.
LIBC_INLINE double sin(double x) {
  return __ocml_sin_f64(x);
}

} // namespace internal
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_MATH_GPU_AMDGPU_H

View File

@ -0,0 +1,20 @@
//===-- AMDGPU specific declarations for math support ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_MATH_GPU_AMDGPU_DECLARATIONS_H
#define LLVM_LIBC_SRC_MATH_GPU_AMDGPU_DECLARATIONS_H
namespace __llvm_libc {

// Vendor math routine used by the AMDGPU wrappers. This is a declaration only;
// the definition has to be supplied from outside this header.
extern "C" double __ocml_sin_f64(double);

} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_MATH_GPU_AMDGPU_DECLARATIONS_H

View File

@ -0,0 +1,110 @@
//===-- AMDGPU specific platform definitions for math support -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H
#define LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H
#include <stdint.h>
// Definitions of the control globals read by the ROCm device library. The
// first four are fixed at zero; `__oclc_ISA_version` is chosen at preprocessing
// time from whichever `__gfx*__` macro is defined.
namespace __llvm_libc {
// The ROCm device library uses control globals to alter codegen for the
// different targets. To avoid needing to link them in manually we simply
// define them here.
extern "C" {
// Disable unsafe math optimizations in the implementation.
extern const uint8_t __oclc_unsafe_math_opt = 0;
// Disable denormalization at zero optimizations in the implementation.
extern const uint8_t __oclc_daz_opt = 0;
// Disable rounding optimizations for 32-bit square roots.
extern const uint8_t __oclc_correctly_rounded_sqrt32 = 0;
// Disable finite math optimizations.
extern const uint8_t __oclc_finite_only_opt = 0;
// Pick the ISA version constant matching the architecture macro that is
// defined. NOTE(review): the `__gfx*__` macros are presumably predefined by the
// compiler for the selected GPU target -- confirm. Exactly one branch defines
// `__oclc_ISA_version`; an unrecognized architecture is a hard compile error.
#if defined(__gfx700__)
extern const uint32_t __oclc_ISA_version = 7000;
#elif defined(__gfx701__)
extern const uint32_t __oclc_ISA_version = 7001;
#elif defined(__gfx702__)
extern const uint32_t __oclc_ISA_version = 7002;
#elif defined(__gfx703__)
extern const uint32_t __oclc_ISA_version = 7003;
#elif defined(__gfx704__)
extern const uint32_t __oclc_ISA_version = 7004;
#elif defined(__gfx705__)
extern const uint32_t __oclc_ISA_version = 7005;
#elif defined(__gfx801__)
extern const uint32_t __oclc_ISA_version = 8001;
#elif defined(__gfx802__)
extern const uint32_t __oclc_ISA_version = 8002;
#elif defined(__gfx803__)
extern const uint32_t __oclc_ISA_version = 8003;
#elif defined(__gfx805__)
extern const uint32_t __oclc_ISA_version = 8005;
#elif defined(__gfx810__)
extern const uint32_t __oclc_ISA_version = 8100;
#elif defined(__gfx900__)
extern const uint32_t __oclc_ISA_version = 9000;
#elif defined(__gfx902__)
extern const uint32_t __oclc_ISA_version = 9002;
#elif defined(__gfx904__)
extern const uint32_t __oclc_ISA_version = 9004;
#elif defined(__gfx906__)
extern const uint32_t __oclc_ISA_version = 9006;
#elif defined(__gfx908__)
extern const uint32_t __oclc_ISA_version = 9008;
#elif defined(__gfx909__)
extern const uint32_t __oclc_ISA_version = 9009;
#elif defined(__gfx90a__)
extern const uint32_t __oclc_ISA_version = 9010;
#elif defined(__gfx90c__)
extern const uint32_t __oclc_ISA_version = 9012;
#elif defined(__gfx940__)
extern const uint32_t __oclc_ISA_version = 9400;
#elif defined(__gfx1010__)
extern const uint32_t __oclc_ISA_version = 10100;
#elif defined(__gfx1011__)
extern const uint32_t __oclc_ISA_version = 10101;
#elif defined(__gfx1012__)
extern const uint32_t __oclc_ISA_version = 10102;
#elif defined(__gfx1013__)
extern const uint32_t __oclc_ISA_version = 10103;
#elif defined(__gfx1030__)
extern const uint32_t __oclc_ISA_version = 10300;
#elif defined(__gfx1031__)
extern const uint32_t __oclc_ISA_version = 10301;
#elif defined(__gfx1032__)
extern const uint32_t __oclc_ISA_version = 10302;
#elif defined(__gfx1033__)
extern const uint32_t __oclc_ISA_version = 10303;
#elif defined(__gfx1034__)
extern const uint32_t __oclc_ISA_version = 10304;
#elif defined(__gfx1035__)
extern const uint32_t __oclc_ISA_version = 10305;
#elif defined(__gfx1036__)
extern const uint32_t __oclc_ISA_version = 10306;
#elif defined(__gfx1100__)
extern const uint32_t __oclc_ISA_version = 11000;
#elif defined(__gfx1101__)
extern const uint32_t __oclc_ISA_version = 11001;
#elif defined(__gfx1102__)
extern const uint32_t __oclc_ISA_version = 11002;
#elif defined(__gfx1103__)
extern const uint32_t __oclc_ISA_version = 11003;
#else
// No known architecture macro is defined: stop rather than guess a version.
#error "Unknown AMDGPU architecture"
#endif
}
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H

22
libc/src/math/gpu/vendor/common.h vendored Normal file
View File

@ -0,0 +1,22 @@
//===-- Common interface for compiling the GPU math -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_MATH_GPU_COMMON_H
#define LLVM_LIBC_SRC_MATH_GPU_COMMON_H
#include "src/__support/macros/properties/architectures.h"
#if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
#include "amdgpu/amdgpu.h"
#elif defined(LIBC_TARGET_ARCH_IS_NVPTX)
#include "nvptx/nvptx.h"
#else
#error "Unsupported platform"
#endif
#endif // LLVM_LIBC_SRC_MATH_GPU_COMMON_H

View File

@ -0,0 +1,20 @@
//===-- NVPTX specific declarations for math support ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_MATH_GPU_NVPTX_DECLARATIONS_H
#define LLVM_LIBC_SRC_MATH_GPU_NVPTX_DECLARATIONS_H
namespace __llvm_libc {

// Vendor math routine used by the NVPTX wrappers. This is a declaration only;
// the definition has to be supplied from outside this header.
extern "C" double __nv_sin(double);

} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_MATH_GPU_NVPTX_DECLARATIONS_H

24
libc/src/math/gpu/vendor/nvptx/nvptx.h vendored Normal file
View File

@ -0,0 +1,24 @@
//===-- NVPTX specific definitions for math support -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_MATH_GPU_NVPTX_H
#define LLVM_LIBC_SRC_MATH_GPU_NVPTX_H
#include "declarations.h"
#include "src/__support/macros/attributes.h"
namespace __llvm_libc {
namespace internal {

// Double-precision sine, forwarded to the `__nv_sin` vendor routine.
LIBC_INLINE double sin(double x) {
  return __nv_sin(x);
}

} // namespace internal
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_MATH_GPU_NVPTX_H

18
libc/src/math/gpu/vendor/sin.cpp vendored Normal file
View File

@ -0,0 +1,18 @@
//===-- Implementation of the sin function for GPU ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/math/sin.h"
#include "src/__support/common.h"
#include "common.h"
namespace __llvm_libc {

// Public sin() entrypoint for the GPU: delegates to internal::sin(), which is
// supplied by whichever vendor header "common.h" selects for the target.
LLVM_LIBC_FUNCTION(double, sin, (double x)) {
  return internal::sin(x);
}

} // namespace __llvm_libc