[libc] Begin implementing a 'libmgpu.a' for math on the GPU

This patch adds an outline to begin adding a `libmgpu.a` file for providing math on the GPU. Currently, this is most likely going to be wrapping around existing vendor libraries and placing them in a more usable format. Long term, we would like to provide our own implementations of math functions that can be used instead. This patch works by simply forwarding the calls to the standard C math library calls like `sin` to the appropriate vendor call like `__nv_sin`. Currently, we will use the vendor libraries directly and link them in via `-mlink-builtin-bitcode`. This is necessary because of bizarre interactions with the generic bitcode: `-mlink-builtin-bitcode` internalizes and only links in the used symbols; furthermore, it propagates the target's default attributes, and it's the only "truly" correct way to pull in these vendor bitcode libraries without error. If the vendor libraries are not available at build time, we will still create the `libmgpu.a`, but we will expect that the vendor library definitions will be provided by the user's compilation as is made possible by https://reviews.llvm.org/D152442. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D152486
2025-03-01 14:58:18 +00:00 · 2023-06-08 17:51:43 -05:00 · 2023-06-08 17:51:43 -05:00 · 8060d96aed
commit 8060d96aed
parent 7d21f5714e
15 changed files with 393 additions and 2 deletions
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@ -81,7 +81,15 @@ set(TARGET_LIBC_ENTRYPOINTS
    libc.src.stdio.stderr
 )

-set(TARGET_LLVMLIBC_ENTRYPOINTS
-  ${TARGET_LIBC_ENTRYPOINTS}
+# Math entrypoints listed for the GPU configuration.
+set(TARGET_LIBM_ENTRYPOINTS
+    # math.h entrypoints
+    libc.src.math.sin
+    libc.src.math.round
+    libc.src.math.roundf
+    libc.src.math.roundl
 )

+# Combined list: the libc entrypoints followed by the libm entrypoints.
+set(TARGET_LLVMLIBC_ENTRYPOINTS
+  ${TARGET_LIBC_ENTRYPOINTS}
+  ${TARGET_LIBM_ENTRYPOINTS}
+)
--- a/libc/config/gpu/headers.txt
+++ b/libc/config/gpu/headers.txt
@ -1,6 +1,7 @@
 set(TARGET_PUBLIC_HEADERS
    libc.include.ctype
    libc.include.string
+    libc.include.math
    libc.include.fenv
    libc.include.errno
    libc.include.stdlib
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@ -18,6 +18,19 @@ function(add_math_entrypoint_object name)
    return()
  endif()

+  # The GPU optionally depends on vendor libraries. If we emitted one of these
+  # entrypoints it means the user requested it and we should use it instead.
+  get_fq_target_name("${LIBC_TARGET_ARCHITECTURE}.vendor.${name}" fq_vendor_specific_target_name)
+  if(TARGET ${fq_vendor_specific_target_name})
+    add_entrypoint_object(
+      ${name}
+      ALIAS
+      DEPENDS
+        .${LIBC_TARGET_ARCHITECTURE}.vendor.${name}
+    )
+    return()
+  endif()
+
  get_fq_target_name("generic.${name}" fq_generic_target_name)
  if(TARGET ${fq_generic_target_name})
    add_entrypoint_object(
--- a/libc/src/math/gpu/CMakeLists.txt
+++ b/libc/src/math/gpu/CMakeLists.txt
@ -0,0 +1,34 @@
+# Math functions not yet available in the libc project, or those not yet tuned
+# for GPU workloads, are provided as wrappers over vendor libraries. If we find
+# them ahead of time we will import them statically. Otherwise, we will keep
+# them as external references and expect them to be resolved by the user when
+# they compile. In the future, we will use implementations from the 'libc'
+# project and not provide these wrappers.
+add_subdirectory(vendor)
+
+# For the GPU we want to be able to optionally depend on the vendor libraries
+# until we have a suitable replacement inside `libc`.
+# TODO: We should have an option to enable or disable these on a per-function
+# basis.
+option(LIBC_GPU_VENDOR_MATH "Use vendor wrappers for GPU math" ON)
+
+# Registers the GPU entrypoint `name` unless a vendor wrapper takes precedence.
+#
+# Arguments:
+#   name - entrypoint name, e.g. `round`.
+#   ARGN - forwarded unchanged to add_entrypoint_object().
+#
+# Nothing is registered when the `vendor.<name>` target already exists and
+# LIBC_GPU_VENDOR_MATH is enabled.
+function(add_math_entrypoint_gpu_object name)
+  get_fq_target_name("vendor.${name}" fq_vendor_specific_target_name)
+  # Test the option by name rather than expanding it: `${LIBC_GPU_VENDOR_MATH}`
+  # leaves a dangling `AND` (a configure-time error) when the value is empty.
+  if(LIBC_GPU_VENDOR_MATH AND TARGET ${fq_vendor_specific_target_name})
+    return()
+  endif()
+
+  add_entrypoint_object(
+    ${name}
+    ${ARGN}
+  )
+endfunction()
+
+# GPU `round` entrypoint, built from round.cpp at -O2.
+# NOTE(review): only `round` is registered through this helper here; confirm
+# whether `roundf` and `roundl` are meant to be registered the same way.
+add_math_entrypoint_gpu_object(
+  round
+  SRCS round.cpp
+  HDRS ../round.h
+  COMPILE_OPTIONS -O2
+)
--- a/libc/src/math/gpu/round.cpp
+++ b/libc/src/math/gpu/round.cpp
@ -0,0 +1,16 @@
+//===-- Implementation of the GPU round function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/round.h"
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+// Rounds `x` by deferring to the compiler's `__builtin_round`.
+LLVM_LIBC_FUNCTION(double, round, (double x)) {
+  return __builtin_round(x);
+}
+
+} // namespace __llvm_libc
--- a/libc/src/math/gpu/roundf.cpp
+++ b/libc/src/math/gpu/roundf.cpp
@ -0,0 +1,16 @@
+//===-- Implementation of the GPU roundf function -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/roundf.h"
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+// Rounds `x` by deferring to the compiler's `__builtin_roundf`.
+LLVM_LIBC_FUNCTION(float, roundf, (float x)) {
+  return __builtin_roundf(x);
+}
+
+} // namespace __llvm_libc
--- a/libc/src/math/gpu/roundl.cpp
+++ b/libc/src/math/gpu/roundl.cpp
@ -0,0 +1,23 @@
+//===-- Implementation of the GPU roundl function -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/roundl.h"
+#include "src/__support/FPUtil/PlatformDefs.h"
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+// Only configurations that define LONG_DOUBLE_IS_DOUBLE are accepted; any
+// other configuration is rejected at compile time.
+#ifndef LONG_DOUBLE_IS_DOUBLE
+#error "GPU targets do not support long doubles"
+#endif
+
+// Rounds `x` with the builtin whose signature matches `long double`, so the
+// argument and result never pass through a `double`-typed call.
+LLVM_LIBC_FUNCTION(long double, roundl, (long double x)) {
+  return __builtin_roundl(x);
+}
+
+} // namespace __llvm_libc
--- a/libc/src/math/gpu/vendor/CMakeLists.txt
+++ b/libc/src/math/gpu/vendor/CMakeLists.txt
@ -0,0 +1,41 @@
+# Look for the ROCm device libraries. When they are present, the location of
+# the imported `ocml` target is appended to `bitcode_link_flags` so it is
+# linked in through `-mlink-builtin-bitcode`.
+find_package(AMDDeviceLibs QUIET HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if(NOT AMDDeviceLibs_FOUND)
+  message(STATUS "Could not find the ROCm device library. Unimplemented "
+                 "functions will be an external reference to the vendor libraries.")
+else()
+  message(STATUS "Found the ROCm device library. Implementations falling back "
+                 "to the vendor libraries will be resolved statically.")
+  get_target_property(ocml_path ocml IMPORTED_LOCATION)
+  list(APPEND bitcode_link_flags
+       "SHELL:-Xclang -mlink-builtin-bitcode -Xclang ${ocml_path}")
+endif()
+
+# Look for the CUDA toolkit. When its `libdevice.10.bc` bitcode exists, it is
+# appended to `bitcode_link_flags` so it is linked in through
+# `-mlink-builtin-bitcode`.
+find_package(CUDAToolkit QUIET)
+if(CUDAToolkit_FOUND)
+  set(libdevice_path "${CUDAToolkit_BIN_DIR}/../nvvm/libdevice/libdevice.10.bc")
+endif()
+
+# Quote the path so a toolkit location containing spaces or semicolons is
+# still tested as a single path.
+if(CUDAToolkit_FOUND AND EXISTS "${libdevice_path}")
+  message(STATUS "Found the CUDA device library. Implementations falling back "
+                 "to the vendor libraries will be resolved statically.")
+  list(APPEND bitcode_link_flags
+       "SHELL:-Xclang -mlink-builtin-bitcode -Xclang ${libdevice_path}")
+else()
+  # Reached both when the toolkit is missing and when the toolkit is present
+  # but does not ship the libdevice bitcode.
+  message(STATUS "Could not find the CUDA device library. Unimplemented "
+                 "functions will be an external reference to the vendor libraries.")
+endif()
+
+# FIXME: We need a way to pass the library to only the NVPTX / AMDGPU build.
+# This shouldn't cause issues because we only link in needed symbols, but it
+# will link in identity metadata from both libraries. This silences the warning.
+list(APPEND bitcode_link_flags "-Wno-linker-warnings")
+
+# Vendor-backed `sin` entrypoint. The flags accumulated in
+# `bitcode_link_flags` are passed as compile options, followed by -O2.
+add_entrypoint_object(
+  sin
+  SRCS sin.cpp
+  HDRS ../../sin.h
+  COMPILE_OPTIONS ${bitcode_link_flags} -O2
+)
--- a/libc/src/math/gpu/vendor/amdgpu/amdgpu.h
+++ b/libc/src/math/gpu/vendor/amdgpu/amdgpu.h
@ -0,0 +1,25 @@
+//===-- AMDGPU specific definitions for math support ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_AMDGPU_H
+#define LLVM_LIBC_SRC_MATH_GPU_AMDGPU_H
+
+#include "declarations.h"
+#include "platform.h"
+
+#include "src/__support/macros/attributes.h"
+
+namespace __llvm_libc {
+namespace internal {
+
+// Double-precision sine, forwarded to `__ocml_sin_f64`.
+LIBC_INLINE double sin(double x) {
+  return __ocml_sin_f64(x);
+}
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_AMDGPU_H
--- a/libc/src/math/gpu/vendor/amdgpu/declarations.h
+++ b/libc/src/math/gpu/vendor/amdgpu/declarations.h
@ -0,0 +1,20 @@
+//===-- AMDGPU specific declarations for math support ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_AMDGPU_DECLARATIONS_H
+#define LLVM_LIBC_SRC_MATH_GPU_AMDGPU_DECLARATIONS_H
+
+namespace __llvm_libc {
+
+// C-linkage declaration of the vendor routine; no definition is provided here.
+extern "C" double __ocml_sin_f64(double);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_AMDGPU_DECLARATIONS_H
--- a/libc/src/math/gpu/vendor/amdgpu/platform.h
+++ b/libc/src/math/gpu/vendor/amdgpu/platform.h
@ -0,0 +1,110 @@
+//===-- AMDGPU specific platform definitions for math support -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H
+#define LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H
+
+#include <stdint.h>
+
+namespace __llvm_libc {
+
+// The ROCm device library uses control globals to alter codegen for the
+// different targets. To avoid needing to link them in manually we simply
+// define them here.
+extern "C" {
+
+// Disable unsafe math optimizations in the implementation.
+extern const uint8_t __oclc_unsafe_math_opt = 0;
+
+// Disable denormalization at zero optimizations in the implementation.
+extern const uint8_t __oclc_daz_opt = 0;
+
+// Disable rounding optimizations for 32-bit square roots.
+extern const uint8_t __oclc_correctly_rounded_sqrt32 = 0;
+
+// Disable finite math optimizations.
+extern const uint8_t __oclc_finite_only_opt = 0;
+
+// Select the value of `__oclc_ISA_version` from whichever `__gfx*__` macro is
+// defined for the current compilation. Exactly one definition is emitted; a
+// target that matches none of the macros below fails to compile.
+#if defined(__gfx700__)
+extern const uint32_t __oclc_ISA_version = 7000;
+#elif defined(__gfx701__)
+extern const uint32_t __oclc_ISA_version = 7001;
+#elif defined(__gfx702__)
+extern const uint32_t __oclc_ISA_version = 7002;
+#elif defined(__gfx703__)
+extern const uint32_t __oclc_ISA_version = 7003;
+#elif defined(__gfx704__)
+extern const uint32_t __oclc_ISA_version = 7004;
+#elif defined(__gfx705__)
+extern const uint32_t __oclc_ISA_version = 7005;
+#elif defined(__gfx801__)
+extern const uint32_t __oclc_ISA_version = 8001;
+#elif defined(__gfx802__)
+extern const uint32_t __oclc_ISA_version = 8002;
+#elif defined(__gfx803__)
+extern const uint32_t __oclc_ISA_version = 8003;
+#elif defined(__gfx805__)
+extern const uint32_t __oclc_ISA_version = 8005;
+#elif defined(__gfx810__)
+extern const uint32_t __oclc_ISA_version = 8100;
+#elif defined(__gfx900__)
+extern const uint32_t __oclc_ISA_version = 9000;
+#elif defined(__gfx902__)
+extern const uint32_t __oclc_ISA_version = 9002;
+#elif defined(__gfx904__)
+extern const uint32_t __oclc_ISA_version = 9004;
+#elif defined(__gfx906__)
+extern const uint32_t __oclc_ISA_version = 9006;
+#elif defined(__gfx908__)
+extern const uint32_t __oclc_ISA_version = 9008;
+#elif defined(__gfx909__)
+extern const uint32_t __oclc_ISA_version = 9009;
+#elif defined(__gfx90a__)
+extern const uint32_t __oclc_ISA_version = 9010;
+#elif defined(__gfx90c__)
+extern const uint32_t __oclc_ISA_version = 9012;
+#elif defined(__gfx940__)
+extern const uint32_t __oclc_ISA_version = 9400;
+#elif defined(__gfx1010__)
+extern const uint32_t __oclc_ISA_version = 10100;
+#elif defined(__gfx1011__)
+extern const uint32_t __oclc_ISA_version = 10101;
+#elif defined(__gfx1012__)
+extern const uint32_t __oclc_ISA_version = 10102;
+#elif defined(__gfx1013__)
+extern const uint32_t __oclc_ISA_version = 10103;
+#elif defined(__gfx1030__)
+extern const uint32_t __oclc_ISA_version = 10300;
+#elif defined(__gfx1031__)
+extern const uint32_t __oclc_ISA_version = 10301;
+#elif defined(__gfx1032__)
+extern const uint32_t __oclc_ISA_version = 10302;
+#elif defined(__gfx1033__)
+extern const uint32_t __oclc_ISA_version = 10303;
+#elif defined(__gfx1034__)
+extern const uint32_t __oclc_ISA_version = 10304;
+#elif defined(__gfx1035__)
+extern const uint32_t __oclc_ISA_version = 10305;
+#elif defined(__gfx1036__)
+extern const uint32_t __oclc_ISA_version = 10306;
+#elif defined(__gfx1100__)
+extern const uint32_t __oclc_ISA_version = 11000;
+#elif defined(__gfx1101__)
+extern const uint32_t __oclc_ISA_version = 11001;
+#elif defined(__gfx1102__)
+extern const uint32_t __oclc_ISA_version = 11002;
+#elif defined(__gfx1103__)
+extern const uint32_t __oclc_ISA_version = 11003;
+#else
+#error "Unknown AMDGPU architecture"
+#endif
+}
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_AMDGPU_PLATFORM_H
--- a/libc/src/math/gpu/vendor/common.h
+++ b/libc/src/math/gpu/vendor/common.h
@ -0,0 +1,22 @@
+//===-- Common interface for compiling the GPU math -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_COMMON_H
+#define LLVM_LIBC_SRC_MATH_GPU_COMMON_H
+
+#include "src/__support/macros/properties/architectures.h"
+
+#if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
+#include "amdgpu/amdgpu.h"
+#elif defined(LIBC_TARGET_ARCH_IS_NVPTX)
+#include "nvptx/nvptx.h"
+#else
+#error "Unsupported platform"
+#endif
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_COMMON_H
--- a/libc/src/math/gpu/vendor/nvptx/declarations.h
+++ b/libc/src/math/gpu/vendor/nvptx/declarations.h
@ -0,0 +1,20 @@
+//===-- NVPTX specific declarations for math support ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_NVPTX_DECLARATIONS_H
+#define LLVM_LIBC_SRC_MATH_GPU_NVPTX_DECLARATIONS_H
+
+namespace __llvm_libc {
+
+// C-linkage declaration of the vendor routine; no definition is provided here.
+extern "C" double __nv_sin(double);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_NVPTX_DECLARATIONS_H
--- a/libc/src/math/gpu/vendor/nvptx/nvptx.h
+++ b/libc/src/math/gpu/vendor/nvptx/nvptx.h
@ -0,0 +1,24 @@
+//===-- NVPTX specific definitions for math support -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GPU_NVPTX_H
+#define LLVM_LIBC_SRC_MATH_GPU_NVPTX_H
+
+#include "declarations.h"
+
+#include "src/__support/macros/attributes.h"
+
+namespace __llvm_libc {
+namespace internal {
+
+// Double-precision sine, forwarded to `__nv_sin`.
+LIBC_INLINE double sin(double x) {
+  return __nv_sin(x);
+}
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GPU_NVPTX_H
--- a/libc/src/math/gpu/vendor/sin.cpp
+++ b/libc/src/math/gpu/vendor/sin.cpp
@ -0,0 +1,18 @@
+//===-- Implementation of the sin function for GPU ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/sin.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace __llvm_libc {
+
+// Public `sin` entrypoint; delegates to the `internal::sin` wrapper pulled in
+// through "common.h".
+LLVM_LIBC_FUNCTION(double, sin, (double x)) {
+  return internal::sin(x);
+}
+
+} // namespace __llvm_libc