mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-10-07 10:54:01 +00:00
[OpenMP] Delete old plugins
It's time to remove the old plugins as the next-gen has already been set to default in LLVM 16. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D142820
This commit is contained in:
parent
70c08dbcfb
commit
e90ab9148b
@ -1378,7 +1378,7 @@ clause. Examples for both are given below.
|
||||
.. code-block:: console
|
||||
|
||||
$ clang++ -fopenmp --offload-arch=gfx90a -O3 shared.c
|
||||
$ env LIBOMPTARGET_NEXTGEN_PLUGINS=1 ./shared
|
||||
$ env ./shared
|
||||
|
||||
|
||||
.. _libomptarget_device_debugging:
|
||||
|
@ -105,7 +105,6 @@ set(LIBOMPTARGET_LLVM_LIBRARY_DIR "${LLVM_LIBRARY_DIR}" CACHE STRING
|
||||
"Path to folder containing llvm library libomptarget.so")
|
||||
|
||||
# Build offloading plugins and device RTLs if they are available.
|
||||
add_subdirectory(plugins)
|
||||
add_subdirectory(plugins-nextgen)
|
||||
add_subdirectory(DeviceRTL)
|
||||
add_subdirectory(tools)
|
||||
|
@ -12,12 +12,12 @@
|
||||
|
||||
add_subdirectory(common)
|
||||
|
||||
# void build_generic_elf64_nextgen(string tmachine, string tmachine_name, string tmachine_libname, string elf_machine_id);
|
||||
# void build_generic_elf64(string tmachine, string tmachine_name, string tmachine_libname, string tmachine_triple, string elf_machine_id);
|
||||
# - build a plugin for an ELF based generic 64-bit target based on libffi.
|
||||
# - tmachine: name of the machine processor as used in the cmake build system.
|
||||
# - tmachine_name: name of the machine to be printed with the debug messages.
|
||||
# - tmachine_libname: machine name to be appended to the plugin library name.
|
||||
macro(build_generic_elf64_nextgen tmachine tmachine_name tmachine_libname tmachine_triple elf_machine_id)
|
||||
macro(build_generic_elf64 tmachine tmachine_name tmachine_libname tmachine_triple elf_machine_id)
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
|
||||
if(LIBOMPTARGET_DEP_LIBFFI_FOUND)
|
||||
|
||||
@ -36,7 +36,7 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
|
||||
# Define target triple
|
||||
add_definitions("-DLIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE=${tmachine}")
|
||||
|
||||
add_llvm_library("omptarget.rtl.${tmachine_libname}.nextgen"
|
||||
add_llvm_library("omptarget.rtl.${tmachine_libname}"
|
||||
SHARED
|
||||
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../generic-elf-64bit/src/rtl.cpp
|
||||
@ -58,23 +58,23 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
|
||||
)
|
||||
|
||||
if (LIBOMP_HAVE_VERSION_SCRIPT_FLAG)
|
||||
target_link_libraries("omptarget.rtl.${tmachine_libname}.nextgen" PRIVATE
|
||||
target_link_libraries("omptarget.rtl.${tmachine_libname}" PRIVATE
|
||||
"-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports")
|
||||
endif()
|
||||
|
||||
# Install plugin under the lib destination folder.
|
||||
install(TARGETS "omptarget.rtl.${tmachine_libname}.nextgen"
|
||||
install(TARGETS "omptarget.rtl.${tmachine_libname}"
|
||||
LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
|
||||
set_target_properties("omptarget.rtl.${tmachine_libname}.nextgen" PROPERTIES
|
||||
set_target_properties("omptarget.rtl.${tmachine_libname}" PROPERTIES
|
||||
INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
|
||||
CXX_VISIBILITY_PRESET protected)
|
||||
|
||||
target_include_directories( "omptarget.rtl.${tmachine_libname}.nextgen" PRIVATE
|
||||
target_include_directories( "omptarget.rtl.${tmachine_libname}" PRIVATE
|
||||
${LIBOMPTARGET_INCLUDE_DIR}
|
||||
${LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR})
|
||||
|
||||
list(APPEND LIBOMPTARGET_TESTED_PLUGINS
|
||||
"omptarget.rtl.${tmachine_libname}.nextgen")
|
||||
"omptarget.rtl.${tmachine_libname}")
|
||||
|
||||
else(LIBOMPTARGET_DEP_LIBFFI_FOUND)
|
||||
libomptarget_say("Not building ${tmachine_name} NextGen offloading plugin: libffi dependency not found.")
|
||||
|
@ -11,7 +11,7 @@
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
build_generic_elf64_nextgen("aarch64" "aarch64" "aarch64" "aarch64-unknown-linux-gnu" "183")
|
||||
build_generic_elf64("aarch64" "aarch64" "aarch64" "aarch64-unknown-linux-gnu" "183")
|
||||
else()
|
||||
libomptarget_say("Not building aarch64 NextGen offloading plugin: machine not found in the system.")
|
||||
endif()
|
||||
|
@ -52,8 +52,8 @@ if (${hsa-runtime64_FOUND} AND NOT LIBOMPTARGET_FORCE_DLOPEN_LIBHSA)
|
||||
set(LIBOMPTARGET_DEP_LIBRARIES hsa-runtime64::hsa-runtime64)
|
||||
else()
|
||||
libomptarget_say("Building AMDGPU NextGen plugin for dlopened libhsa")
|
||||
include_directories(../../plugins/amdgpu/dynamic_hsa)
|
||||
set(LIBOMPTARGET_EXTRA_SOURCE ../../plugins/amdgpu/dynamic_hsa/hsa.cpp)
|
||||
include_directories(dynamic_hsa)
|
||||
set(LIBOMPTARGET_EXTRA_SOURCE dynamic_hsa/hsa.cpp)
|
||||
set(LIBOMPTARGET_DEP_LIBRARIES)
|
||||
endif()
|
||||
|
||||
@ -66,7 +66,7 @@ else()
|
||||
set(LDFLAGS_UNDEFINED "-Wl,-z,defs")
|
||||
endif()
|
||||
|
||||
add_llvm_library(omptarget.rtl.amdgpu.nextgen SHARED
|
||||
add_llvm_library(omptarget.rtl.amdgpu SHARED
|
||||
src/rtl.cpp
|
||||
${LIBOMPTARGET_EXTRA_SOURCE}
|
||||
|
||||
@ -91,16 +91,16 @@ add_llvm_library(omptarget.rtl.amdgpu.nextgen SHARED
|
||||
)
|
||||
|
||||
if ((OMPT_TARGET_DEFAULT) AND (LIBOMPTARGET_OMPT_SUPPORT))
|
||||
target_link_libraries(omptarget.rtl.amdgpu.nextgen PRIVATE OMPT)
|
||||
target_link_libraries(omptarget.rtl.amdgpu PRIVATE OMPT)
|
||||
endif()
|
||||
|
||||
if (LIBOMP_HAVE_VERSION_SCRIPT_FLAG)
|
||||
target_link_libraries(omptarget.rtl.amdgpu.nextgen PRIVATE
|
||||
target_link_libraries(omptarget.rtl.amdgpu PRIVATE
|
||||
"-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports")
|
||||
endif()
|
||||
|
||||
target_include_directories(
|
||||
omptarget.rtl.amdgpu.nextgen
|
||||
omptarget.rtl.amdgpu
|
||||
PRIVATE
|
||||
${LIBOMPTARGET_INCLUDE_DIR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/utils
|
||||
@ -108,7 +108,7 @@ target_include_directories(
|
||||
|
||||
|
||||
# Install plugin under the lib destination folder.
|
||||
install(TARGETS omptarget.rtl.amdgpu.nextgen LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
|
||||
set_target_properties(omptarget.rtl.amdgpu.nextgen PROPERTIES
|
||||
install(TARGETS omptarget.rtl.amdgpu LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
|
||||
set_target_properties(omptarget.rtl.amdgpu PROPERTIES
|
||||
INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
|
||||
CXX_VISIBILITY_PRESET protected)
|
||||
|
@ -12,3 +12,5 @@
|
||||
|
||||
add_subdirectory(OMPT)
|
||||
add_subdirectory(PluginInterface)
|
||||
add_subdirectory(MemoryManager)
|
||||
add_subdirectory(elf_common)
|
||||
|
@ -11,7 +11,7 @@
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
# NOTE: Don't try to build `elf_common` using `add_llvm_library`.
|
||||
# See openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
|
||||
# See openmp/libomptarget/plugins/common/PluginInterface/CMakeLists.txt
|
||||
# for more explanation.
|
||||
add_library(elf_common OBJECT elf_common.cpp ELFSymbols.cpp)
|
||||
|
@ -26,7 +26,7 @@ libomptarget_say("Building CUDA NextGen offloading plugin.")
|
||||
set(LIBOMPTARGET_DLOPEN_LIBCUDA OFF)
|
||||
option(LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA "Build with dlopened libcuda" ${LIBOMPTARGET_DLOPEN_LIBCUDA})
|
||||
|
||||
add_llvm_library(omptarget.rtl.cuda.nextgen SHARED
|
||||
add_llvm_library(omptarget.rtl.cuda SHARED
|
||||
src/rtl.cpp
|
||||
|
||||
LINK_COMPONENTS
|
||||
@ -43,33 +43,33 @@ add_llvm_library(omptarget.rtl.cuda.nextgen SHARED
|
||||
)
|
||||
|
||||
if ((OMPT_TARGET_DEFAULT) AND (LIBOMPTARGET_OMPT_SUPPORT))
|
||||
target_link_libraries(omptarget.rtl.cuda.nextgen PRIVATE OMPT)
|
||||
target_link_libraries(omptarget.rtl.cuda PRIVATE OMPT)
|
||||
endif()
|
||||
|
||||
if (LIBOMP_HAVE_VERSION_SCRIPT_FLAG)
|
||||
target_link_libraries(omptarget.rtl.cuda.nextgen PRIVATE
|
||||
target_link_libraries(omptarget.rtl.cuda PRIVATE
|
||||
"-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports,-z,defs")
|
||||
endif()
|
||||
|
||||
|
||||
if(LIBOMPTARGET_DEP_CUDA_FOUND AND NOT LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA)
|
||||
libomptarget_say("Building CUDA plugin linked against libcuda")
|
||||
target_link_libraries(omptarget.rtl.cuda.nextgen PRIVATE CUDA::cuda_driver)
|
||||
target_link_libraries(omptarget.rtl.cuda PRIVATE CUDA::cuda_driver)
|
||||
else()
|
||||
libomptarget_say("Building CUDA plugin for dlopened libcuda")
|
||||
target_include_directories(omptarget.rtl.cuda.nextgen PRIVATE ../../plugins/cuda/dynamic_cuda)
|
||||
target_sources(omptarget.rtl.cuda.nextgen PRIVATE ../../plugins/cuda/dynamic_cuda/cuda.cpp)
|
||||
target_include_directories(omptarget.rtl.cuda PRIVATE dynamic_cuda)
|
||||
target_sources(omptarget.rtl.cuda PRIVATE dynamic_cuda/cuda.cpp)
|
||||
endif()
|
||||
|
||||
# Define debug prefix. TODO: This should be automatized in the Debug.h but it
|
||||
# requires changing the original plugins.
|
||||
target_compile_definitions(omptarget.rtl.cuda.nextgen PRIVATE TARGET_NAME="CUDA")
|
||||
target_compile_definitions(omptarget.rtl.cuda.nextgen PRIVATE DEBUG_PREFIX="TARGET CUDA RTL")
|
||||
target_compile_definitions(omptarget.rtl.cuda PRIVATE TARGET_NAME="CUDA")
|
||||
target_compile_definitions(omptarget.rtl.cuda PRIVATE DEBUG_PREFIX="TARGET CUDA RTL")
|
||||
|
||||
target_include_directories(omptarget.rtl.cuda.nextgen PRIVATE ${LIBOMPTARGET_INCLUDE_DIR})
|
||||
target_include_directories(omptarget.rtl.cuda PRIVATE ${LIBOMPTARGET_INCLUDE_DIR})
|
||||
|
||||
# Install plugin under the lib destination folder.
|
||||
install(TARGETS omptarget.rtl.cuda.nextgen LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
|
||||
set_target_properties(omptarget.rtl.cuda.nextgen PROPERTIES
|
||||
install(TARGETS omptarget.rtl.cuda LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
|
||||
set_target_properties(omptarget.rtl.cuda PROPERTIES
|
||||
INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
|
||||
CXX_VISIBILITY_PRESET protected)
|
||||
|
@ -11,7 +11,7 @@
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
build_generic_elf64_nextgen("ppc64" "PPC64" "ppc64" "powerpc64-ibm-linux-gnu" "21")
|
||||
build_generic_elf64("ppc64" "PPC64" "ppc64" "powerpc64-ibm-linux-gnu" "21")
|
||||
else()
|
||||
libomptarget_say("Not building ppc64 NextGen offloading plugin: machine not found in the system.")
|
||||
endif()
|
||||
|
@ -11,7 +11,7 @@
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
build_generic_elf64_nextgen("ppc64le" "PPC64le" "ppc64" "powerpc64le-ibm-linux-gnu" "21")
|
||||
build_generic_elf64("ppc64le" "PPC64le" "ppc64" "powerpc64le-ibm-linux-gnu" "21")
|
||||
else()
|
||||
libomptarget_say("Not building ppc64le NextGen offloading plugin: machine not found in the system.")
|
||||
endif()
|
||||
|
@ -11,7 +11,7 @@
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
build_generic_elf64_nextgen("x86_64" "x86_64" "x86_64" "x86_64-pc-linux-gnu" "62")
|
||||
build_generic_elf64("x86_64" "x86_64" "x86_64" "x86_64-pc-linux-gnu" "62")
|
||||
else()
|
||||
libomptarget_say("Not building x86_64 NextGen offloading plugin: machine not found in the system.")
|
||||
endif()
|
||||
|
@ -1,89 +0,0 @@
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Build plugins for the user system if available.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
add_subdirectory(common)
|
||||
|
||||
# void build_generic_elf64(string tmachine, string tmachine_name,
#                          string tmachine_libname, string tmachine_triple,
#                          string elf_machine_id);
# - build a plugin for an ELF based generic 64-bit target based on libffi.
# - tmachine: name of the machine processor as used in the cmake build system;
#   matched as a regex suffix against CMAKE_SYSTEM_PROCESSOR.
# - tmachine_name: name of the machine to be printed with the debug messages
#   (also passed to the sources as the TARGET_NAME definition).
# - tmachine_libname: machine name to be appended to the plugin library name.
# - tmachine_triple: target triple recorded in LIBOMPTARGET_SYSTEM_TARGETS
#   (together with its "-LTO" variant) in the parent scope.
# - elf_machine_id: value passed to the sources as the TARGET_ELF_ID definition.
#
# This is deliberately a macro: it is invoked from the per-machine
# subdirectories, and the PARENT_SCOPE assignments below must resolve relative
# to the caller's directory scope.
macro(build_generic_elf64 tmachine tmachine_name tmachine_libname tmachine_triple elf_machine_id)
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
    if(LIBOMPTARGET_DEP_LIBFFI_FOUND)

      libomptarget_say("Building ${tmachine_name} offloading plugin.")

      # Define macro to be used as prefix of the runtime messages for this target.
      add_definitions("-DTARGET_NAME=${tmachine_name}")

      # Define macro with the ELF ID for this target.
      add_definitions("-DTARGET_ELF_ID=${elf_machine_id}")

      add_llvm_library("omptarget.rtl.${tmachine_libname}"
        SHARED

        ${CMAKE_CURRENT_SOURCE_DIR}/../generic-elf-64bit/src/rtl.cpp

        ADDITIONAL_HEADER_DIRS
          ${LIBOMPTARGET_INCLUDE_DIR}
          ${LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR}

        LINK_LIBS
          PRIVATE
          elf_common
          ${LIBOMPTARGET_DEP_LIBFFI_LIBRARIES}
          ${OPENMP_PTHREAD_LIB}
          "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports"

        NO_INSTALL_RPATH
      )

      # Install plugin under the lib destination folder.
      install(TARGETS "omptarget.rtl.${tmachine_libname}"
        LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
      set_target_properties("omptarget.rtl.${tmachine_libname}" PROPERTIES
        INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
        CXX_VISIBILITY_PRESET protected)

      target_include_directories("omptarget.rtl.${tmachine_libname}" PRIVATE
        ${LIBOMPTARGET_INCLUDE_DIR}
        ${LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR})

      list(APPEND LIBOMPTARGET_TESTED_PLUGINS
        "omptarget.rtl.${tmachine_libname}")

      # Report to the parent scope that we are building a plugin.
      set(LIBOMPTARGET_SYSTEM_TARGETS
        "${LIBOMPTARGET_SYSTEM_TARGETS} ${tmachine_triple} ${tmachine_triple}-LTO" PARENT_SCOPE)
      set(LIBOMPTARGET_TESTED_PLUGINS
        "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)

    else()
      libomptarget_say("Not building ${tmachine_name} offloading plugin: libffi dependency not found.")
    endif()
  else()
    libomptarget_say("Not building ${tmachine_name} offloading plugin: machine not found in the system.")
  endif()
endmacro()
|
||||
|
||||
add_subdirectory(aarch64)
|
||||
add_subdirectory(amdgpu)
|
||||
add_subdirectory(cuda)
|
||||
add_subdirectory(ppc64)
|
||||
add_subdirectory(ppc64le)
|
||||
add_subdirectory(x86_64)
|
||||
|
||||
# Make sure the parent scope can see the plugins that will be created.
|
||||
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}" PARENT_SCOPE)
|
||||
set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
|
||||
|
@ -1,17 +0,0 @@
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Build a plugin for an aarch64 machine if available.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
build_generic_elf64("aarch64" "aarch64" "aarch64" "aarch64-unknown-linux-gnu" "183")
|
||||
else()
|
||||
libomptarget_say("Not building aarch64 offloading plugin: machine not found in the system.")
|
||||
endif()
|
@ -1,122 +0,0 @@
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is dual licensed under the MIT and the University of Illinois Open
|
||||
# Source Licenses. See LICENSE.txt for details.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Build a plugin for an AMDGPU machine if available.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
################################################################################
|
||||
set(LIBOMPTARGET_BUILD_AMDGPU_PLUGIN TRUE CACHE BOOL
|
||||
"Whether to build AMDGPU plugin")
|
||||
if (NOT LIBOMPTARGET_BUILD_AMDGPU_PLUGIN)
|
||||
libomptarget_say("Not building AMDGPU offloading plugin: LIBOMPTARGET_BUILD_AMDGPU_PLUGIN is false")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# as of rocm-3.7, hsa is installed with cmake packages and kmt is found via hsa
|
||||
find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
|
||||
|
||||
if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux"))
|
||||
libomptarget_say("Not building AMDGPU plugin: only support AMDGPU in Linux x86_64, ppc64le, or aarch64 hosts")
|
||||
return()
|
||||
endif()
|
||||
|
||||
################################################################################
|
||||
# Define the suffix for the runtime messaging dumps.
|
||||
add_definitions(-DTARGET_NAME=AMDGPU)
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(ppc64le)|(aarch64)$")
|
||||
add_definitions(-DLITTLEENDIAN_CPU=1)
|
||||
endif()
|
||||
|
||||
if(CMAKE_BUILD_TYPE MATCHES Debug)
|
||||
add_definitions(-DDEBUG)
|
||||
endif()
|
||||
|
||||
set(LIBOMPTARGET_DLOPEN_LIBHSA OFF)
|
||||
option(LIBOMPTARGET_FORCE_DLOPEN_LIBHSA "Build with dlopened libhsa" ${LIBOMPTARGET_DLOPEN_LIBHSA})
|
||||
|
||||
if (${hsa-runtime64_FOUND} AND NOT LIBOMPTARGET_FORCE_DLOPEN_LIBHSA)
|
||||
libomptarget_say("Building AMDGPU plugin linked against libhsa")
|
||||
set(LIBOMPTARGET_EXTRA_SOURCE)
|
||||
set(LIBOMPTARGET_DEP_LIBRARIES hsa-runtime64::hsa-runtime64)
|
||||
else()
|
||||
libomptarget_say("Building AMDGPU plugin for dlopened libhsa")
|
||||
include_directories(dynamic_hsa)
|
||||
set(LIBOMPTARGET_EXTRA_SOURCE dynamic_hsa/hsa.cpp)
|
||||
set(LIBOMPTARGET_DEP_LIBRARIES)
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
||||
# On FreeBSD, the 'environ' symbol is undefined at link time, but resolved by
|
||||
# the dynamic linker at runtime. Therefore, allow the symbol to be undefined
|
||||
# when creating a shared library.
|
||||
set(LDFLAGS_UNDEFINED "-Wl,--allow-shlib-undefined")
|
||||
else()
|
||||
set(LDFLAGS_UNDEFINED "-Wl,-z,defs")
|
||||
endif()
|
||||
|
||||
add_llvm_library(omptarget.rtl.amdgpu SHARED
|
||||
impl/impl.cpp
|
||||
impl/interop_hsa.cpp
|
||||
impl/data.cpp
|
||||
impl/get_elf_mach_gfx_name.cpp
|
||||
impl/system.cpp
|
||||
impl/msgpack.cpp
|
||||
src/rtl.cpp
|
||||
${LIBOMPTARGET_EXTRA_SOURCE}
|
||||
|
||||
ADDITIONAL_HEADER_DIRS
|
||||
${LIBOMPTARGET_INCLUDE_DIR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/impl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../plugins-nextgen/amdgpu/utils
|
||||
|
||||
LINK_COMPONENTS
|
||||
Support
|
||||
Object
|
||||
|
||||
LINK_LIBS
|
||||
PRIVATE
|
||||
elf_common
|
||||
${LIBOMPTARGET_DEP_LIBRARIES}
|
||||
${OPENMP_PTHREAD_LIB}
|
||||
"-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports"
|
||||
${LDFLAGS_UNDEFINED}
|
||||
|
||||
NO_INSTALL_RPATH
|
||||
)
|
||||
|
||||
target_include_directories(
|
||||
omptarget.rtl.amdgpu
|
||||
PRIVATE
|
||||
${LIBOMPTARGET_INCLUDE_DIR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/impl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../plugins-nextgen/amdgpu/utils
|
||||
)
|
||||
|
||||
|
||||
# Install plugin under the lib destination folder.
|
||||
install(TARGETS omptarget.rtl.amdgpu LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
|
||||
set_target_properties(omptarget.rtl.amdgpu PROPERTIES
|
||||
INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
|
||||
CXX_VISIBILITY_PRESET protected)
|
||||
|
||||
# Report to the parent scope that we are building a plugin for hsa.
|
||||
# This controls whether tests are run for the amdgpu offloading target
|
||||
# Run them if libhsa is available, or if the user explicitly asked for dlopen
|
||||
# Otherwise this plugin is being built speculatively and there may be no hsa available
|
||||
option(LIBOMPTARGET_FORCE_AMDGPU_TESTS "Build AMDGPU libomptarget tests" OFF)
|
||||
if (LIBOMPTARGET_FOUND_AMDGPU_GPU OR LIBOMPTARGET_FORCE_AMDGPU_TESTS)
|
||||
# Report to the parent scope that we are building a plugin for amdgpu
|
||||
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa" PARENT_SCOPE)
|
||||
list(APPEND LIBOMPTARGET_TESTED_PLUGINS "omptarget.rtl.amdgpu")
|
||||
set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
|
||||
else()
|
||||
libomptarget_say("Not generating AMDGPU tests, no supported devices detected. Use 'LIBOMPTARGET_FORCE_AMDGPU_TESTS' to override.")
|
||||
return()
|
||||
endif()
|
@ -1,37 +0,0 @@
|
||||
//===--- amdgpu/impl/data.cpp ------------------------------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "impl_runtime.h"
|
||||
#include "hsa_api.h"
|
||||
#include "internal.h"
|
||||
#include "rt.h"
|
||||
#include <cassert>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
|
||||
using core::TaskImpl;
|
||||
|
||||
namespace core {
|
||||
namespace Runtime {
|
||||
// Allocate `size` bytes from `MemoryPool` and, on success, grant access to the
// allocation through core::allow_access_to_all_gpu_agents.
//
// ptr        - out parameter receiving the allocation.
// size       - number of bytes requested.
// MemoryPool - pool handed to hsa_amd_memory_pool_allocate.
//
// Returns the allocation status if it is not HSA_STATUS_SUCCESS, otherwise
// the status of the access grant.
hsa_status_t HostMalloc(void **ptr, size_t size,
                        hsa_amd_memory_pool_t MemoryPool) {
  hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, size, 0, ptr);
  // Only inspect *ptr once the allocation is known to have succeeded: callers
  // may pass the address of an uninitialised pointer, so reading it after a
  // failed allocation would print (and load) an indeterminate value.
  if (err != HSA_STATUS_SUCCESS)
    return err;

  DP("Malloced %p\n", *ptr);

  // NOTE(review): if granting access fails the allocation is not released
  // here; visible callers just propagate the error -- confirm whether the
  // memory should be freed on this path.
  return core::allow_access_to_all_gpu_agents(*ptr);
}
|
||||
|
||||
// Release `ptr` through hsa_amd_memory_pool_free, log the freed address, and
// hand the HSA status back to the caller unchanged.
hsa_status_t Memfree(void *ptr) {
  const hsa_status_t Status = hsa_amd_memory_pool_free(ptr);
  DP("Freed %p\n", ptr);
  return Status;
}
|
||||
} // namespace Runtime
|
||||
} // namespace core
|
@ -1,80 +0,0 @@
|
||||
//===--- amdgpu/impl/get_elf_mach_gfx_name.cpp -------------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "get_elf_mach_gfx_name.h"
|
||||
|
||||
// This header conflicts with the system elf.h (macros vs enums of the same
|
||||
// identifier) and contains more up to date values for the enum checked here.
|
||||
// rtl.cpp uses the system elf.h.
|
||||
#include "llvm/BinaryFormat/ELF.h"
|
||||
|
||||
const char *get_elf_mach_gfx_name(uint32_t EFlags) {
|
||||
using namespace llvm::ELF;
|
||||
uint32_t Gfx = (EFlags & EF_AMDGPU_MACH);
|
||||
switch (Gfx) {
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX801:
|
||||
return "gfx801";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX802:
|
||||
return "gfx802";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX803:
|
||||
return "gfx803";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX805:
|
||||
return "gfx805";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX810:
|
||||
return "gfx810";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX900:
|
||||
return "gfx900";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX902:
|
||||
return "gfx902";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX904:
|
||||
return "gfx904";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX906:
|
||||
return "gfx906";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX908:
|
||||
return "gfx908";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX909:
|
||||
return "gfx909";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX90A:
|
||||
return "gfx90a";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX90C:
|
||||
return "gfx90c";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX940:
|
||||
return "gfx940";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1010:
|
||||
return "gfx1010";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1011:
|
||||
return "gfx1011";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1012:
|
||||
return "gfx1012";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1013:
|
||||
return "gfx1013";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1030:
|
||||
return "gfx1030";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1031:
|
||||
return "gfx1031";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1032:
|
||||
return "gfx1032";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1033:
|
||||
return "gfx1033";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1034:
|
||||
return "gfx1034";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1035:
|
||||
return "gfx1035";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1036:
|
||||
return "gfx1036";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1100:
|
||||
return "gfx1100";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1101:
|
||||
return "gfx1101";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1102:
|
||||
return "gfx1102";
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1103:
|
||||
return "gfx1103";
|
||||
default:
|
||||
return "--unknown gfx";
|
||||
}
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
//===--- amdgpu/impl/get_elf_mach_gfx_name.h ---------------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef GET_ELF_MACH_GFX_NAME_H_INCLUDED
|
||||
#define GET_ELF_MACH_GFX_NAME_H_INCLUDED
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
const char *get_elf_mach_gfx_name(uint32_t EFlags);
|
||||
|
||||
#endif
|
@ -1,26 +0,0 @@
|
||||
//===--- amdgpu/impl/hsa_api.h ------------------------------------ C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef AMDGPU_HSA_API_H_INCLUDED
#define AMDGPU_HSA_API_H_INCLUDED

// Pull in the HSA core and AMD extension headers. When the compiler supports
// __has_include, prefer the "hsa/"-prefixed layout and fall back to headers
// found directly on the include path. Without __has_include, assume the
// prefixed layout for both headers so the pair is located consistently.
#if defined(__has_include)
#if __has_include("hsa/hsa.h")
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#elif __has_include("hsa.h")
#include "hsa.h"
#include "hsa_ext_amd.h"
#endif
#else
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#endif

#endif
|
@ -1,182 +0,0 @@
|
||||
//===--- amdgpu/impl/impl.cpp ------------------------------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "rt.h"
|
||||
#include <memory>
|
||||
|
||||
/*
|
||||
* Data
|
||||
*/
|
||||
|
||||
// Query hsa_amd_pointer_info for `ptr` and report, through
// `agentBaseAddress`, the address to use for it when it is locked memory:
//   - not of type HSA_EXT_POINTER_TYPE_LOCKED -> nullptr;
//   - locked and inside [hostBaseAddress, hostBaseAddress + sizeInBytes)
//     -> agentBaseAddress shifted by ptr's offset from hostBaseAddress;
//   - locked but outside that host range -> ptr itself.
// Returns the query's error status if it fails, HSA_STATUS_SUCCESS otherwise.
hsa_status_t is_locked(void *ptr, void **agentBaseAddress) {
  hsa_amd_pointer_info_t info;
  info.size = sizeof(hsa_amd_pointer_info_t);

  const hsa_status_t QueryStatus =
      hsa_amd_pointer_info(ptr, &info, /*alloc=*/nullptr,
                           /*num_agents_accessible=*/nullptr,
                           /*accessible=*/nullptr);
  if (QueryStatus != HSA_STATUS_SUCCESS) {
    DP("Error when getting pointer info\n");
    return QueryStatus;
  }

  if (info.type != HSA_EXT_POINTER_TYPE_LOCKED) {
    *agentBaseAddress = nullptr;
    return HSA_STATUS_SUCCESS;
  }

  // When user passes in a basePtr+offset we need to fix the locked pointer to
  // include the offset: ROCr always returns the base locked address, not the
  // shifted one.
  char *const HostBegin = (char *)info.hostBaseAddress;
  char *const HostEnd = HostBegin + info.sizeInBytes;
  char *const Queried = (char *)ptr;
  const bool InsideHostRange = HostBegin <= Queried && Queried < HostEnd;

  if (InsideHostRange) {
    const uint64_t Offset = (uint64_t)ptr - (uint64_t)info.hostBaseAddress;
    *agentBaseAddress = (void *)((uint64_t)info.agentBaseAddress + Offset);
  } else {
    // address is already device-agent accessible, no need to compute offset
    *agentBaseAddress = ptr;
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// host pointer (either src or dest) must be locked via hsa_amd_memory_lock
|
||||
static hsa_status_t invoke_hsa_copy(hsa_signal_t signal, void *dest,
|
||||
hsa_agent_t agent, const void *src,
|
||||
size_t size) {
|
||||
const hsa_signal_value_t init = 1;
|
||||
const hsa_signal_value_t success = 0;
|
||||
hsa_signal_store_screlease(signal, init);
|
||||
|
||||
hsa_status_t err = hsa_amd_memory_async_copy(dest, agent, src, agent, size, 0,
|
||||
nullptr, signal);
|
||||
if (err != HSA_STATUS_SUCCESS)
|
||||
return err;
|
||||
|
||||
// async_copy reports success by decrementing and failure by setting to < 0
|
||||
hsa_signal_value_t got = init;
|
||||
while (got == init)
|
||||
got = hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_NE, init,
|
||||
UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
if (got != success)
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
struct implFreePtrDeletor {
|
||||
void operator()(void *p) {
|
||||
core::Runtime::Memfree(p); // ignore failure to free
|
||||
}
|
||||
};
|
||||
|
||||
// Direction of a transfer handled by locking_async_memcpy: H2D is used by
// impl_memcpy_h2d (host source), D2H by impl_memcpy_d2h (host destination).
enum CopyDirection { H2D, D2H };
|
||||
|
||||
// Copy `size` bytes between host and device with invoke_hsa_copy, locking the
// host buffer for `agent` first if is_locked reports it is not locked yet.
//
// direction  - H2D copies locked(lockingPtr) -> dest; D2H copies
//              src -> locked(lockingPtr).
// signal     - HSA signal used to wait for the copy.
// dest, src  - destination / source as given by the caller; the host side is
//              replaced by its locked address for the actual copy.
// lockingPtr - the host pointer that must be locked for the transfer.
//
// A buffer locked here is always unlocked again before returning; a buffer
// that was already locked by the caller is left locked. A copy error takes
// precedence over an unlock error.
static hsa_status_t locking_async_memcpy(enum CopyDirection direction,
                                         hsa_signal_t signal, void *dest,
                                         hsa_agent_t agent, void *src,
                                         void *lockingPtr, size_t size) {
  void *lockedPtr = nullptr;
  hsa_status_t err = is_locked(lockingPtr, &lockedPtr);
  if (err != HSA_STATUS_SUCCESS)
    return err;

  // is_locked leaves lockedPtr null when the host pointer is not locked.
  const bool HostPtrIsLocked = lockedPtr != nullptr;
  if (!HostPtrIsLocked) {
    hsa_agent_t agents[1] = {agent};
    err = hsa_amd_memory_lock(lockingPtr, size, agents, /*num_agent=*/1,
                              &lockedPtr);
    if (err != HSA_STATUS_SUCCESS)
      return err;
    // size is a size_t, so it must be printed with %zu.
    DP("locking_async_memcpy: lockingPtr=%p lockedPtr=%p Size = %zu\n",
       lockingPtr, lockedPtr, size);
  }

  switch (direction) {
  case H2D:
    err = invoke_hsa_copy(signal, dest, agent, lockedPtr, size);
    break;
  case D2H:
    err = invoke_hsa_copy(signal, lockedPtr, agent, src, size);
    break;
  }

  // unlock only if not user locked
  if (HostPtrIsLocked)
    return err;

  // do not leak locked host pointers; if the copy itself failed, report that
  // error and discard the unlock status
  const hsa_status_t unlockErr = hsa_amd_memory_unlock(lockingPtr);
  return err != HSA_STATUS_SUCCESS ? err : unlockErr;
}
|
||||
|
||||
// Copy `size` bytes from host memory `hostSrc` to device memory `deviceDest`.
//
// The copy is first attempted directly from `hostSrc` via
// locking_async_memcpy. If that fails, the data is staged through a temporary
// buffer obtained from `MemoryPool` with core::Runtime::HostMalloc and the
// copy is retried from the staging buffer, which is released on return.
//
// Returns HSA_STATUS_SUCCESS when either attempt succeeds, the HostMalloc
// status if the staging buffer cannot be allocated, or the status of the
// second copy attempt.
hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
                             void *hostSrc, size_t size,
                             hsa_agent_t device_agent,
                             hsa_amd_memory_pool_t MemoryPool) {
  hsa_status_t err = locking_async_memcpy(CopyDirection::H2D, signal,
                                          deviceDest, device_agent, hostSrc,
                                          hostSrc, size);
  if (err == HSA_STATUS_SUCCESS)
    return err;

  // async memcpy sometimes fails in situations where
  // allocate + copy succeeds. Looks like it might be related to
  // locking part of a read only segment. Fall back for now.
  //
  // Start from a well-defined value: HostMalloc takes the address of this
  // pointer and may read through it (for its debug output) even when the
  // allocation did not succeed.
  void *tempHostPtr = nullptr;
  hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
  if (ret != HSA_STATUS_SUCCESS) {
    DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size);
    return ret;
  }
  // Frees the staging buffer on every exit path below.
  std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr);
  memcpy(tempHostPtr, hostSrc, size);

  return locking_async_memcpy(CopyDirection::H2D, signal, deviceDest,
                              device_agent, tempHostPtr, tempHostPtr, size);
}
|
||||
|
||||
// Copy `size` bytes from device memory `deviceSrc` to host memory `hostDest`.
//
// The copy is first attempted straight into `hostDest`. If that fails, the
// data is copied into a temporary buffer obtained from
// core::Runtime::HostMalloc(MemoryPool) and then memcpy'd into `hostDest`.
// Returns HSA_STATUS_SUCCESS on success, otherwise the status reported by the
// step that failed (allocation or the retried copy).
hsa_status_t impl_memcpy_d2h(hsa_signal_t signal, void *hostDest,
                             void *deviceSrc, size_t size,
                             hsa_agent_t deviceAgent,
                             hsa_amd_memory_pool_t MemoryPool) {
  hsa_status_t err;

  // device has always visibility over both pointers, so use that
  err = locking_async_memcpy(CopyDirection::D2H, signal, hostDest, deviceAgent,
                             deviceSrc, hostDest, size);

  if (err == HSA_STATUS_SUCCESS)
    return err;

  // The direct copy sometimes fails in situations where
  // allocate + copy succeeds. Looks like it might be related to
  // locking part of a read only segment. Fall back for now.
  void *tempHostPtr;
  hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
  if (ret != HSA_STATUS_SUCCESS) {
    DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size);
    return ret;
  }
  // Releases the temporary buffer on every return path below.
  std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr);

  err = locking_async_memcpy(CopyDirection::D2H, signal, tempHostPtr,
                             deviceAgent, deviceSrc, tempHostPtr, size);
  if (err != HSA_STATUS_SUCCESS)
    return err; // report the actual status, as impl_memcpy_h2d does

  memcpy(hostDest, tempHostPtr, size);
  return HSA_STATUS_SUCCESS;
}
|
@ -1,34 +0,0 @@
|
||||
//===--- amdgpu/impl/impl_runtime.h ------------------------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef INCLUDE_IMPL_RUNTIME_H_
|
||||
#define INCLUDE_IMPL_RUNTIME_H_
|
||||
|
||||
#include "hsa_api.h"
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Check if pointer ptr is already locked
|
||||
hsa_status_t is_locked(void *ptr, void **agentBaseAddress);
|
||||
|
||||
hsa_status_t impl_module_register_from_memory_to_place(
|
||||
void *module_bytes, size_t module_size, int DeviceId,
|
||||
hsa_status_t (*on_deserialized_data)(void *data, size_t size,
|
||||
void *cb_state),
|
||||
void *cb_state);
|
||||
|
||||
hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
|
||||
void *hostSrc, size_t size,
|
||||
hsa_agent_t device_agent,
|
||||
hsa_amd_memory_pool_t MemoryPool);
|
||||
|
||||
hsa_status_t impl_memcpy_d2h(hsa_signal_t sig, void *hostDest, void *deviceSrc,
|
||||
size_t size, hsa_agent_t device_agent,
|
||||
hsa_amd_memory_pool_t MemoryPool);
|
||||
}
|
||||
|
||||
#endif // INCLUDE_IMPL_RUNTIME_H_
|
@ -1,154 +0,0 @@
|
||||
//===--- amdgpu/impl/internal.h ----------------------------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef SRC_RUNTIME_INCLUDE_INTERNAL_H_
|
||||
#define SRC_RUNTIME_INCLUDE_INTERNAL_H_
|
||||
#include <inttypes.h>
|
||||
#include <pthread.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "hsa_api.h"
|
||||
|
||||
#include "impl_runtime.h"
|
||||
|
||||
#ifndef TARGET_NAME
|
||||
#error "Missing TARGET_NAME macro"
|
||||
#endif
|
||||
#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
|
||||
#include "Debug.h"
|
||||
|
||||
#define MAX_NUM_KERNELS (1024 * 16)
|
||||
|
||||
// ---------------------- Kernel Start -------------
|
||||
typedef struct atl_kernel_info_s {
|
||||
uint64_t kernel_object;
|
||||
uint32_t group_segment_size;
|
||||
uint32_t private_segment_size;
|
||||
uint32_t sgpr_count;
|
||||
uint32_t vgpr_count;
|
||||
uint32_t sgpr_spill_count;
|
||||
uint32_t vgpr_spill_count;
|
||||
uint32_t kernel_segment_size;
|
||||
uint32_t explicit_argument_count;
|
||||
uint32_t implicit_argument_count;
|
||||
} atl_kernel_info_t;
|
||||
|
||||
typedef struct atl_symbol_info_s {
|
||||
uint64_t addr;
|
||||
uint32_t size;
|
||||
} atl_symbol_info_t;
|
||||
|
||||
// ---------------------- Kernel End -------------
|
||||
|
||||
namespace core {
|
||||
class TaskgroupImpl;
|
||||
class TaskImpl;
|
||||
class Kernel;
|
||||
class KernelImpl;
|
||||
} // namespace core
|
||||
|
||||
struct SignalPoolT {
|
||||
SignalPoolT() {}
|
||||
SignalPoolT(const SignalPoolT &) = delete;
|
||||
SignalPoolT(SignalPoolT &&) = delete;
|
||||
~SignalPoolT() {
|
||||
size_t N = state.size();
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
hsa_signal_t signal = state.front();
|
||||
state.pop();
|
||||
hsa_status_t rc = hsa_signal_destroy(signal);
|
||||
if (rc != HSA_STATUS_SUCCESS) {
|
||||
DP("Signal pool destruction failed\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
size_t size() {
|
||||
lock l(&mutex);
|
||||
return state.size();
|
||||
}
|
||||
void push(hsa_signal_t s) {
|
||||
lock l(&mutex);
|
||||
state.push(s);
|
||||
}
|
||||
hsa_signal_t pop(void) {
|
||||
lock l(&mutex);
|
||||
if (!state.empty()) {
|
||||
hsa_signal_t res = state.front();
|
||||
state.pop();
|
||||
return res;
|
||||
}
|
||||
|
||||
// Pool empty, attempt to create another signal
|
||||
hsa_signal_t new_signal;
|
||||
hsa_status_t err = hsa_signal_create(0, 0, NULL, &new_signal);
|
||||
if (err == HSA_STATUS_SUCCESS) {
|
||||
return new_signal;
|
||||
}
|
||||
|
||||
// Fail
|
||||
return {0};
|
||||
}
|
||||
|
||||
private:
|
||||
static pthread_mutex_t mutex;
|
||||
std::queue<hsa_signal_t> state;
|
||||
struct lock {
|
||||
lock(pthread_mutex_t *m) : m(m) { pthread_mutex_lock(m); }
|
||||
~lock() { pthread_mutex_unlock(m); }
|
||||
pthread_mutex_t *m;
|
||||
};
|
||||
};
|
||||
|
||||
namespace core {
|
||||
hsa_status_t atl_init_gpu_context();
|
||||
|
||||
hsa_status_t init_hsa();
|
||||
hsa_status_t finalize_hsa();
|
||||
/*
|
||||
* Generic utils
|
||||
*/
|
||||
// Round the integer `value` down to a multiple of `alignment`.
// The mask trick used here requires `alignment` to be a power of two.
template <typename T> inline T alignDown(T value, size_t alignment) {
  return (T)(value & ~(alignment - 1));
}

// Round the address held in `value` down to a multiple of `alignment` bytes.
template <typename T> inline T *alignDown(T *value, size_t alignment) {
  return reinterpret_cast<T *>(alignDown((intptr_t)value, alignment));
}

// Round the integer `value` up to a multiple of `alignment`
// (`alignment` must be a power of two).
template <typename T> inline T alignUp(T value, size_t alignment) {
  return alignDown((T)(value + alignment - 1), alignment);
}

// Round the address held in `value` up to a multiple of `alignment` bytes.
template <typename T> inline T *alignUp(T *value, size_t alignment) {
  // The arithmetic has to happen on the integer address. Adding
  // `alignment - 1` to a T* would advance by (alignment - 1) * sizeof(T)
  // bytes and overshoot for every T larger than one byte.
  return reinterpret_cast<T *>(alignUp((intptr_t)value, alignment));
}
|
||||
|
||||
extern bool atl_is_impl_initialized();
|
||||
|
||||
bool handle_group_signal(hsa_signal_value_t value, void *arg);
|
||||
|
||||
hsa_status_t allow_access_to_all_gpu_agents(void *ptr);
|
||||
} // namespace core
|
||||
|
||||
// Return the text hsa_status_string reports for `err`, or a fixed fallback
// string when hsa_status_string itself does not succeed.
inline const char *get_error_string(hsa_status_t err) {
  const char *text;
  if (hsa_status_string(err, &text) != HSA_STATUS_SUCCESS)
    return "HSA_STATUS UNKNOWN.";
  return text;
}
|
||||
|
||||
#endif // SRC_RUNTIME_INCLUDE_INTERNAL_H_
|
@ -1,39 +0,0 @@
|
||||
//===--- amdgpu/impl/interop_hsa.cpp ------------------------------ C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "interop_hsa.h"
|
||||
#include "internal.h"
|
||||
|
||||
// Look up `symbol` in `SymbolInfoTable` and report its address and size.
//
//   SymbolInfoTable  map from symbol name to atl_symbol_info_t.
//   DeviceId         not used by this function.
//   symbol           NUL-terminated symbol name; must not be null.
//   var_addr         out: the entry's addr as a pointer, NULL if not found.
//   var_size         out: the entry's size, 0 if not found.
//
// Returns HSA_STATUS_SUCCESS when the symbol is present. Returns
// HSA_STATUS_ERROR when any pointer argument is null (outputs untouched) or
// when the symbol is not in the table (outputs cleared).
//
// Typical usage:
//   void *var_addr;
//   unsigned int var_size;
//   if (interop_hsa_get_symbol_info(table, device_id, "symbol_name",
//                                   &var_addr, &var_size) ==
//       HSA_STATUS_SUCCESS) {
//     /* copy var_size bytes to or from var_addr */
//   }
hsa_status_t interop_hsa_get_symbol_info(
    const std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
    int DeviceId, const char *symbol, void **var_addr, unsigned int *var_size) {
  if (!symbol || !var_addr || !var_size)
    return HSA_STATUS_ERROR;

  auto Entry = SymbolInfoTable.find(std::string(symbol));
  if (Entry == SymbolInfoTable.end()) {
    *var_addr = NULL;
    *var_size = 0;
    return HSA_STATUS_ERROR;
  }

  const atl_symbol_info_t &Info = Entry->second;
  *var_addr = reinterpret_cast<void *>(Info.addr);
  *var_size = Info.size;
  return HSA_STATUS_SUCCESS;
}
|
@ -1,26 +0,0 @@
|
||||
//===--- amdgpu/impl/interop_hsa.h -------------------------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef INCLUDE_INTEROP_HSA_H_
|
||||
#define INCLUDE_INTEROP_HSA_H_
|
||||
|
||||
#include "impl_runtime.h"
|
||||
#include "hsa_api.h"
|
||||
#include "internal.h"
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
extern "C" {
|
||||
|
||||
hsa_status_t interop_hsa_get_symbol_info(
|
||||
const std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
|
||||
int DeviceId, const char *symbol, void **var_addr, unsigned int *var_size);
|
||||
|
||||
}
|
||||
|
||||
#endif // INCLUDE_INTEROP_HSA_H_
|
@ -1,271 +0,0 @@
|
||||
//===--- amdgpu/impl/msgpack.cpp ---------------------------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
#include "msgpack.h"
|
||||
|
||||
namespace msgpack {
|
||||
|
||||
[[noreturn]] void internal_error() {
|
||||
printf("internal error\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
const char *type_name(type ty) {
|
||||
switch (ty) {
|
||||
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) \
|
||||
case NAME: \
|
||||
return #NAME;
|
||||
#include "msgpack.def"
|
||||
#undef X
|
||||
}
|
||||
internal_error();
|
||||
}
|
||||
|
||||
unsigned bytes_used_fixed(msgpack::type ty) {
|
||||
using namespace msgpack;
|
||||
switch (ty) {
|
||||
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) \
|
||||
case NAME: \
|
||||
return WIDTH;
|
||||
#include "msgpack.def"
|
||||
#undef X
|
||||
}
|
||||
internal_error();
|
||||
}
|
||||
|
||||
msgpack::type parse_type(unsigned char x) {
|
||||
|
||||
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) \
|
||||
if (x >= LOWER && x <= UPPER) { \
|
||||
return NAME; \
|
||||
} else
|
||||
#include "msgpack.def"
|
||||
#undef X
|
||||
{ internal_error(); }
|
||||
}
|
||||
|
||||
// Reinterpret the bytes of `x` as a value of type R. T and R must have the
// same size; the bytes are moved with memcpy rather than through a cast.
template <typename T, typename R> R bitcast(T x) {
  static_assert(sizeof(T) == sizeof(R), "");
  R out;
  memcpy(&out, &x, sizeof(R));
  return out;
}
// Explicit instantiation for the uint64_t -> int64_t conversion.
template int64_t bitcast<uint64_t, int64_t>(uint64_t);
|
||||
} // namespace msgpack
|
||||
|
||||
// Helper functions for reading additional payload from the header
|
||||
// Depending on the type, this can be a number of bytes, elements,
|
||||
// key-value pairs or an embedded integer.
|
||||
// Each takes a pointer to the start of the header and returns a uint64_t
|
||||
|
||||
namespace {
|
||||
namespace payload {
|
||||
uint64_t read_zero(const unsigned char *) { return 0; }
|
||||
|
||||
// Read the first byte and zero/sign extend it
|
||||
uint64_t read_embedded_u8(const unsigned char *start) { return start[0]; }
|
||||
uint64_t read_embedded_s8(const unsigned char *start) {
|
||||
int64_t res = msgpack::bitcast<uint8_t, int8_t>(start[0]);
|
||||
return msgpack::bitcast<int64_t, uint64_t>(res);
|
||||
}
|
||||
|
||||
// Read a masked part of the first byte
|
||||
uint64_t read_via_mask_0x1(const unsigned char *start) { return *start & 0x1u; }
|
||||
uint64_t read_via_mask_0xf(const unsigned char *start) { return *start & 0xfu; }
|
||||
uint64_t read_via_mask_0x1f(const unsigned char *start) {
|
||||
return *start & 0x1fu;
|
||||
}
|
||||
|
||||
// Read 1/2/4/8 bytes immediately following the type byte and zero/sign extend
|
||||
// Big endian format.
|
||||
uint64_t read_size_field_u8(const unsigned char *from) {
|
||||
from++;
|
||||
return from[0];
|
||||
}
|
||||
|
||||
// TODO: detect whether host is little endian or not, and whether the intrinsic
// is available. And probably use the builtin to test the diy
const bool use_bswap = false;

// Each reader below skips the type byte at `from` and decodes the big endian
// unsigned field that follows it. In the shift-based path every byte is
// widened to uint64_t before shifting: an unsigned char would otherwise be
// promoted to int, and shifting a byte >= 0x80 into bit 31 overflows and then
// sign-extends into the upper half of the 64-bit result.

// Read a 2 byte big endian field.
uint64_t read_size_field_u16(const unsigned char *from) {
  from++;
  if (use_bswap) {
    uint16_t b;
    memcpy(&b, from, 2);
    return __builtin_bswap16(b);
  } else {
    return ((uint64_t)from[0] << 8u) | (uint64_t)from[1];
  }
}

// Read a 4 byte big endian field.
uint64_t read_size_field_u32(const unsigned char *from) {
  from++;
  if (use_bswap) {
    uint32_t b;
    memcpy(&b, from, 4);
    return __builtin_bswap32(b);
  } else {
    return ((uint64_t)from[0] << 24u) | ((uint64_t)from[1] << 16u) |
           ((uint64_t)from[2] << 8u) | ((uint64_t)from[3] << 0u);
  }
}

// Read an 8 byte big endian field.
uint64_t read_size_field_u64(const unsigned char *from) {
  from++;
  if (use_bswap) {
    uint64_t b;
    memcpy(&b, from, 8);
    return __builtin_bswap64(b);
  } else {
    return ((uint64_t)from[0] << 56u) | ((uint64_t)from[1] << 48u) |
           ((uint64_t)from[2] << 40u) | ((uint64_t)from[3] << 32u) |
           ((uint64_t)from[4] << 24u) | ((uint64_t)from[5] << 16u) |
           ((uint64_t)from[6] << 8u) | ((uint64_t)from[7] << 0u);
  }
}
|
||||
|
||||
uint64_t read_size_field_s8(const unsigned char *from) {
|
||||
uint8_t u = read_size_field_u8(from);
|
||||
int64_t res = msgpack::bitcast<uint8_t, int8_t>(u);
|
||||
return msgpack::bitcast<int64_t, uint64_t>(res);
|
||||
}
|
||||
uint64_t read_size_field_s16(const unsigned char *from) {
|
||||
uint16_t u = read_size_field_u16(from);
|
||||
int64_t res = msgpack::bitcast<uint16_t, int16_t>(u);
|
||||
return msgpack::bitcast<int64_t, uint64_t>(res);
|
||||
}
|
||||
uint64_t read_size_field_s32(const unsigned char *from) {
|
||||
uint32_t u = read_size_field_u32(from);
|
||||
int64_t res = msgpack::bitcast<uint32_t, int32_t>(u);
|
||||
return msgpack::bitcast<int64_t, uint64_t>(res);
|
||||
}
|
||||
uint64_t read_size_field_s64(const unsigned char *from) {
|
||||
uint64_t u = read_size_field_u64(from);
|
||||
int64_t res = msgpack::bitcast<uint64_t, int64_t>(u);
|
||||
return msgpack::bitcast<int64_t, uint64_t>(res);
|
||||
}
|
||||
} // namespace payload
|
||||
} // namespace
|
||||
|
||||
namespace msgpack {
|
||||
|
||||
payload_info_t payload_info(msgpack::type ty) {
|
||||
using namespace msgpack;
|
||||
switch (ty) {
|
||||
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) \
|
||||
case NAME: \
|
||||
return payload::PAYLOAD;
|
||||
#include "msgpack.def"
|
||||
#undef X
|
||||
}
|
||||
internal_error();
|
||||
}
|
||||
|
||||
} // namespace msgpack
|
||||
|
||||
const unsigned char *msgpack::skip_next_message(const unsigned char *start,
|
||||
const unsigned char *end) {
|
||||
class f : public functors_defaults<f> {};
|
||||
return handle_msgpack({start, end}, f());
|
||||
}
|
||||
|
||||
namespace msgpack {
|
||||
// Return true when the message at the start of `bytes` is handed to the
// string callback with exactly the contents of the NUL-terminated `needle`
// (same length and same bytes). Returns false otherwise, including when the
// string callback is never invoked.
bool message_is_string(byte_range bytes, const char *needle) {
  const size_t needleLen = strlen(needle);
  bool found = false;

  foronly_string(bytes, [&found, needle, needleLen](size_t len,
                                                    const unsigned char *str) {
    if (len == needleLen && memcmp(needle, str, len) == 0) {
      found = true;
    }
  });

  return found;
}
|
||||
|
||||
// Print the message at the start of `bytes` to stdout in a rough, JSON-like
// form: strings quoted, integers in decimal, arrays in [...] and maps in
// {...} with one element per line. Intended for debugging only.
void dump(byte_range bytes) {
  struct inner : functors_defaults<inner> {
    inner(unsigned indent) : indent(indent) {}
    const unsigned by = 2; // indentation step
    unsigned indent = 0;   // current indentation in columns

    void handle_string(size_t N, const unsigned char *bytes) {
      // The payload is not NUL terminated, so copy it before printing.
      char *tmp = (char *)malloc(N + 1);
      if (!tmp) {
        printf("\"<alloc failed>\"");
        return;
      }
      memcpy(tmp, bytes, N);
      tmp[N] = '\0';
      printf("\"%s\"", tmp);
      free(tmp);
    }

    // Cast to (unsigned) long long so the format matches on every platform,
    // not only where int64_t happens to be long.
    void handle_signed(int64_t x) { printf("%lld", (long long)x); }
    void handle_unsigned(uint64_t x) {
      printf("%llu", (unsigned long long)x);
    }

    // Prints N array elements. Returns the position after the array, or
    // nullptr when an element could not be parsed.
    const unsigned char *handle_array(uint64_t N, byte_range bytes) {
      // The field width argument of %*s has to be an int.
      printf("\n%*s[\n", (int)indent, "");
      indent += by;

      for (uint64_t i = 0; i < N; i++) {
        indent += by;
        printf("%*s", (int)indent, "");
        const unsigned char *next = handle_msgpack<inner>(bytes, {indent});
        printf(",\n");
        indent -= by;
        bytes.start = next;
        if (!next) {
          break;
        }
      }
      indent -= by;
      printf("%*s]", (int)indent, "");

      return bytes.start;
    }

    // Prints N key/value pairs. Returns the position after the map, or
    // nullptr when a key or value could not be parsed, so that the failure
    // reaches the caller the same way it does for arrays.
    const unsigned char *handle_map(uint64_t N, byte_range bytes) {
      printf("\n%*s{\n", (int)indent, "");
      indent += by;

      bool ok = true;
      for (uint64_t i = 0; i < N; i++) {
        printf("%*s", (int)indent, "");
        const unsigned char *end_key =
            handle_msgpack<inner>({bytes.start, bytes.end}, {indent});
        if (!end_key) {
          ok = false;
          break;
        }

        printf(" : ");

        const unsigned char *end_value =
            handle_msgpack<inner>({end_key, bytes.end}, {indent});
        if (!end_value) {
          ok = false;
          break;
        }

        printf(",\n");
        bytes.start = end_value;
      }

      indent -= by;
      printf("%*s}", (int)indent, "");

      return ok ? bytes.start : nullptr;
    }
  };

  handle_msgpack<inner>(bytes, {0});
  printf("\n");
}
|
||||
|
||||
} // namespace msgpack
|
@ -1,46 +0,0 @@
|
||||
//===--- amdgpu/impl/msgpack.def ---------------------------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// name, header width, reader, [lower, upper] encoding
|
||||
X(posfixint, 1, read_embedded_u8, 0x00, 0x7f)
|
||||
X(negfixint, 1, read_embedded_s8, 0xe0, 0xff)
|
||||
X(fixmap, 1, read_via_mask_0xf, 0x80, 0x8f)
|
||||
X(fixarray, 1, read_via_mask_0xf, 0x90, 0x9f)
|
||||
X(fixstr, 1, read_via_mask_0x1f, 0xa0, 0xbf)
|
||||
X(nil, 1, read_zero, 0xc0, 0xc0)
|
||||
X(never_used, 1, read_zero, 0xc1, 0xc1)
|
||||
X(f, 1, read_via_mask_0x1, 0xc2, 0xc2)
|
||||
X(t, 1, read_via_mask_0x1, 0xc3, 0xc3)
|
||||
X(bin8, 2, read_size_field_u8, 0xc4, 0xc4)
|
||||
X(bin16, 3, read_size_field_u16, 0xc5, 0xc5)
|
||||
X(bin32, 5, read_size_field_u32, 0xc6, 0xc6)
|
||||
X(ext8, 3, read_size_field_u8, 0xc7, 0xc7)
|
||||
X(ext16, 4, read_size_field_u16, 0xc8, 0xc8)
|
||||
X(ext32, 6, read_size_field_u32, 0xc9, 0xc9)
|
||||
X(float32, 5, read_zero, 0xca, 0xca)
|
||||
X(float64, 9, read_zero, 0xcb, 0xcb)
|
||||
X(uint8, 2, read_size_field_u8, 0xcc, 0xcc)
|
||||
X(uint16, 3, read_size_field_u16, 0xcd, 0xcd)
|
||||
X(uint32, 5, read_size_field_u32, 0xce, 0xce)
|
||||
X(uint64, 9, read_size_field_u64, 0xcf, 0xcf)
|
||||
X(int8, 2, read_size_field_s8, 0xd0, 0xd0)
|
||||
X(int16, 3, read_size_field_s16, 0xd1, 0xd1)
|
||||
X(int32, 5, read_size_field_s32, 0xd2, 0xd2)
|
||||
X(int64, 9, read_size_field_s64, 0xd3, 0xd3)
|
||||
X(fixext1, 3, read_zero, 0xd4, 0xd4)
|
||||
X(fixext2, 4, read_zero, 0xd5, 0xd5)
|
||||
X(fixext4, 6, read_zero, 0xd6, 0xd6)
|
||||
X(fixext8, 10, read_zero, 0xd7, 0xd7)
|
||||
X(fixext16, 18, read_zero, 0xd8, 0xd8)
|
||||
X(str8, 2, read_size_field_u8, 0xd9, 0xd9)
|
||||
X(str16, 3, read_size_field_u16, 0xda, 0xda)
|
||||
X(str32, 5, read_size_field_u32, 0xdb, 0xdb)
|
||||
X(array16, 3, read_size_field_u16, 0xdc, 0xdc)
|
||||
X(array32, 5, read_size_field_u32, 0xdd, 0xdd)
|
||||
X(map16, 3, read_size_field_u16, 0xde, 0xde)
|
||||
X(map32, 5, read_size_field_u32, 0xdf, 0xdf)
|
@ -1,282 +0,0 @@
|
||||
//===--- amdgpu/impl/msgpack.h ------------------------------------ C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef MSGPACK_H
|
||||
#define MSGPACK_H
|
||||
|
||||
#include <functional>
|
||||
|
||||
namespace msgpack {
|
||||
|
||||
// The message pack format is dynamically typed, schema-less. Format is:
|
||||
// message: [type][header][payload]
|
||||
// where type is one byte, header length is a fixed length function of type
|
||||
// payload is zero to N bytes, with the length encoded in [type][header]
|
||||
|
||||
// Scalar fields include boolean, signed integer, float, string etc
|
||||
// Composite types are sequences of messages
|
||||
// Array field is [header][element][element]...
|
||||
// Map field is [header][key][value][key][value]...
|
||||
|
||||
// Multibyte integer fields are big endian encoded
|
||||
// The map key can be any message type
|
||||
// Maps may contain duplicate keys
|
||||
// Data is not uniquely encoded, e.g. integer "8" may be stored as one byte or
|
||||
// in as many as nine, as signed or unsigned. Implementation defined.
|
||||
// Similarly "foo" may embed the length in the type field or in multiple bytes
|
||||
|
||||
// This parser is structured as an iterator over a sequence of bytes.
|
||||
// It calls a user provided function on each message in order to extract fields
|
||||
// The default implementation for each scalar type is to do nothing. For map or
|
||||
// arrays, the default implementation returns just after that message to support
|
||||
// iterating to the next message, but otherwise has no effect.
|
||||
|
||||
struct byte_range {
|
||||
const unsigned char *start;
|
||||
const unsigned char *end;
|
||||
};
|
||||
|
||||
const unsigned char *skip_next_message(const unsigned char *start,
|
||||
const unsigned char *end);
|
||||
|
||||
template <typename Derived> class functors_defaults {
|
||||
public:
|
||||
void cb_string(size_t N, const unsigned char *str) {
|
||||
derived().handle_string(N, str);
|
||||
}
|
||||
void cb_boolean(bool x) { derived().handle_boolean(x); }
|
||||
void cb_signed(int64_t x) { derived().handle_signed(x); }
|
||||
void cb_unsigned(uint64_t x) { derived().handle_unsigned(x); }
|
||||
void cb_array_elements(byte_range bytes) {
|
||||
derived().handle_array_elements(bytes);
|
||||
}
|
||||
void cb_map_elements(byte_range key, byte_range value) {
|
||||
derived().handle_map_elements(key, value);
|
||||
}
|
||||
const unsigned char *cb_array(uint64_t N, byte_range bytes) {
|
||||
return derived().handle_array(N, bytes);
|
||||
}
|
||||
const unsigned char *cb_map(uint64_t N, byte_range bytes) {
|
||||
return derived().handle_map(N, bytes);
|
||||
}
|
||||
|
||||
private:
|
||||
Derived &derived() { return *static_cast<Derived *>(this); }
|
||||
|
||||
// Default implementations for scalar ops are no-ops
|
||||
void handle_string(size_t, const unsigned char *) {}
|
||||
void handle_boolean(bool) {}
|
||||
void handle_signed(int64_t) {}
|
||||
void handle_unsigned(uint64_t) {}
|
||||
void handle_array_elements(byte_range) {}
|
||||
void handle_map_elements(byte_range, byte_range) {}
|
||||
|
||||
// Default implementation for sequences is to skip over the messages
|
||||
const unsigned char *handle_array(uint64_t N, byte_range bytes) {
|
||||
for (uint64_t i = 0; i < N; i++) {
|
||||
const unsigned char *next = skip_next_message(bytes.start, bytes.end);
|
||||
if (!next) {
|
||||
return nullptr;
|
||||
}
|
||||
cb_array_elements(bytes);
|
||||
bytes.start = next;
|
||||
}
|
||||
return bytes.start;
|
||||
}
|
||||
const unsigned char *handle_map(uint64_t N, byte_range bytes) {
|
||||
for (uint64_t i = 0; i < N; i++) {
|
||||
const unsigned char *start_key = bytes.start;
|
||||
const unsigned char *end_key = skip_next_message(start_key, bytes.end);
|
||||
if (!end_key) {
|
||||
return nullptr;
|
||||
}
|
||||
const unsigned char *start_value = end_key;
|
||||
const unsigned char *end_value =
|
||||
skip_next_message(start_value, bytes.end);
|
||||
if (!end_value) {
|
||||
return nullptr;
|
||||
}
|
||||
cb_map_elements({start_key, end_key}, {start_value, end_value});
|
||||
bytes.start = end_value;
|
||||
}
|
||||
return bytes.start;
|
||||
}
|
||||
};
|
||||
|
||||
typedef enum : uint8_t {
|
||||
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) NAME,
|
||||
#include "msgpack.def"
|
||||
#undef X
|
||||
} type;
|
||||
|
||||
[[noreturn]] void internal_error();
|
||||
type parse_type(unsigned char x);
|
||||
unsigned bytes_used_fixed(type ty);
|
||||
|
||||
typedef uint64_t (*payload_info_t)(const unsigned char *);
|
||||
payload_info_t payload_info(msgpack::type ty);
|
||||
|
||||
template <typename T, typename R> R bitcast(T x);
|
||||
|
||||
template <typename F, msgpack::type ty>
|
||||
const unsigned char *handle_msgpack_given_type(byte_range bytes, F f) {
|
||||
const unsigned char *start = bytes.start;
|
||||
const unsigned char *end = bytes.end;
|
||||
const uint64_t available = end - start;
|
||||
assert(available != 0);
|
||||
assert(ty == parse_type(*start));
|
||||
|
||||
const uint64_t bytes_used = bytes_used_fixed(ty);
|
||||
if (available < bytes_used) {
|
||||
return 0;
|
||||
}
|
||||
const uint64_t available_post_header = available - bytes_used;
|
||||
|
||||
const payload_info_t info = payload_info(ty);
|
||||
const uint64_t N = info(start);
|
||||
|
||||
switch (ty) {
|
||||
case msgpack::t:
|
||||
case msgpack::f: {
|
||||
// t is 0b11000010, f is 0b11000011, masked with 0x1
|
||||
f.cb_boolean(N);
|
||||
return start + bytes_used;
|
||||
}
|
||||
|
||||
case msgpack::posfixint:
|
||||
case msgpack::uint8:
|
||||
case msgpack::uint16:
|
||||
case msgpack::uint32:
|
||||
case msgpack::uint64: {
|
||||
f.cb_unsigned(N);
|
||||
return start + bytes_used;
|
||||
}
|
||||
|
||||
case msgpack::negfixint:
|
||||
case msgpack::int8:
|
||||
case msgpack::int16:
|
||||
case msgpack::int32:
|
||||
case msgpack::int64: {
|
||||
f.cb_signed(bitcast<uint64_t, int64_t>(N));
|
||||
return start + bytes_used;
|
||||
}
|
||||
|
||||
case msgpack::fixstr:
|
||||
case msgpack::str8:
|
||||
case msgpack::str16:
|
||||
case msgpack::str32: {
|
||||
if (available_post_header < N) {
|
||||
return 0;
|
||||
} else {
|
||||
f.cb_string(N, start + bytes_used);
|
||||
return start + bytes_used + N;
|
||||
}
|
||||
}
|
||||
|
||||
case msgpack::fixarray:
|
||||
case msgpack::array16:
|
||||
case msgpack::array32: {
|
||||
return f.cb_array(N, {start + bytes_used, end});
|
||||
}
|
||||
|
||||
case msgpack::fixmap:
|
||||
case msgpack::map16:
|
||||
case msgpack::map32: {
|
||||
return f.cb_map(N, {start + bytes_used, end});
|
||||
}
|
||||
|
||||
case msgpack::nil:
|
||||
case msgpack::bin8:
|
||||
case msgpack::bin16:
|
||||
case msgpack::bin32:
|
||||
case msgpack::float32:
|
||||
case msgpack::float64:
|
||||
case msgpack::ext8:
|
||||
case msgpack::ext16:
|
||||
case msgpack::ext32:
|
||||
case msgpack::fixext1:
|
||||
case msgpack::fixext2:
|
||||
case msgpack::fixext4:
|
||||
case msgpack::fixext8:
|
||||
case msgpack::fixext16:
|
||||
case msgpack::never_used: {
|
||||
if (available_post_header < N) {
|
||||
return 0;
|
||||
}
|
||||
return start + bytes_used + N;
|
||||
}
|
||||
}
|
||||
internal_error();
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
const unsigned char *handle_msgpack(byte_range bytes, F f) {
|
||||
const unsigned char *start = bytes.start;
|
||||
const unsigned char *end = bytes.end;
|
||||
const uint64_t available = end - start;
|
||||
if (available == 0) {
|
||||
return 0;
|
||||
}
|
||||
const type ty = parse_type(*start);
|
||||
|
||||
switch (ty) {
|
||||
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) \
|
||||
case msgpack::NAME: \
|
||||
return handle_msgpack_given_type<F, msgpack::NAME>(bytes, f);
|
||||
#include "msgpack.def"
|
||||
#undef X
|
||||
}
|
||||
|
||||
internal_error();
|
||||
}
|
||||
|
||||
bool message_is_string(byte_range bytes, const char *str);
|
||||
|
||||
template <typename C> void foronly_string(byte_range bytes, C callback) {
|
||||
struct inner : functors_defaults<inner> {
|
||||
inner(C &cb) : cb(cb) {}
|
||||
C &cb;
|
||||
void handle_string(size_t N, const unsigned char *str) { cb(N, str); }
|
||||
};
|
||||
handle_msgpack<inner>(bytes, {callback});
|
||||
}
|
||||
|
||||
template <typename C> void foronly_unsigned(byte_range bytes, C callback) {
|
||||
struct inner : functors_defaults<inner> {
|
||||
inner(C &cb) : cb(cb) {}
|
||||
C &cb;
|
||||
void handle_unsigned(uint64_t x) { cb(x); }
|
||||
};
|
||||
handle_msgpack<inner>(bytes, {callback});
|
||||
}
|
||||
|
||||
template <typename C> void foreach_array(byte_range bytes, C callback) {
|
||||
struct inner : functors_defaults<inner> {
|
||||
inner(C &cb) : cb(cb) {}
|
||||
C &cb;
|
||||
void handle_array_elements(byte_range element) { cb(element); }
|
||||
};
|
||||
handle_msgpack<inner>(bytes, {callback});
|
||||
}
|
||||
|
||||
template <typename C> void foreach_map(byte_range bytes, C callback) {
|
||||
struct inner : functors_defaults<inner> {
|
||||
inner(C &cb) : cb(cb) {}
|
||||
C &cb;
|
||||
void handle_map_elements(byte_range key, byte_range value) {
|
||||
cb(key, value);
|
||||
}
|
||||
};
|
||||
handle_msgpack<inner>(bytes, {callback});
|
||||
}
|
||||
|
||||
// Crude approximation to json
|
||||
void dump(byte_range);
|
||||
|
||||
} // namespace msgpack
|
||||
|
||||
#endif
|
@ -1,34 +0,0 @@
|
||||
//===--- amdgpu/impl/rt.h ----------------------------------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef SRC_RUNTIME_INCLUDE_RT_H_
#define SRC_RUNTIME_INCLUDE_RT_H_

#include "hsa_api.h"
#include "impl_runtime.h"
#include "internal.h"

// Include what the declarations below use directly, so this header does not
// depend on the project headers above pulling these in transitively.
#include <cstddef>
#include <map>
#include <string>
#include <vector>

namespace core {
namespace Runtime {
// Memory helpers; both report their outcome as an hsa_status_t.
// NOTE(review): the implementations are not in this header. Presumably
// HostMalloc allocates `size` bytes from `MemoryPool` into *ptr and Memfree
// releases such an allocation; confirm against the definitions.
hsa_status_t Memfree(void *);
hsa_status_t HostMalloc(void **ptr, size_t size,
                        hsa_amd_memory_pool_t MemoryPool);

} // namespace Runtime

// Loads the code object held in [module_bytes, module_bytes + module_size)
// for `agent`.
//  - KernelInfoTable / SymbolInfoTable: filled with per-kernel and
//    per-variable information, keyed by name.
//  - on_deserialized_data: invoked with the deserialized image, `module_size`
//    and `cb_state`; a non-success result aborts the load.
//  - HSAExecutables: receives the executable created for this module on
//    success.
// Returns HSA_STATUS_SUCCESS on success and an error status otherwise.
hsa_status_t RegisterModuleFromMemory(
    std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
    std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
    void *module_bytes, size_t module_size, hsa_agent_t agent,
    hsa_status_t (*on_deserialized_data)(void *data, size_t size,
                                         void *cb_state),
    void *cb_state, std::vector<hsa_executable_t> &HSAExecutables);

} // namespace core

#endif // SRC_RUNTIME_INCLUDE_RT_H_
|
@ -1,744 +0,0 @@
|
||||
//===--- amdgpu/impl/system.cpp ----------------------------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/BinaryFormat/ELF.h"
|
||||
#include "llvm/Object/ELF.h"
|
||||
#include "llvm/Object/ELFObjectFile.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "internal.h"
|
||||
#include "rt.h"
|
||||
|
||||
#include "msgpack.h"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::object;
|
||||
using namespace llvm::ELF;
|
||||
|
||||
namespace hsa {
|
||||
// Wrap HSA iterate API in a shim that allows passing general callables
|
||||
template <typename C>
|
||||
hsa_status_t executable_iterate_symbols(hsa_executable_t executable, C cb) {
|
||||
auto L = [](hsa_executable_t executable, hsa_executable_symbol_t symbol,
|
||||
void *data) -> hsa_status_t {
|
||||
C *unwrapped = static_cast<C *>(data);
|
||||
return (*unwrapped)(executable, symbol);
|
||||
};
|
||||
return hsa_executable_iterate_symbols(executable, L,
|
||||
static_cast<void *>(&cb));
|
||||
}
|
||||
} // namespace hsa
|
||||
|
||||
using address = unsigned char *;

// Fixed-size header of an ELF note record. Per the field notes below, the
// name, optional padding and the descriptor follow it in the file.
// FreeBSD already declares Elf_Note (indirectly via <libelf.h>), so the
// definition is skipped there.
#if !defined(__FreeBSD__)
typedef struct {
  uint32_t n_namesz; // Length of the note's name.
  uint32_t n_descsz; // Length of the note's value.
  uint32_t n_type;   // Type of the note.
  // Followed by: the name,
  //              optional padding,
  //              the desc at 4 byte alignment (not 8, despite being elf64).
} Elf_Note;
#endif
|
||||
|
||||
// Plain record describing one kernel argument as read from the code object
// metadata.
class KernelArgMD {
public:
  // Kind of a kernel argument. Unknown is the value a default-constructed
  // record carries.
  enum class ValueKind {
    HiddenGlobalOffsetX,
    HiddenGlobalOffsetY,
    HiddenGlobalOffsetZ,
    HiddenNone,
    HiddenPrintfBuffer,
    HiddenDefaultQueue,
    HiddenCompletionAction,
    HiddenMultiGridSyncArg,
    HiddenHostcallBuffer,
    HiddenHeapV1,
    Unknown
  };

  // Starts out empty: no name, zero size and offset, kind Unknown (taken from
  // the default member initializers below).
  KernelArgMD() {}

  // fields
  std::string name_{};
  uint32_t size_ = 0;
  uint32_t offset_ = 0;
  ValueKind valueKind_ = ValueKind::Unknown;
};
|
||||
|
||||
static const std::map<std::string, KernelArgMD::ValueKind> ArgValueKind = {
|
||||
// v3
|
||||
// {"by_value", KernelArgMD::ValueKind::ByValue},
|
||||
// {"global_buffer", KernelArgMD::ValueKind::GlobalBuffer},
|
||||
// {"dynamic_shared_pointer",
|
||||
// KernelArgMD::ValueKind::DynamicSharedPointer},
|
||||
// {"sampler", KernelArgMD::ValueKind::Sampler},
|
||||
// {"image", KernelArgMD::ValueKind::Image},
|
||||
// {"pipe", KernelArgMD::ValueKind::Pipe},
|
||||
// {"queue", KernelArgMD::ValueKind::Queue},
|
||||
{"hidden_global_offset_x", KernelArgMD::ValueKind::HiddenGlobalOffsetX},
|
||||
{"hidden_global_offset_y", KernelArgMD::ValueKind::HiddenGlobalOffsetY},
|
||||
{"hidden_global_offset_z", KernelArgMD::ValueKind::HiddenGlobalOffsetZ},
|
||||
{"hidden_none", KernelArgMD::ValueKind::HiddenNone},
|
||||
{"hidden_printf_buffer", KernelArgMD::ValueKind::HiddenPrintfBuffer},
|
||||
{"hidden_default_queue", KernelArgMD::ValueKind::HiddenDefaultQueue},
|
||||
{"hidden_completion_action",
|
||||
KernelArgMD::ValueKind::HiddenCompletionAction},
|
||||
{"hidden_multigrid_sync_arg",
|
||||
KernelArgMD::ValueKind::HiddenMultiGridSyncArg},
|
||||
{"hidden_hostcall_buffer", KernelArgMD::ValueKind::HiddenHostcallBuffer},
|
||||
{"hidden_heap_v1", KernelArgMD::ValueKind::HiddenHeapV1}};
|
||||
|
||||
namespace core {
|
||||
|
||||
// System event handler registered with the HSA runtime.
//
// For a GPU memory fault event it prints one line to stderr containing the
// faulting address and a description of every recognized fault reason, then
// returns HSA_STATUS_ERROR. Every other event type is ignored and reported as
// HSA_STATUS_SUCCESS. `data` is unused.
hsa_status_t callbackEvent(const hsa_amd_event_t *event, void *data) {
  (void)data;
  if (event->event_type != HSA_AMD_GPU_MEMORY_FAULT_EVENT)
    return HSA_STATUS_SUCCESS;

  const hsa_amd_gpu_memory_fault_info_t &memory_fault = event->memory_fault;

  // Fault reasons are consecutive single-bit flags (1 << 0, 1 << 1, ...) as
  // declared by hsa_amd_memory_fault_reason_t. The previous hex constants
  // (0x10, 0x100, ...) were written as if they were binary digits and so
  // tested the wrong bits for everything except the first reason.
  struct FaultReason {
    uint32_t bit;
    const char *text;
  };
  static const FaultReason Reasons[] = {
      {1u << 0, "Page not present or supervisor privilege. "},
      {1u << 1, "Write access to a read-only page. "},
      {1u << 2, "Execute access to a page marked NX. "},
      {1u << 3, "Host access only. "},
      {1u << 4, "ECC failure (if supported by HW). "},
      {1u << 5, "Can't determine the exact fault address. "},
  };

  std::stringstream stream;
  stream << std::hex << (uintptr_t)memory_fault.virtual_address;
  std::string addr("0x" + stream.str());

  std::string err_string = "[GPU Memory Error] Addr: " + addr;
  err_string += " Reason: ";

  bool recognized = false;
  for (const FaultReason &reason : Reasons) {
    if (memory_fault.fault_reason_mask & reason.bit) {
      err_string += reason.text;
      recognized = true;
    }
  }
  // None of the bits above is set: keep the original fallback wording.
  if (!recognized)
    err_string += "No Idea! ";

  fprintf(stderr, "%s\n", err_string.c_str());
  return HSA_STATUS_ERROR;
}
|
||||
|
||||
// Registers callbackEvent as the HSA system event handler.
// Returns HSA_STATUS_SUCCESS when the registration succeeds; otherwise prints
// a diagnostic and returns HSA_STATUS_ERROR.
hsa_status_t atl_init_gpu_context() {
  const hsa_status_t status =
      hsa_amd_register_system_event_handler(callbackEvent, NULL);
  if (status == HSA_STATUS_SUCCESS)
    return HSA_STATUS_SUCCESS;

  printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
         "Registering the system for memory faults", get_error_string(status));
  return HSA_STATUS_ERROR;
}
|
||||
|
||||
// Returns true when `value_kind` is one of the hidden argument kinds listed
// below and false for anything else (including Unknown).
static bool isImplicit(KernelArgMD::ValueKind value_kind) {
  using Kind = KernelArgMD::ValueKind;
  static const Kind HiddenKinds[] = {
      Kind::HiddenGlobalOffsetX,    Kind::HiddenGlobalOffsetY,
      Kind::HiddenGlobalOffsetZ,    Kind::HiddenNone,
      Kind::HiddenPrintfBuffer,     Kind::HiddenDefaultQueue,
      Kind::HiddenCompletionAction, Kind::HiddenMultiGridSyncArg,
      Kind::HiddenHostcallBuffer,   Kind::HiddenHeapV1,
  };

  for (Kind Hidden : HiddenKinds)
    if (Hidden == value_kind)
      return true;
  return false;
}
|
||||
|
||||
static std::pair<const unsigned char *, const unsigned char *>
|
||||
findMetadata(const ELFObjectFile<ELF64LE> &ELFObj) {
|
||||
constexpr std::pair<const unsigned char *, const unsigned char *> Failure = {
|
||||
nullptr, nullptr};
|
||||
const auto &Elf = ELFObj.getELFFile();
|
||||
auto PhdrsOrErr = Elf.program_headers();
|
||||
if (!PhdrsOrErr) {
|
||||
consumeError(PhdrsOrErr.takeError());
|
||||
return Failure;
|
||||
}
|
||||
|
||||
for (auto Phdr : *PhdrsOrErr) {
|
||||
if (Phdr.p_type != PT_NOTE)
|
||||
continue;
|
||||
|
||||
Error Err = Error::success();
|
||||
for (auto Note : Elf.notes(Phdr, Err)) {
|
||||
if (Note.getType() == 7 || Note.getType() == 8)
|
||||
return Failure;
|
||||
|
||||
// Code object v2 uses yaml metadata and is no longer supported.
|
||||
if (Note.getType() == NT_AMD_HSA_METADATA && Note.getName() == "AMD")
|
||||
return Failure;
|
||||
// Code object v3 should have AMDGPU metadata.
|
||||
if (Note.getType() == NT_AMDGPU_METADATA && Note.getName() != "AMDGPU")
|
||||
return Failure;
|
||||
|
||||
ArrayRef<uint8_t> Desc = Note.getDesc(Phdr.p_align);
|
||||
return {Desc.data(), Desc.data() + Desc.size()};
|
||||
}
|
||||
|
||||
if (Err) {
|
||||
consumeError(std::move(Err));
|
||||
return Failure;
|
||||
}
|
||||
}
|
||||
|
||||
return Failure;
|
||||
}
|
||||
|
||||
static std::pair<const unsigned char *, const unsigned char *>
|
||||
find_metadata(void *binary, size_t binSize) {
|
||||
constexpr std::pair<const unsigned char *, const unsigned char *> Failure = {
|
||||
nullptr, nullptr};
|
||||
|
||||
StringRef Buffer = StringRef(static_cast<const char *>(binary), binSize);
|
||||
auto ElfOrErr = ObjectFile::createELFObjectFile(MemoryBufferRef(Buffer, ""),
|
||||
/*InitContent=*/false);
|
||||
if (!ElfOrErr) {
|
||||
consumeError(ElfOrErr.takeError());
|
||||
return Failure;
|
||||
}
|
||||
|
||||
if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(ElfOrErr->get()))
|
||||
return findMetadata(*ELFObj);
|
||||
return Failure;
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Searches the msgpack map `message` for the key `needle`.
// For every matching key, *res is set to the whole value range; if that value
// decodes as an array, *size receives its element count.
// Returns 0 only when exactly one matching key held an array, 1 otherwise.
int map_lookup_array(msgpack::byte_range message, const char *needle,
                     msgpack::byte_range *res, uint64_t *size) {
  unsigned hits = 0;

  // Visitor that only reacts to arrays: counts them and records their length.
  struct array_probe : msgpack::functors_defaults<array_probe> {
    unsigned &hits;
    uint64_t *length;

    array_probe(unsigned &hits, uint64_t *length)
        : hits(hits), length(length) {}

    const unsigned char *handle_array(uint64_t N, msgpack::byte_range bytes) {
      ++hits;
      *length = N;
      return bytes.end;
    }
  };

  auto on_entry = [&](msgpack::byte_range key, msgpack::byte_range value) {
    if (!msgpack::message_is_string(key, needle))
      return;
    msgpack::handle_msgpack<array_probe>(value, {hits, size});
    // Hand back the whole value, not just its elements.
    *res = value;
  };
  msgpack::foreach_map(message, on_entry);

  return hits == 1 ? 0 : 1;
}
|
||||
|
||||
// Searches the msgpack map `message` for the key `needle` and copies its
// string value into *res.
// Returns 0 only when exactly one matching key held a string, 1 otherwise.
int map_lookup_string(msgpack::byte_range message, const char *needle,
                      std::string *res) {
  unsigned hits = 0;

  // Visitor that only reacts to strings: counts them and stores a copy.
  struct string_probe : public msgpack::functors_defaults<string_probe> {
    unsigned &hits;
    std::string *out;

    string_probe(unsigned &hits, std::string *out) : hits(hits), out(out) {}

    void handle_string(size_t N, const unsigned char *str) {
      ++hits;
      *out = std::string(str, str + N);
    }
  };

  auto on_entry = [&](msgpack::byte_range key, msgpack::byte_range value) {
    if (!msgpack::message_is_string(key, needle))
      return;
    msgpack::handle_msgpack<string_probe>(value, {hits, res});
  };
  msgpack::foreach_map(message, on_entry);

  return hits == 1 ? 0 : 1;
}
|
||||
|
||||
// Searches the msgpack map `message` for the key `needle` and stores its
// unsigned integer value in *res.
// Returns 0 only when exactly one matching key held an unsigned value,
// 1 otherwise.
int map_lookup_uint64_t(msgpack::byte_range message, const char *needle,
                        uint64_t *res) {
  unsigned hits = 0;

  auto record = [&](uint64_t x) {
    ++hits;
    *res = x;
  };
  auto on_entry = [&](msgpack::byte_range key, msgpack::byte_range value) {
    if (!msgpack::message_is_string(key, needle))
      return;
    msgpack::foronly_unsigned(value, record);
  };
  msgpack::foreach_map(message, on_entry);

  return hits == 1 ? 0 : 1;
}
|
||||
|
||||
// Stores the element at zero-based position `elt` of the msgpack array
// `message` in *res.
// Returns 0 when such an element was visited and 1 otherwise.
int array_lookup_element(msgpack::byte_range message, uint64_t elt,
                         msgpack::byte_range *res) {
  bool found = false;
  uint64_t position = 0;

  auto on_element = [&](msgpack::byte_range value) {
    if (position == elt) {
      *res = value;
      found = true;
    }
    ++position;
  };
  msgpack::foreach_array(message, on_element);

  return found ? 0 : 1;
}
|
||||
|
||||
// Fills *kernelarg from the msgpack map `args_element`.
// Recognized keys: ".name", ".size", ".offset" and ".value_kind"; a value kind
// that is not present in ArgValueKind leaves valueKind_ untouched.
// Always returns 0.
int populate_kernelArgMD(msgpack::byte_range args_element,
                         KernelArgMD *kernelarg) {
  auto to_string = [](size_t N, const unsigned char *str) {
    return std::string(str, str + N);
  };

  auto on_entry = [&](msgpack::byte_range key, msgpack::byte_range value) {
    if (msgpack::message_is_string(key, ".name")) {
      msgpack::foronly_string(value, [&](size_t N, const unsigned char *str) {
        kernelarg->name_ = to_string(N, str);
      });
      return;
    }
    if (msgpack::message_is_string(key, ".size")) {
      msgpack::foronly_unsigned(value,
                                [&](uint64_t x) { kernelarg->size_ = x; });
      return;
    }
    if (msgpack::message_is_string(key, ".offset")) {
      msgpack::foronly_unsigned(value,
                                [&](uint64_t x) { kernelarg->offset_ = x; });
      return;
    }
    if (msgpack::message_is_string(key, ".value_kind")) {
      msgpack::foronly_string(value, [&](size_t N, const unsigned char *str) {
        auto Found = ArgValueKind.find(to_string(N, str));
        if (Found != ArgValueKind.end())
          kernelarg->valueKind_ = Found->second;
      });
    }
  };
  msgpack::foreach_map(args_element, on_entry);

  return 0;
}
|
||||
} // namespace
|
||||
|
||||
static hsa_status_t get_code_object_custom_metadata(
|
||||
void *binary, size_t binSize,
|
||||
std::map<std::string, atl_kernel_info_t> &KernelInfoTable) {
|
||||
// parse code object with different keys from v2
|
||||
// also, the kernel name is not the same as the symbol name -- so a
|
||||
// symbol->name map is needed
|
||||
|
||||
std::pair<const unsigned char *, const unsigned char *> metadata =
|
||||
find_metadata(binary, binSize);
|
||||
if (!metadata.first) {
|
||||
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
|
||||
}
|
||||
|
||||
uint64_t kernelsSize = 0;
|
||||
int msgpack_errors = 0;
|
||||
msgpack::byte_range kernel_array;
|
||||
msgpack_errors =
|
||||
map_lookup_array({metadata.first, metadata.second}, "amdhsa.kernels",
|
||||
&kernel_array, &kernelsSize);
|
||||
if (msgpack_errors != 0) {
|
||||
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
|
||||
"kernels lookup in program metadata");
|
||||
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < kernelsSize; i++) {
|
||||
assert(msgpack_errors == 0);
|
||||
std::string kernelName;
|
||||
std::string symbolName;
|
||||
|
||||
msgpack::byte_range element;
|
||||
msgpack_errors += array_lookup_element(kernel_array, i, &element);
|
||||
if (msgpack_errors != 0) {
|
||||
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
|
||||
"element lookup in kernel metadata");
|
||||
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
|
||||
}
|
||||
|
||||
msgpack_errors += map_lookup_string(element, ".name", &kernelName);
|
||||
msgpack_errors += map_lookup_string(element, ".symbol", &symbolName);
|
||||
if (msgpack_errors != 0) {
|
||||
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
|
||||
"strings lookup in kernel metadata");
|
||||
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
|
||||
}
|
||||
|
||||
// Make sure that kernelName + ".kd" == symbolName
|
||||
if ((kernelName + ".kd") != symbolName) {
|
||||
printf("[%s:%d] Kernel name mismatching symbol: %s != %s + .kd\n",
|
||||
__FILE__, __LINE__, symbolName.c_str(), kernelName.c_str());
|
||||
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
|
||||
}
|
||||
|
||||
atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
uint64_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count;
|
||||
msgpack_errors += map_lookup_uint64_t(element, ".sgpr_count", &sgpr_count);
|
||||
if (msgpack_errors != 0) {
|
||||
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
|
||||
"sgpr count metadata lookup in kernel metadata");
|
||||
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
|
||||
}
|
||||
|
||||
info.sgpr_count = sgpr_count;
|
||||
|
||||
msgpack_errors += map_lookup_uint64_t(element, ".vgpr_count", &vgpr_count);
|
||||
if (msgpack_errors != 0) {
|
||||
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
|
||||
"vgpr count metadata lookup in kernel metadata");
|
||||
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
|
||||
}
|
||||
|
||||
info.vgpr_count = vgpr_count;
|
||||
|
||||
msgpack_errors +=
|
||||
map_lookup_uint64_t(element, ".sgpr_spill_count", &sgpr_spill_count);
|
||||
if (msgpack_errors != 0) {
|
||||
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
|
||||
"sgpr spill count metadata lookup in kernel metadata");
|
||||
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
|
||||
}
|
||||
|
||||
info.sgpr_spill_count = sgpr_spill_count;
|
||||
|
||||
msgpack_errors +=
|
||||
map_lookup_uint64_t(element, ".vgpr_spill_count", &vgpr_spill_count);
|
||||
if (msgpack_errors != 0) {
|
||||
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
|
||||
"vgpr spill count metadata lookup in kernel metadata");
|
||||
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
|
||||
}
|
||||
|
||||
info.vgpr_spill_count = vgpr_spill_count;
|
||||
|
||||
size_t kernel_explicit_args_size = 0;
|
||||
uint64_t kernel_segment_size;
|
||||
msgpack_errors += map_lookup_uint64_t(element, ".kernarg_segment_size",
|
||||
&kernel_segment_size);
|
||||
if (msgpack_errors != 0) {
|
||||
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
|
||||
"kernarg segment size metadata lookup in kernel metadata");
|
||||
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
|
||||
}
|
||||
|
||||
bool hasHiddenArgs = false;
|
||||
if (kernel_segment_size > 0) {
|
||||
uint64_t argsSize;
|
||||
size_t offset = 0;
|
||||
|
||||
msgpack::byte_range args_array;
|
||||
msgpack_errors +=
|
||||
map_lookup_array(element, ".args", &args_array, &argsSize);
|
||||
if (msgpack_errors != 0) {
|
||||
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
|
||||
"kernel args metadata lookup in kernel metadata");
|
||||
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < argsSize; ++i) {
|
||||
KernelArgMD lcArg;
|
||||
|
||||
msgpack::byte_range args_element;
|
||||
msgpack_errors += array_lookup_element(args_array, i, &args_element);
|
||||
if (msgpack_errors != 0) {
|
||||
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
|
||||
"iterate args map in kernel args metadata");
|
||||
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
|
||||
}
|
||||
|
||||
msgpack_errors += populate_kernelArgMD(args_element, &lcArg);
|
||||
if (msgpack_errors != 0) {
|
||||
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
|
||||
"iterate args map in kernel args metadata");
|
||||
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
|
||||
}
|
||||
// v3 has offset field and not align field
|
||||
size_t new_offset = lcArg.offset_;
|
||||
size_t padding = new_offset - offset;
|
||||
offset = new_offset;
|
||||
DP("Arg[%lu] \"%s\" (%u, %u)\n", i, lcArg.name_.c_str(), lcArg.size_,
|
||||
lcArg.offset_);
|
||||
offset += lcArg.size_;
|
||||
|
||||
// check if the arg is a hidden/implicit arg
|
||||
// this logic assumes that all hidden args are 8-byte aligned
|
||||
if (!isImplicit(lcArg.valueKind_)) {
|
||||
info.explicit_argument_count++;
|
||||
kernel_explicit_args_size += lcArg.size_;
|
||||
} else {
|
||||
info.implicit_argument_count++;
|
||||
hasHiddenArgs = true;
|
||||
}
|
||||
kernel_explicit_args_size += padding;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Probably don't want this arithmetic
|
||||
info.kernel_segment_size =
|
||||
(hasHiddenArgs ? kernel_explicit_args_size : kernel_segment_size);
|
||||
DP("[%s: kernarg seg size] (%lu --> %u)\n", kernelName.c_str(),
|
||||
kernel_segment_size, info.kernel_segment_size);
|
||||
|
||||
// kernel received, now add it to the kernel info table
|
||||
KernelInfoTable[kernelName] = info;
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Reads the name of `symbol` into `name`, exactly as long as the runtime
// reports through HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH. Prints a diagnostic
// and returns the failing status when either query fails.
static hsa_status_t get_symbol_name(hsa_executable_symbol_t symbol,
                                    std::string &name) {
  uint32_t name_length = 0;
  hsa_status_t err = hsa_executable_symbol_get_info(
      symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &name_length);
  if (err != HSA_STATUS_SUCCESS) {
    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
           "Symbol info extraction", get_error_string(err));
    return err;
  }

  // Same buffer size as the former malloc(name_length + 1), but owned by the
  // string so no exit path can leak it.
  name.assign(static_cast<size_t>(name_length) + 1, '\0');
  err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME,
                                       &name[0]);
  if (err != HSA_STATUS_SUCCESS) {
    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
           "Symbol info extraction", get_error_string(err));
    return err;
  }
  name.resize(name_length);
  return HSA_STATUS_SUCCESS;
}

// Records one executable symbol in the lookup tables.
//  - Kernel symbols: the ".kd" suffix is dropped from the name, the existing
//    KernelInfoTable entry for that kernel is completed with the kernel
//    object handle and the group/private segment sizes, and written back.
//    A kernel without an existing entry is an error.
//  - Variable symbols: address and size are stored in SymbolInfoTable under
//    the symbol name.
//  - Anything else is ignored.
// Returns HSA_STATUS_SUCCESS, or the failing status after printing a
// diagnostic.
static hsa_status_t
populate_InfoTables(hsa_executable_symbol_t symbol,
                    std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
                    std::map<std::string, atl_symbol_info_t> &SymbolInfoTable) {
  hsa_symbol_kind_t type;

  hsa_status_t err;
  err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE,
                                       &type);
  if (err != HSA_STATUS_SUCCESS) {
    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
           "Symbol info extraction", get_error_string(err));
    return err;
  }
  DP("Exec Symbol type: %d\n", type);

  if (type == HSA_SYMBOL_KIND_KERNEL) {
    std::string name;
    err = get_symbol_name(symbol, name);
    if (err != HSA_STATUS_SUCCESS)
      return err;

    // remove the suffix .kd from symbol name; a shorter name cannot carry it
    // and used to cause an out-of-bounds write.
    if (name.size() < 3) {
      DP("amdgpu internal consistency error\n");
      return HSA_STATUS_ERROR;
    }
    name.resize(name.size() - 3);
    // Like the former C string, the key stops at the first NUL byte.
    std::string kernelName(name.c_str());

    // by now, the kernel info table should already have an entry
    // because the non-ROCr custom code object parsing is called before
    // iterating over the code object symbols using ROCr
    auto Entry = KernelInfoTable.find(kernelName);
    if (Entry == KernelInfoTable.end()) {
      DP("amdgpu internal consistency error\n");
      return HSA_STATUS_ERROR;
    }
    // Work on a copy so that a failing query leaves the table untouched.
    atl_kernel_info_t info = Entry->second;

    /* Extract dispatch information from the symbol */
    err = hsa_executable_symbol_get_info(
        symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
        &(info.kernel_object));
    if (err != HSA_STATUS_SUCCESS) {
      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
             "Extracting the symbol from the executable",
             get_error_string(err));
      return err;
    }
    err = hsa_executable_symbol_get_info(
        symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
        &(info.group_segment_size));
    if (err != HSA_STATUS_SUCCESS) {
      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
             "Extracting the group segment size from the executable",
             get_error_string(err));
      return err;
    }
    err = hsa_executable_symbol_get_info(
        symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
        &(info.private_segment_size));
    if (err != HSA_STATUS_SUCCESS) {
      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
             "Extracting the private segment from the executable",
             get_error_string(err));
      return err;
    }

    DP("Kernel %s --> %lx symbol %u group segsize %u pvt segsize %u bytes "
       "kernarg\n",
       kernelName.c_str(), info.kernel_object, info.group_segment_size,
       info.private_segment_size, info.kernel_segment_size);

    // assign it back to the kernel info table
    Entry->second = info;
  } else if (type == HSA_SYMBOL_KIND_VARIABLE) {
    std::string name;
    err = get_symbol_name(symbol, name);
    if (err != HSA_STATUS_SUCCESS)
      return err;

    atl_symbol_info_t info;

    err = hsa_executable_symbol_get_info(
        symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &(info.addr));
    if (err != HSA_STATUS_SUCCESS) {
      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
             "Symbol info address extraction", get_error_string(err));
      return err;
    }

    err = hsa_executable_symbol_get_info(
        symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &(info.size));
    if (err != HSA_STATUS_SUCCESS) {
      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
             "Symbol info size extraction", get_error_string(err));
      return err;
    }

    DP("Symbol %s = %p (%u bytes)\n", name.c_str(), (void *)info.addr,
       info.size);
    // Like the former C string, the key stops at the first NUL byte.
    SymbolInfoTable[std::string(name.c_str())] = info;
  } else {
    DP("Symbol is an indirect function\n");
  }
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Loads the code object held in [module_bytes, module_bytes + module_size)
// for `agent`.
//
// Steps: create an unfrozen executable, parse the custom metadata into
// KernelInfoTable, deserialize the code object, let `on_deserialized_data`
// inspect or patch the deserialized image, load it, freeze the executable and
// iterate its symbols to complete KernelInfoTable / SymbolInfoTable.
//
// On success the executable is appended to HSAExecutables (its owner from
// then on) and HSA_STATUS_SUCCESS is returned. On failure the executable is
// destroyed here, since nobody else holds it, and an error status is
// returned: the callback's own status if the callback failed,
// HSA_STATUS_ERROR otherwise. Table entries written before a failure are not
// rolled back.
hsa_status_t RegisterModuleFromMemory(
    std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
    std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
    void *module_bytes, size_t module_size, hsa_agent_t agent,
    hsa_status_t (*on_deserialized_data)(void *data, size_t size,
                                         void *cb_state),
    void *cb_state, std::vector<hsa_executable_t> &HSAExecutables) {
  hsa_status_t err;
  hsa_executable_t executable = {0};
  hsa_profile_t agent_profile;

  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile);
  if (err != HSA_STATUS_SUCCESS) {
    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
           "Query the agent profile", get_error_string(err));
    return HSA_STATUS_ERROR;
  }
  // FIXME: Assume that every profile is FULL until we understand how to build
  // GCN with base profile
  agent_profile = HSA_PROFILE_FULL;
  /* Create the empty executable. */
  err = hsa_executable_create(agent_profile, HSA_EXECUTABLE_STATE_UNFROZEN, "",
                              &executable);
  if (err != HSA_STATUS_SUCCESS) {
    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
           "Create the executable", get_error_string(err));
    return HSA_STATUS_ERROR;
  }

  // From here on every failure path must release the executable. The status
  // of the destroy call is deliberately ignored: the original failure is the
  // one worth reporting.
  auto fail = [&executable](hsa_status_t status) -> hsa_status_t {
    hsa_executable_destroy(executable);
    return status;
  };

  bool module_load_success = false;
  do { // single pass; `break` abandons the load
    // Some metadata info is not available through ROCr API, so use custom
    // code object metadata parsing to collect such metadata info
    err = get_code_object_custom_metadata(module_bytes, module_size,
                                          KernelInfoTable);
    if (err != HSA_STATUS_SUCCESS) {
      DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
         "Getting custom code object metadata", get_error_string(err));
      break;
    }

    // Deserialize code object.
    hsa_code_object_t code_object = {0};
    err = hsa_code_object_deserialize(module_bytes, module_size, NULL,
                                      &code_object);
    if (err != HSA_STATUS_SUCCESS) {
      DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
         "Code Object Deserialization", get_error_string(err));
      break;
    }
    assert(0 != code_object.handle);

    // Mutating the device image here avoids another allocation & memcpy
    void *code_object_alloc_data =
        reinterpret_cast<void *>(code_object.handle);
    hsa_status_t impl_err =
        on_deserialized_data(code_object_alloc_data, module_size, cb_state);
    if (impl_err != HSA_STATUS_SUCCESS) {
      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
             "Error in deserialized_data callback",
             get_error_string(impl_err));
      return fail(impl_err);
    }

    /* Load the code object. */
    err =
        hsa_executable_load_code_object(executable, agent, code_object, NULL);
    if (err != HSA_STATUS_SUCCESS) {
      DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
         "Loading the code object", get_error_string(err));
      break;
    }

    // cannot iterate over symbols until executable is frozen
    module_load_success = true;
  } while (0);
  DP("Modules loaded successful? %d\n", module_load_success);
  if (!module_load_success)
    return fail(HSA_STATUS_ERROR);

  /* Freeze the executable; it can now be queried for symbols. */
  err = hsa_executable_freeze(executable, "");
  if (err != HSA_STATUS_SUCCESS) {
    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
           "Freeze the executable", get_error_string(err));
    return fail(HSA_STATUS_ERROR);
  }

  err = hsa::executable_iterate_symbols(
      executable,
      [&](hsa_executable_t, hsa_executable_symbol_t symbol) -> hsa_status_t {
        return populate_InfoTables(symbol, KernelInfoTable, SymbolInfoTable);
      });
  if (err != HSA_STATUS_SUCCESS) {
    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
           "Iterating over symbols for execuatable", get_error_string(err));
    return fail(HSA_STATUS_ERROR);
  }

  // save the executable and destroy during finalize
  HSAExecutables.push_back(executable);
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
} // namespace core
|
@ -1,20 +0,0 @@
|
||||
//===--- amdgpu/src/print_tracing.h ------------------------------- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef LIBOMPTARGET_PLUGINS_AMGGPU_SRC_PRINT_TRACING_H_INCLUDED
|
||||
#define LIBOMPTARGET_PLUGINS_AMGGPU_SRC_PRINT_TRACING_H_INCLUDED
|
||||
|
||||
// Independent single-bit flags selecting the tracing output that is enabled.
enum PrintTraceControlBits {
  LAUNCH = 1 << 0,          // print a message to stderr for each kernel launch
  RTL_TIMING = 1 << 1,      // Print timing info around each RTL step
  STARTUP_DETAILS = 1 << 2, // Details around loading up kernel
  RTL_TO_STDOUT = 1 << 3    // Redirect RTL tracing to stdout
};
|
||||
|
||||
extern int print_kernel_trace; // set by environment variable
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
@ -1,14 +0,0 @@
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Common parts which can be used by all plugins
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
# Descend into each shared helper directory; both are added unconditionally.
add_subdirectory(elf_common)
|
||||
add_subdirectory(MemoryManager)
|
@ -1,76 +0,0 @@
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Build a plugin for a CUDA machine if available.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
# Cache switch that lets the user disable the CUDA plugin entirely.
set(LIBOMPTARGET_BUILD_CUDA_PLUGIN TRUE CACHE BOOL
  "Whether to build CUDA plugin")
if (NOT LIBOMPTARGET_BUILD_CUDA_PLUGIN)
  libomptarget_say("Not building CUDA offloading plugin: LIBOMPTARGET_BUILD_CUDA_PLUGIN is false")
  return()
endif()

# Only Linux hosts on x86_64, ppc64le or aarch64 are supported. The
# alternatives share a single group so that the trailing `$` anchors every one
# of them; written as "(a)|(b)|(c)$" the anchor would bind to the last
# alternative only.
if (NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64|ppc64le|aarch64)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux"))
  libomptarget_say("Not building CUDA offloading plugin: only support CUDA in Linux x86_64, ppc64le, or aarch64 hosts.")
  return()
endif()

libomptarget_say("Building CUDA offloading plugin.")

# LIBOMPTARGET_DLOPEN_LIBCUDA only provides the default (OFF) for the
# user-facing option that forces libcuda to be dlopened instead of linked.
set(LIBOMPTARGET_DLOPEN_LIBCUDA OFF)
option(LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA "Build with dlopened libcuda" ${LIBOMPTARGET_DLOPEN_LIBCUDA})
|
||||
|
||||
# The CUDA offloading plugin, built as a shared library. The version script
# restricts the exported symbols and `-z,defs` rejects undefined symbols at
# link time.
add_llvm_library(omptarget.rtl.cuda SHARED
  src/rtl.cpp

  LINK_COMPONENTS
    Support
    Object

  LINK_LIBS PRIVATE
    elf_common
    MemoryManager
    ${OPENMP_PTHREAD_LIB}
    "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports,-z,defs"

  NO_INSTALL_RPATH
)

# Use the dlopen wrapper when it is forced or when no CUDA installation was
# found; otherwise link directly against the CUDA driver library.
if(LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA OR NOT LIBOMPTARGET_DEP_CUDA_FOUND)
  libomptarget_say("Building CUDA plugin for dlopened libcuda")
  target_include_directories(omptarget.rtl.cuda PRIVATE dynamic_cuda)
  target_sources(omptarget.rtl.cuda PRIVATE dynamic_cuda/cuda.cpp)
else()
  libomptarget_say("Building CUDA plugin linked against libcuda")
  target_link_libraries(omptarget.rtl.cuda PRIVATE CUDA::cuda_driver)
endif()

# TARGET_NAME is the suffix used in the runtime messaging dumps.
target_compile_definitions(omptarget.rtl.cuda PRIVATE TARGET_NAME="CUDA")
target_include_directories(omptarget.rtl.cuda PRIVATE ${LIBOMPTARGET_INCLUDE_DIR})

# The plugin is installed into the lib destination folder.
install(TARGETS omptarget.rtl.cuda LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
set_target_properties(omptarget.rtl.cuda
  PROPERTIES
    INSTALL_RPATH "$ORIGIN"
    BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
    CXX_VISIBILITY_PRESET protected
)
|
||||
|
||||
# Tell the parent scope whether the nvptx offloading target should be tested.
# Tests are enabled when an NVIDIA GPU was detected or when the user forces
# them; otherwise the plugin is being built speculatively and there may be no
# CUDA available to run against.
option(LIBOMPTARGET_FORCE_NVIDIA_TESTS "Build NVIDIA libomptarget tests" OFF)
if (NOT (LIBOMPTARGET_FOUND_NVIDIA_GPU OR LIBOMPTARGET_FORCE_NVIDIA_TESTS))
  libomptarget_say("Not generating NVIDIA tests, no supported devices detected. Use 'LIBOMPTARGET_FORCE_NVIDIA_TESTS' to override.")
else()
  libomptarget_say("Enable tests using CUDA plugin")
  # Both variables are updated for the parent directory scope.
  set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} nvptx64-nvidia-cuda nvptx64-nvidia-cuda-LTO" PARENT_SCOPE)
  list(APPEND LIBOMPTARGET_TESTED_PLUGINS "omptarget.rtl.cuda")
  set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
endif()
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +0,0 @@
|
||||
/* Version script for the plugins: symbols matching __tgt_rtl* are exported,
   every other symbol is kept local. */
VERS1.0 {
  global:
    __tgt_rtl*;
  local:
    *;
};
|
@ -1,280 +0,0 @@
|
||||
//===-RTLs/generic-64bit/src/rtl.cpp - Target RTLs Implementation - C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// RTL for generic 64-bit machine
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Support/DynamicLibrary.h"
|
||||
|
||||
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ffi.h>
#include <link.h>
#include <list>
#include <string>
#include <unistd.h>
#include <vector>
|
||||
|
||||
#include "Debug.h"
|
||||
#include "omptargetplugin.h"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace llvm::sys;
|
||||
|
||||
#ifndef TARGET_NAME
|
||||
#define TARGET_NAME Generic ELF - 64bit
|
||||
#endif
|
||||
#define DEBUG_PREFIX "TARGET " GETNAME(TARGET_NAME) " RTL"
|
||||
|
||||
#ifndef TARGET_ELF_ID
|
||||
#define TARGET_ELF_ID 0
|
||||
#endif
|
||||
|
||||
#include "elf_common.h"
|
||||
|
||||
#define NUMBER_OF_DEVICES 4
|
||||
#define OFFLOAD_SECTION_NAME "omp_offloading_entries"
|
||||
|
||||
/// Array of Dynamic libraries loaded for this target.
|
||||
struct DynLibTy {
|
||||
std::string FileName;
|
||||
std::unique_ptr<DynamicLibrary> DynLib;
|
||||
};
|
||||
|
||||
/// Keep entries table per device.
|
||||
struct FuncOrGblEntryTy {
|
||||
__tgt_target_table Table;
|
||||
SmallVector<__tgt_offload_entry> Entries;
|
||||
};
|
||||
|
||||
/// Class containing all the device information.
|
||||
class RTLDeviceInfoTy {
|
||||
std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries;
|
||||
|
||||
public:
|
||||
std::list<DynLibTy> DynLibs;
|
||||
|
||||
// Record entry point associated with device.
|
||||
void createOffloadTable(int32_t DeviceId,
|
||||
SmallVector<__tgt_offload_entry> &&Entries) {
|
||||
assert(DeviceId < (int32_t)FuncGblEntries.size() &&
|
||||
"Unexpected device id!");
|
||||
FuncGblEntries[DeviceId].emplace_back();
|
||||
FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
|
||||
|
||||
E.Entries = Entries;
|
||||
E.Table.EntriesBegin = E.Entries.begin();
|
||||
E.Table.EntriesEnd = E.Entries.end();
|
||||
}
|
||||
|
||||
// Return true if the entry is associated with device.
|
||||
bool findOffloadEntry(int32_t DeviceId, void *Addr) {
|
||||
assert(DeviceId < (int32_t)FuncGblEntries.size() &&
|
||||
"Unexpected device id!");
|
||||
FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
|
||||
|
||||
for (__tgt_offload_entry *I = E.Table.EntriesBegin,
|
||||
*End = E.Table.EntriesEnd;
|
||||
I < End; ++I) {
|
||||
if (I->addr == Addr)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Return the pointer to the target entries table.
|
||||
__tgt_target_table *getOffloadEntriesTable(int32_t DeviceId) {
|
||||
assert(DeviceId < (int32_t)FuncGblEntries.size() &&
|
||||
"Unexpected device id!");
|
||||
FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
|
||||
|
||||
return &E.Table;
|
||||
}
|
||||
|
||||
RTLDeviceInfoTy(int32_t NumDevices) { FuncGblEntries.resize(NumDevices); }
|
||||
|
||||
~RTLDeviceInfoTy() {
|
||||
// Close dynamic libraries
|
||||
for (auto &Lib : DynLibs) {
|
||||
if (Lib.DynLib->isValid())
|
||||
remove(Lib.FileName.c_str());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static RTLDeviceInfoTy DeviceInfo(NUMBER_OF_DEVICES);
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/// Tell the caller whether \p Image can be handled by this plugin. With a
/// configured ELF machine id the answer is whatever elf_check_machine reports
/// for that id; without one every image is rejected.
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
#if TARGET_ELF_ID >= 1
  // A usable ELF id was configured: compare the image against it.
  return elf_check_machine(Image, TARGET_ELF_ID);
#else
  // No valid ELF id, so no image can match.
  return 0;
#endif
}
|
||||
|
||||
/// Report the fixed number of devices this plugin exposes.
int32_t __tgt_rtl_number_of_devices() {
  const int32_t DeviceCount = NUMBER_OF_DEVICES;
  return DeviceCount;
}
|
||||
|
||||
/// Device initialization performs no work for this plugin and always reports
/// success.
int32_t __tgt_rtl_init_device(int32_t DeviceId) {
  (void)DeviceId; // No per-device state is set up here.
  return OFFLOAD_SUCCESS;
}
|
||||
|
||||
/// Load the device image \p Image for \p DeviceId and build its offload entry
/// table.
///
/// The image is written to a temporary file, loaded as a dynamic library, and
/// every host entry is resolved by name to its address in that library.
///
/// \returns the entries table of the device, or nullptr if the temporary file
/// cannot be created or written, the library cannot be loaded, or a host
/// entry has a null address.
__tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
                                          __tgt_device_image *Image) {

  DP("Dev %d: load binary from " DPxMOD " image\n", DeviceId,
     DPxPTR(Image->ImageStart));

  assert(DeviceId >= 0 && DeviceId < NUMBER_OF_DEVICES && "bad dev id");

  size_t ImageSize = (size_t)Image->ImageEnd - (size_t)Image->ImageStart;

  // load dynamic library and get the entry points. We use the dl library
  // to do the loading of the library, but we could do it directly to avoid the
  // dump to the temporary file.
  //
  // 1) Create tmp file with the library contents.
  // 2) Use dlopen to load the file and dlsym to retrieve the symbols.
  char TmpName[] = "/tmp/tmpfile_XXXXXX";
  int TmpFd = mkstemp(TmpName);

  if (TmpFd == -1)
    return nullptr;

  FILE *Ftmp = fdopen(TmpFd, "wb");

  if (!Ftmp) {
    // The stream never took ownership of the descriptor: release it and drop
    // the file mkstemp created.
    ::close(TmpFd);
    remove(TmpName);
    return nullptr;
  }

  // A short write or a failed flush would leave a truncated library on disk,
  // so treat both as errors. fclose is always called to release the stream.
  const bool Written = fwrite(Image->ImageStart, ImageSize, 1, Ftmp) == 1;
  const bool Closed = fclose(Ftmp) == 0;
  if (!Written || !Closed) {
    DP("Failed to write the image to temporary file %s\n", TmpName);
    remove(TmpName);
    return nullptr;
  }

  std::string ErrMsg;
  auto DynLib = std::make_unique<sys::DynamicLibrary>(
      sys::DynamicLibrary::getPermanentLibrary(TmpName, &ErrMsg));
  DynLibTy Lib = {TmpName, std::move(DynLib)};

  if (!Lib.DynLib->isValid()) {
    DP("Target library loading error: %s\n", ErrMsg.c_str());
    remove(TmpName);
    return nullptr;
  }

  __tgt_offload_entry *HostBegin = Image->EntriesBegin;
  __tgt_offload_entry *HostEnd = Image->EntriesEnd;

  // Create a new offloading entry list using the device symbol address.
  SmallVector<__tgt_offload_entry> Entries;
  for (__tgt_offload_entry *E = HostBegin; E != HostEnd; ++E) {
    if (!E->addr) {
      // Lib is not recorded in DeviceInfo on this path, so nothing else
      // would delete its backing file later.
      remove(TmpName);
      return nullptr;
    }

    __tgt_offload_entry Entry = *E;

    void *DevAddr = Lib.DynLib->getAddressOfSymbol(E->name);
    Entry.addr = DevAddr;

    DP("Entry point " DPxMOD " maps to global %s (" DPxMOD ")\n",
       DPxPTR(E - HostBegin), E->name, DPxPTR(DevAddr));

    Entries.emplace_back(Entry);
  }

  DeviceInfo.createOffloadTable(DeviceId, std::move(Entries));
  DeviceInfo.DynLibs.emplace_back(std::move(Lib));

  return DeviceInfo.getOffloadEntriesTable(DeviceId);
}
|
||||
|
||||
/// Print a one-line description of the device to stdout. The text is the same
/// for every device id.
void __tgt_rtl_print_device_info(int32_t DeviceId) {
  (void)DeviceId; // The description does not depend on the device.
  fputs(" This is a generic-elf-64bit device\n", stdout);
}
|
||||
|
||||
// Sample implementation of explicit memory allocator. For this plugin all kinds
|
||||
// are equivalent to each other.
|
||||
void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr,
|
||||
int32_t Kind) {
|
||||
void *Ptr = NULL;
|
||||
|
||||
switch (Kind) {
|
||||
case TARGET_ALLOC_DEVICE:
|
||||
case TARGET_ALLOC_HOST:
|
||||
case TARGET_ALLOC_SHARED:
|
||||
case TARGET_ALLOC_DEFAULT:
|
||||
Ptr = malloc(Size);
|
||||
break;
|
||||
default:
|
||||
REPORT("Invalid target data allocation kind");
|
||||
}
|
||||
|
||||
return Ptr;
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
|
||||
int64_t Size) {
|
||||
memcpy(TgtPtr, HstPtr, Size);
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
|
||||
int64_t Size) {
|
||||
memcpy(HstPtr, TgtPtr, Size);
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t) {
|
||||
free(TgtPtr);
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t __tgt_rtl_launch_kernel(int32_t DeviceId, void *TgtEntryPtr,
|
||||
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
||||
KernelArgsTy *KernelArgs,
|
||||
__tgt_async_info *AsyncInfoPtr) {
|
||||
assert(!KernelArgs->NumTeams[1] && !KernelArgs->NumTeams[2] &&
|
||||
!KernelArgs->ThreadLimit[1] && !KernelArgs->ThreadLimit[2] &&
|
||||
"Only one dimensional kernels supported.");
|
||||
// ignore team num and thread limit.
|
||||
|
||||
// Use libffi to launch execution.
|
||||
ffi_cif Cif;
|
||||
|
||||
// All args are references.
|
||||
std::vector<ffi_type *> ArgsTypes(KernelArgs->NumArgs, &ffi_type_pointer);
|
||||
std::vector<void *> Args(KernelArgs->NumArgs);
|
||||
std::vector<void *> Ptrs(KernelArgs->NumArgs);
|
||||
|
||||
for (uint32_t I = 0; I < KernelArgs->NumArgs; ++I) {
|
||||
Ptrs[I] = (void *)((intptr_t)TgtArgs[I] + TgtOffsets[I]);
|
||||
Args[I] = &Ptrs[I];
|
||||
}
|
||||
|
||||
ffi_status Status = ffi_prep_cif(&Cif, FFI_DEFAULT_ABI, KernelArgs->NumArgs,
|
||||
&ffi_type_void, &ArgsTypes[0]);
|
||||
|
||||
assert(Status == FFI_OK && "Unable to prepare target launch!");
|
||||
|
||||
if (Status != FFI_OK)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
DP("Running entry point at " DPxMOD "...\n", DPxPTR(TgtEntryPtr));
|
||||
|
||||
void (*Entry)(void);
|
||||
*((void **)&Entry) = TgtEntryPtr;
|
||||
ffi_call(&Cif, Entry, NULL, &Args[0]);
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
@ -1,17 +0,0 @@
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Build a plugin for a ppc64 machine if available.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
# The generic ELF plugin for ppc64 is only attempted on Linux systems;
# elsewhere a notice is printed instead.
if(NOT (CMAKE_SYSTEM_NAME MATCHES "Linux"))
  libomptarget_say("Not building ppc64 offloading plugin: machine not found in the system.")
else()
  build_generic_elf64("ppc64" "PPC64" "ppc64" "powerpc64-ibm-linux-gnu" "21")
endif()
|
@ -1,17 +0,0 @@
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Build a plugin for a ppc64le machine if available.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
# The generic ELF plugin for ppc64le is only attempted on Linux systems;
# elsewhere a notice is printed instead.
if(NOT (CMAKE_SYSTEM_NAME MATCHES "Linux"))
  libomptarget_say("Not building ppc64le offloading plugin: machine not found in the system.")
else()
  build_generic_elf64("ppc64le" "PPC64le" "ppc64" "powerpc64le-ibm-linux-gnu" "21")
endif()
|
@ -1,17 +0,0 @@
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
#
|
||||
# Build a plugin for a x86_64 machine if available.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
# The generic ELF plugin for x86_64 is only attempted on Linux systems;
# elsewhere a notice is printed instead.
if(NOT (CMAKE_SYSTEM_NAME MATCHES "Linux"))
  libomptarget_say("Not building x86_64 offloading plugin: machine not found in the system.")
else()
  build_generic_elf64("x86_64" "x86_64" "x86_64" "x86_64-pc-linux-gnu" "62")
endif()
|
@ -99,8 +99,6 @@ void RTLsTy::loadRTLs() {
|
||||
|
||||
DP("Loading RTLs...\n");
|
||||
|
||||
BoolEnvar NextGenPlugins("LIBOMPTARGET_NEXTGEN_PLUGINS", true);
|
||||
|
||||
// Attempt to open all the plugins and, if they exist, check if the interface
|
||||
// is correct and if they are supporting any devices.
|
||||
for (const char *Name : RTLNames) {
|
||||
@ -109,13 +107,6 @@ void RTLsTy::loadRTLs() {
|
||||
RTLInfoTy &RTL = AllRTLs.back();
|
||||
|
||||
const std::string BaseRTLName(Name);
|
||||
if (NextGenPlugins) {
|
||||
if (attemptLoadRTL(BaseRTLName + ".nextgen.so", RTL))
|
||||
continue;
|
||||
|
||||
DP("Falling back to original plugin...\n");
|
||||
}
|
||||
|
||||
if (!attemptLoadRTL(BaseRTLName + ".so", RTL))
|
||||
AllRTLs.pop_back();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user