[OpenMP] Delete old plugins

It's time to remove the old plugins as the next-gen has already been set
to default in LLVM 16.

Reviewed By: tianshilei1992

Differential Revision: https://reviews.llvm.org/D142820
This commit is contained in:
Joseph Huber 2023-03-21 11:27:37 -05:00
parent 70c08dbcfb
commit e90ab9148b
50 changed files with 35 additions and 7228 deletions

View File

@ -1378,7 +1378,7 @@ clause. Examples for both are given below.
.. code-block:: console
$ clang++ -fopenmp --offload-arch=gfx90a -O3 shared.c
$ env LIBOMPTARGET_NEXTGEN_PLUGINS=1 ./shared
$ env ./shared
.. _libomptarget_device_debugging:

View File

@ -105,7 +105,6 @@ set(LIBOMPTARGET_LLVM_LIBRARY_DIR "${LLVM_LIBRARY_DIR}" CACHE STRING
"Path to folder containing llvm library libomptarget.so")
# Build offloading plugins and device RTLs if they are available.
add_subdirectory(plugins)
add_subdirectory(plugins-nextgen)
add_subdirectory(DeviceRTL)
add_subdirectory(tools)

View File

@ -12,12 +12,12 @@
add_subdirectory(common)
# void build_generic_elf64_nextgen(string tmachine, string tmachine_name, string tmachine_libname, string elf_machine_id);
# void build_generic_elf64(string tmachine, string tmachine_name, string tmachine_libname, string elf_machine_id);
# - build a plugin for an ELF based generic 64-bit target based on libffi.
# - tmachine: name of the machine processor as used in the cmake build system.
# - tmachine_name: name of the machine to be printed with the debug messages.
# - tmachine_libname: machine name to be appended to the plugin library name.
macro(build_generic_elf64_nextgen tmachine tmachine_name tmachine_libname tmachine_triple elf_machine_id)
macro(build_generic_elf64 tmachine tmachine_name tmachine_libname tmachine_triple elf_machine_id)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
if(LIBOMPTARGET_DEP_LIBFFI_FOUND)
@ -36,7 +36,7 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
# Define target triple
add_definitions("-DLIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE=${tmachine}")
add_llvm_library("omptarget.rtl.${tmachine_libname}.nextgen"
add_llvm_library("omptarget.rtl.${tmachine_libname}"
SHARED
${CMAKE_CURRENT_SOURCE_DIR}/../generic-elf-64bit/src/rtl.cpp
@ -58,23 +58,23 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
)
if (LIBOMP_HAVE_VERSION_SCRIPT_FLAG)
target_link_libraries("omptarget.rtl.${tmachine_libname}.nextgen" PRIVATE
target_link_libraries("omptarget.rtl.${tmachine_libname}" PRIVATE
"-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports")
endif()
# Install plugin under the lib destination folder.
install(TARGETS "omptarget.rtl.${tmachine_libname}.nextgen"
install(TARGETS "omptarget.rtl.${tmachine_libname}"
LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
set_target_properties("omptarget.rtl.${tmachine_libname}.nextgen" PROPERTIES
set_target_properties("omptarget.rtl.${tmachine_libname}" PROPERTIES
INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
CXX_VISIBILITY_PRESET protected)
target_include_directories( "omptarget.rtl.${tmachine_libname}.nextgen" PRIVATE
target_include_directories( "omptarget.rtl.${tmachine_libname}" PRIVATE
${LIBOMPTARGET_INCLUDE_DIR}
${LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR})
list(APPEND LIBOMPTARGET_TESTED_PLUGINS
"omptarget.rtl.${tmachine_libname}.nextgen")
"omptarget.rtl.${tmachine_libname}")
else(LIBOMPTARGET_DEP_LIBFFI_FOUND)
libomptarget_say("Not building ${tmachine_name} NextGen offloading plugin: libffi dependency not found.")

View File

@ -11,7 +11,7 @@
##===----------------------------------------------------------------------===##
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
build_generic_elf64_nextgen("aarch64" "aarch64" "aarch64" "aarch64-unknown-linux-gnu" "183")
build_generic_elf64("aarch64" "aarch64" "aarch64" "aarch64-unknown-linux-gnu" "183")
else()
libomptarget_say("Not building aarch64 NextGen offloading plugin: machine not found in the system.")
endif()

View File

@ -52,8 +52,8 @@ if (${hsa-runtime64_FOUND} AND NOT LIBOMPTARGET_FORCE_DLOPEN_LIBHSA)
set(LIBOMPTARGET_DEP_LIBRARIES hsa-runtime64::hsa-runtime64)
else()
libomptarget_say("Building AMDGPU NextGen plugin for dlopened libhsa")
include_directories(../../plugins/amdgpu/dynamic_hsa)
set(LIBOMPTARGET_EXTRA_SOURCE ../../plugins/amdgpu/dynamic_hsa/hsa.cpp)
include_directories(dynamic_hsa)
set(LIBOMPTARGET_EXTRA_SOURCE dynamic_hsa/hsa.cpp)
set(LIBOMPTARGET_DEP_LIBRARIES)
endif()
@ -66,7 +66,7 @@ else()
set(LDFLAGS_UNDEFINED "-Wl,-z,defs")
endif()
add_llvm_library(omptarget.rtl.amdgpu.nextgen SHARED
add_llvm_library(omptarget.rtl.amdgpu SHARED
src/rtl.cpp
${LIBOMPTARGET_EXTRA_SOURCE}
@ -91,16 +91,16 @@ add_llvm_library(omptarget.rtl.amdgpu.nextgen SHARED
)
if ((OMPT_TARGET_DEFAULT) AND (LIBOMPTARGET_OMPT_SUPPORT))
target_link_libraries(omptarget.rtl.amdgpu.nextgen PRIVATE OMPT)
target_link_libraries(omptarget.rtl.amdgpu PRIVATE OMPT)
endif()
if (LIBOMP_HAVE_VERSION_SCRIPT_FLAG)
target_link_libraries(omptarget.rtl.amdgpu.nextgen PRIVATE
target_link_libraries(omptarget.rtl.amdgpu PRIVATE
"-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports")
endif()
target_include_directories(
omptarget.rtl.amdgpu.nextgen
omptarget.rtl.amdgpu
PRIVATE
${LIBOMPTARGET_INCLUDE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/utils
@ -108,7 +108,7 @@ target_include_directories(
# Install plugin under the lib destination folder.
install(TARGETS omptarget.rtl.amdgpu.nextgen LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
set_target_properties(omptarget.rtl.amdgpu.nextgen PROPERTIES
install(TARGETS omptarget.rtl.amdgpu LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
set_target_properties(omptarget.rtl.amdgpu PROPERTIES
INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
CXX_VISIBILITY_PRESET protected)

View File

@ -12,3 +12,5 @@
add_subdirectory(OMPT)
add_subdirectory(PluginInterface)
add_subdirectory(MemoryManager)
add_subdirectory(elf_common)

View File

@ -11,7 +11,7 @@
##===----------------------------------------------------------------------===##
# NOTE: Don't try to build `elf_common` using `add_llvm_library`.
# See openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
# See openmp/libomptarget/plugins/common/PluginInterface/CMakeLists.txt
# for more explanation.
add_library(elf_common OBJECT elf_common.cpp ELFSymbols.cpp)

View File

@ -26,7 +26,7 @@ libomptarget_say("Building CUDA NextGen offloading plugin.")
set(LIBOMPTARGET_DLOPEN_LIBCUDA OFF)
option(LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA "Build with dlopened libcuda" ${LIBOMPTARGET_DLOPEN_LIBCUDA})
add_llvm_library(omptarget.rtl.cuda.nextgen SHARED
add_llvm_library(omptarget.rtl.cuda SHARED
src/rtl.cpp
LINK_COMPONENTS
@ -43,33 +43,33 @@ add_llvm_library(omptarget.rtl.cuda.nextgen SHARED
)
if ((OMPT_TARGET_DEFAULT) AND (LIBOMPTARGET_OMPT_SUPPORT))
target_link_libraries(omptarget.rtl.cuda.nextgen PRIVATE OMPT)
target_link_libraries(omptarget.rtl.cuda PRIVATE OMPT)
endif()
if (LIBOMP_HAVE_VERSION_SCRIPT_FLAG)
target_link_libraries(omptarget.rtl.cuda.nextgen PRIVATE
target_link_libraries(omptarget.rtl.cuda PRIVATE
"-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports,-z,defs")
endif()
if(LIBOMPTARGET_DEP_CUDA_FOUND AND NOT LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA)
libomptarget_say("Building CUDA plugin linked against libcuda")
target_link_libraries(omptarget.rtl.cuda.nextgen PRIVATE CUDA::cuda_driver)
target_link_libraries(omptarget.rtl.cuda PRIVATE CUDA::cuda_driver)
else()
libomptarget_say("Building CUDA plugin for dlopened libcuda")
target_include_directories(omptarget.rtl.cuda.nextgen PRIVATE ../../plugins/cuda/dynamic_cuda)
target_sources(omptarget.rtl.cuda.nextgen PRIVATE ../../plugins/cuda/dynamic_cuda/cuda.cpp)
target_include_directories(omptarget.rtl.cuda PRIVATE dynamic_cuda)
target_sources(omptarget.rtl.cuda PRIVATE dynamic_cuda/cuda.cpp)
endif()
# Define debug prefix. TODO: This should be automatized in the Debug.h but it
# requires changing the original plugins.
target_compile_definitions(omptarget.rtl.cuda.nextgen PRIVATE TARGET_NAME="CUDA")
target_compile_definitions(omptarget.rtl.cuda.nextgen PRIVATE DEBUG_PREFIX="TARGET CUDA RTL")
target_compile_definitions(omptarget.rtl.cuda PRIVATE TARGET_NAME="CUDA")
target_compile_definitions(omptarget.rtl.cuda PRIVATE DEBUG_PREFIX="TARGET CUDA RTL")
target_include_directories(omptarget.rtl.cuda.nextgen PRIVATE ${LIBOMPTARGET_INCLUDE_DIR})
target_include_directories(omptarget.rtl.cuda PRIVATE ${LIBOMPTARGET_INCLUDE_DIR})
# Install plugin under the lib destination folder.
install(TARGETS omptarget.rtl.cuda.nextgen LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
set_target_properties(omptarget.rtl.cuda.nextgen PROPERTIES
install(TARGETS omptarget.rtl.cuda LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
set_target_properties(omptarget.rtl.cuda PROPERTIES
INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
CXX_VISIBILITY_PRESET protected)

View File

@ -11,7 +11,7 @@
##===----------------------------------------------------------------------===##
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
build_generic_elf64_nextgen("ppc64" "PPC64" "ppc64" "powerpc64-ibm-linux-gnu" "21")
build_generic_elf64("ppc64" "PPC64" "ppc64" "powerpc64-ibm-linux-gnu" "21")
else()
libomptarget_say("Not building ppc64 NextGen offloading plugin: machine not found in the system.")
endif()

View File

@ -11,7 +11,7 @@
##===----------------------------------------------------------------------===##
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
build_generic_elf64_nextgen("ppc64le" "PPC64le" "ppc64" "powerpc64le-ibm-linux-gnu" "21")
build_generic_elf64("ppc64le" "PPC64le" "ppc64" "powerpc64le-ibm-linux-gnu" "21")
else()
libomptarget_say("Not building ppc64le NextGen offloading plugin: machine not found in the system.")
endif()

View File

@ -11,7 +11,7 @@
##===----------------------------------------------------------------------===##
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
build_generic_elf64_nextgen("x86_64" "x86_64" "x86_64" "x86_64-pc-linux-gnu" "62")
build_generic_elf64("x86_64" "x86_64" "x86_64" "x86_64-pc-linux-gnu" "62")
else()
libomptarget_say("Not building x86_64 NextGen offloading plugin: machine not found in the system.")
endif()

View File

@ -1,89 +0,0 @@
##===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
##===----------------------------------------------------------------------===##
#
# Build plugins for the user system if available.
#
##===----------------------------------------------------------------------===##
add_subdirectory(common)
# build_generic_elf64(tmachine tmachine_name tmachine_libname tmachine_triple elf_machine_id)
#
# Build a libffi-based offloading plugin for a generic 64-bit ELF target.
#   tmachine:         processor name as used by the CMake build system; it is
#                     matched (anchored at the end) against
#                     CMAKE_SYSTEM_PROCESSOR.
#   tmachine_name:    machine name printed in status and debug messages.
#   tmachine_libname: suffix of the plugin library, omptarget.rtl.<libname>.
#   tmachine_triple:  triple appended (plain and with an -LTO suffix) to
#                     LIBOMPTARGET_SYSTEM_TARGETS in the parent scope.
#   elf_machine_id:   value passed to the sources as TARGET_ELF_ID.
macro(build_generic_elf64 tmachine tmachine_name tmachine_libname tmachine_triple elf_machine_id)
  if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
    libomptarget_say("Not building ${tmachine_name} offloading plugin: machine not found in the system.")
  elseif(NOT LIBOMPTARGET_DEP_LIBFFI_FOUND)
    libomptarget_say("Not building ${tmachine_name} offloading plugin: libffi dependency not found.")
  else()
    libomptarget_say("Building ${tmachine_name} offloading plugin.")

    # Prefix of the runtime messages for this target.
    add_definitions("-DTARGET_NAME=${tmachine_name}")

    # ELF machine ID for this target.
    add_definitions("-DTARGET_ELF_ID=${elf_machine_id}")

    add_llvm_library("omptarget.rtl.${tmachine_libname}"
      SHARED

      ${CMAKE_CURRENT_SOURCE_DIR}/../generic-elf-64bit/src/rtl.cpp

      ADDITIONAL_HEADER_DIRS
        ${LIBOMPTARGET_INCLUDE_DIR}
        ${LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR}

      LINK_LIBS
      PRIVATE
        elf_common
        ${LIBOMPTARGET_DEP_LIBFFI_LIBRARIES}
        ${OPENMP_PTHREAD_LIB}
        "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports"

      NO_INSTALL_RPATH
    )

    # The plugin is installed into the lib destination folder.
    install(TARGETS "omptarget.rtl.${tmachine_libname}"
      LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")

    set_target_properties("omptarget.rtl.${tmachine_libname}" PROPERTIES
      INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
      CXX_VISIBILITY_PRESET protected)

    target_include_directories( "omptarget.rtl.${tmachine_libname}" PRIVATE
      ${LIBOMPTARGET_INCLUDE_DIR}
      ${LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR})

    list(APPEND LIBOMPTARGET_TESTED_PLUGINS
      "omptarget.rtl.${tmachine_libname}")

    # Tell the parent scope which triples and plugins are being built.
    set(LIBOMPTARGET_SYSTEM_TARGETS
      "${LIBOMPTARGET_SYSTEM_TARGETS} ${tmachine_triple} ${tmachine_triple}-LTO" PARENT_SCOPE)
    set(LIBOMPTARGET_TESTED_PLUGINS
      "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
  endif()
endmacro()
# Visit every per-target plugin directory, in the same order as before.
foreach(plugin_dir IN ITEMS aarch64 amdgpu cuda ppc64 ppc64le x86_64)
  add_subdirectory(${plugin_dir})
endforeach()

# Forward the accumulated target and plugin lists so the parent scope can see
# the plugins that will be created.
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}" PARENT_SCOPE)
set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)

View File

@ -1,17 +0,0 @@
##===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
##===----------------------------------------------------------------------===##
#
# Build a plugin for an aarch64 machine if available.
#
##===----------------------------------------------------------------------===##
# The aarch64 plugin is only attempted when the system name matches "Linux".
# NOTE(review): the message in the else() branch says "machine not found" even
# though the condition tests the operating system, not the processor.
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
# Arguments, in order: processor name, display name, library-name suffix,
# target triple, ELF machine id.
build_generic_elf64("aarch64" "aarch64" "aarch64" "aarch64-unknown-linux-gnu" "183")
else()
libomptarget_say("Not building aarch64 offloading plugin: machine not found in the system.")
endif()

View File

@ -1,122 +0,0 @@
##===----------------------------------------------------------------------===##
#
# The LLVM Compiler Infrastructure
#
# This file is dual licensed under the MIT and the University of Illinois Open
# Source Licenses. See LICENSE.txt for details.
#
##===----------------------------------------------------------------------===##
#
# Build a plugin for an AMDGPU machine if available.
#
##===----------------------------------------------------------------------===##
################################################################################
# User switch for the AMDGPU plugin; defaults to TRUE. When it is false the
# rest of this file is skipped via return().
set(LIBOMPTARGET_BUILD_AMDGPU_PLUGIN TRUE CACHE BOOL
"Whether to build AMDGPU plugin")
if (NOT LIBOMPTARGET_BUILD_AMDGPU_PLUGIN)
libomptarget_say("Not building AMDGPU offloading plugin: LIBOMPTARGET_BUILD_AMDGPU_PLUGIN is false")
return()
endif()
# as of rocm-3.7, hsa is installed with cmake packages and kmt is found via hsa
# QUIET: a missing package is not an error here; the result is read later
# through hsa-runtime64_FOUND.
find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
# Bail out unless the processor pattern matches and the system is Linux.
# NOTE(review): alternation binds looser than `$`, so only the aarch64
# alternative is anchored at the end of the string; x86_64 and ppc64le match
# anywhere in CMAKE_SYSTEM_PROCESSOR.
if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux"))
libomptarget_say("Not building AMDGPU plugin: only support AMDGPU in Linux x86_64, ppc64le, or aarch64 hosts")
return()
endif()
################################################################################
# Define the suffix for the runtime messaging dumps.
# These add_definitions() calls are directory-scoped: they apply to targets
# created in this directory after this point.
add_definitions(-DTARGET_NAME=AMDGPU)
# NOTE(review): LITTLEENDIAN_CPU is defined for ppc64le and aarch64 only, not
# for x86_64 - confirm that this is intended.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(ppc64le)|(aarch64)$")
add_definitions(-DLITTLEENDIAN_CPU=1)
endif()
# NOTE(review): CMAKE_BUILD_TYPE is empty under multi-config generators, so
# DEBUG is never defined there.
if(CMAKE_BUILD_TYPE MATCHES Debug)
add_definitions(-DDEBUG)
endif()
# LIBOMPTARGET_DLOPEN_LIBHSA only supplies the default (OFF) for the option
# below.
set(LIBOMPTARGET_DLOPEN_LIBHSA OFF)
option(LIBOMPTARGET_FORCE_DLOPEN_LIBHSA "Build with dlopened libhsa" ${LIBOMPTARGET_DLOPEN_LIBHSA})
# Link against the imported hsa-runtime64 target when the package was found
# and dlopen was not forced; otherwise build the bundled dynamic_hsa/hsa.cpp
# source and link no HSA library.
if (${hsa-runtime64_FOUND} AND NOT LIBOMPTARGET_FORCE_DLOPEN_LIBHSA)
libomptarget_say("Building AMDGPU plugin linked against libhsa")
set(LIBOMPTARGET_EXTRA_SOURCE)
set(LIBOMPTARGET_DEP_LIBRARIES hsa-runtime64::hsa-runtime64)
else()
libomptarget_say("Building AMDGPU plugin for dlopened libhsa")
include_directories(dynamic_hsa)
set(LIBOMPTARGET_EXTRA_SOURCE dynamic_hsa/hsa.cpp)
set(LIBOMPTARGET_DEP_LIBRARIES)
endif()
# NOTE(review): this file has already returned for any system whose name does
# not match "Linux", so the FreeBSD branch below looks unreachable.
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
# On FreeBSD, the 'environ' symbol is undefined at link time, but resolved by
# the dynamic linker at runtime. Therefore, allow the symbol to be undefined
# when creating a shared library.
set(LDFLAGS_UNDEFINED "-Wl,--allow-shlib-undefined")
else()
set(LDFLAGS_UNDEFINED "-Wl,-z,defs")
endif()
# The AMDGPU plugin shared library. LIBOMPTARGET_EXTRA_SOURCE,
# LIBOMPTARGET_DEP_LIBRARIES and LDFLAGS_UNDEFINED are set earlier in this
# file depending on how libhsa is consumed and on the host system.
add_llvm_library(omptarget.rtl.amdgpu SHARED
impl/impl.cpp
impl/interop_hsa.cpp
impl/data.cpp
impl/get_elf_mach_gfx_name.cpp
impl/system.cpp
impl/msgpack.cpp
src/rtl.cpp
${LIBOMPTARGET_EXTRA_SOURCE}
ADDITIONAL_HEADER_DIRS
${LIBOMPTARGET_INCLUDE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/impl
${CMAKE_CURRENT_SOURCE_DIR}/../../plugins-nextgen/amdgpu/utils
LINK_COMPONENTS
Support
Object
LINK_LIBS
PRIVATE
elf_common
${LIBOMPTARGET_DEP_LIBRARIES}
${OPENMP_PTHREAD_LIB}
"-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports"
${LDFLAGS_UNDEFINED}
NO_INSTALL_RPATH
)
# Private include paths for the plugin sources; the same three directories
# are also listed under ADDITIONAL_HEADER_DIRS above.
target_include_directories(
omptarget.rtl.amdgpu
PRIVATE
${LIBOMPTARGET_INCLUDE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/impl
${CMAKE_CURRENT_SOURCE_DIR}/../../plugins-nextgen/amdgpu/utils
)
# Install plugin under the lib destination folder.
install(TARGETS omptarget.rtl.amdgpu LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
# add_llvm_library was given NO_INSTALL_RPATH, so the RPATHs are set
# explicitly here.
set_target_properties(omptarget.rtl.amdgpu PROPERTIES
INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
CXX_VISIBILITY_PRESET protected)
# Report to the parent scope that we are building a plugin for hsa.
# This controls whether tests are run for the amdgpu offloading target.
# Tests are generated when a supported GPU was detected
# (LIBOMPTARGET_FOUND_AMDGPU_GPU) or when the user forces them with
# LIBOMPTARGET_FORCE_AMDGPU_TESTS.
# Otherwise this plugin is being built speculatively and there may be no hsa available
option(LIBOMPTARGET_FORCE_AMDGPU_TESTS "Build AMDGPU libomptarget tests" OFF)
if (LIBOMPTARGET_FOUND_AMDGPU_GPU OR LIBOMPTARGET_FORCE_AMDGPU_TESTS)
# Report to the parent scope that we are building a plugin for amdgpu
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa" PARENT_SCOPE)
list(APPEND LIBOMPTARGET_TESTED_PLUGINS "omptarget.rtl.amdgpu")
set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
else()
libomptarget_say("Not generating AMDGPU tests, no supported devices detected. Use 'LIBOMPTARGET_FORCE_AMDGPU_TESTS' to override.")
# This return() is the last statement of the file.
return()
endif()

View File

@ -1,37 +0,0 @@
//===--- amdgpu/impl/data.cpp ------------------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "impl_runtime.h"
#include "hsa_api.h"
#include "internal.h"
#include "rt.h"
#include <cassert>
#include <stdio.h>
#include <string.h>
#include <vector>
using core::TaskImpl;
namespace core {
namespace Runtime {
/// Allocate \p size bytes from \p MemoryPool and store the address in \p *ptr.
///
/// On a successful allocation the buffer is handed to
/// core::allow_access_to_all_gpu_agents and the status of that call is
/// returned. If the allocation fails, its status is returned and \p *ptr is
/// not read: callers pass the address of an uninitialized pointer, so it may
/// hold garbage at that point.
hsa_status_t HostMalloc(void **ptr, size_t size,
                        hsa_amd_memory_pool_t MemoryPool) {
  hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, size, 0, ptr);
  if (err != HSA_STATUS_SUCCESS)
    return err;

  // Log only after the allocation is known to have produced a pointer.
  DP("Malloced %p\n", *ptr);
  return core::allow_access_to_all_gpu_agents(*ptr);
}
/// Return \p ptr to its memory pool via hsa_amd_memory_pool_free and pass
/// that call's status back to the caller.
hsa_status_t Memfree(void *ptr) {
  const hsa_status_t status = hsa_amd_memory_pool_free(ptr);
  DP("Freed %p\n", ptr);
  return status;
}
} // namespace Runtime
} // namespace core

View File

@ -1,80 +0,0 @@
//===--- amdgpu/impl/get_elf_mach_gfx_name.cpp -------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "get_elf_mach_gfx_name.h"
// This header conflicts with the system elf.h (macros vs enums of the same
// identifier) and contains more up to date values for the enum checked here.
// rtl.cpp uses the system elf.h.
#include "llvm/BinaryFormat/ELF.h"
/// Map the EF_AMDGPU_MACH bits of the ELF header flags \p EFlags to the
/// matching "gfxNNN" name. Returns "--unknown gfx" when the masked value is
/// not one of the entries listed below.
const char *get_elf_mach_gfx_name(uint32_t EFlags) {
  using namespace llvm::ELF;

  struct MachNameEntry {
    uint32_t Mach;
    const char *Name;
  };

  // One row per recognised machine value.
  static const MachNameEntry KnownMachs[] = {
      {EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"},
      {EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"},
      {EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"},
      {EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"},
      {EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"},
      {EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"},
      {EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"},
      {EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"},
      {EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"},
      {EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"},
      {EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"},
      {EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"},
      {EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"},
      {EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"},
      {EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"},
  };

  const uint32_t Mach = EFlags & EF_AMDGPU_MACH;
  for (const MachNameEntry &Entry : KnownMachs)
    if (Entry.Mach == Mach)
      return Entry.Name;

  return "--unknown gfx";
}

View File

@ -1,15 +0,0 @@
//===--- amdgpu/impl/get_elf_mach_gfx_name.h ---------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef GET_ELF_MACH_GFX_NAME_H_INCLUDED
#define GET_ELF_MACH_GFX_NAME_H_INCLUDED
#include <stdint.h>
const char *get_elf_mach_gfx_name(uint32_t EFlags);
#endif

View File

@ -1,26 +0,0 @@
//===--- amdgpu/impl/hsa_api.h ------------------------------------ C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef AMDGPU_HSA_API_H_INCLUDED
#define AMDGPU_HSA_API_H_INCLUDED
// Select the HSA runtime headers. When __has_include is available, prefer the
// "hsa/"-prefixed layout and fall back to the un-prefixed one. Without
// __has_include, assume the prefixed layout for both headers so that the two
// includes always come from the same directory.
#if defined(__has_include)
#if __has_include("hsa/hsa.h")
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#elif __has_include("hsa.h")
#include "hsa.h"
#include "hsa_ext_amd.h"
#endif
#else
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#endif
#endif

View File

@ -1,182 +0,0 @@
//===--- amdgpu/impl/impl.cpp ------------------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "rt.h"
#include <memory>
/*
* Data
*/
/// Query the HSA pointer info for \p ptr and report its agent-side address.
///
/// \p *agentBaseAddress is set to nullptr when the pointer type is not
/// HSA_EXT_POINTER_TYPE_LOCKED, and to the corresponding agent address
/// otherwise. If the query itself fails, its status is returned and
/// \p *agentBaseAddress is left untouched.
hsa_status_t is_locked(void *ptr, void **agentBaseAddress) {
  hsa_amd_pointer_info_t info;
  info.size = sizeof(hsa_amd_pointer_info_t);

  const hsa_status_t status =
      hsa_amd_pointer_info(ptr, &info, /*alloc=*/nullptr,
                           /*num_agents_accessible=*/nullptr,
                           /*accessible=*/nullptr);
  if (status != HSA_STATUS_SUCCESS) {
    DP("Error when getting pointer info\n");
    return status;
  }

  if (info.type != HSA_EXT_POINTER_TYPE_LOCKED) {
    *agentBaseAddress = nullptr;
    return HSA_STATUS_SUCCESS;
  }

  // When user passes in a basePtr+offset we need to fix the locked pointer to
  // include the offset: ROCr always returns the base locked address, not the
  // shifted one.
  char *const hostBegin = (char *)info.hostBaseAddress;
  char *const hostEnd = hostBegin + info.sizeInBytes;
  char *const queried = (char *)ptr;
  const bool insideHostRange = hostBegin <= queried && queried < hostEnd;

  if (insideHostRange) {
    *agentBaseAddress =
        (void *)((uint64_t)info.agentBaseAddress + (uint64_t)ptr -
                 (uint64_t)info.hostBaseAddress);
  } else {
    // Address is already device-agent accessible, no offset to compute.
    *agentBaseAddress = ptr;
  }
  return HSA_STATUS_SUCCESS;
}
// Submit an asynchronous copy of `size` bytes from `src` to `dest` on `agent`
// and block until `signal` leaves its initial value.
// The host pointer (either src or dest) must be locked via
// hsa_amd_memory_lock.
static hsa_status_t invoke_hsa_copy(hsa_signal_t signal, void *dest,
                                    hsa_agent_t agent, const void *src,
                                    size_t size) {
  const hsa_signal_value_t pending = 1;
  const hsa_signal_value_t completed = 0;

  hsa_signal_store_screlease(signal, pending);

  const hsa_status_t submitStatus = hsa_amd_memory_async_copy(
      dest, agent, src, agent, size, 0, nullptr, signal);
  if (submitStatus != HSA_STATUS_SUCCESS)
    return submitStatus;

  // async_copy reports success by decrementing and failure by setting to < 0.
  // Keep waiting for as long as the wait hands back the initial value.
  hsa_signal_value_t observed;
  do {
    observed = hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_NE,
                                         pending, UINT64_MAX,
                                         HSA_WAIT_STATE_BLOCKED);
  } while (observed == pending);

  return observed == completed ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}
// Deleter functor that releases its pointer through core::Runtime::Memfree.
// It is used in this file as the deleter of std::unique_ptr<void, ...> for
// the temporary host buffers of the memcpy fallback paths.
struct implFreePtrDeletor {
void operator()(void *p) {
core::Runtime::Memfree(p); // ignore failure to free
}
};
// Direction of a copy relative to the device: host-to-device or
// device-to-host.
enum CopyDirection { H2D, D2H };

// Copy `size` bytes between host and device, locking the host buffer for the
// duration of the copy if it is not locked already.
//
//   direction  - H2D copies from the locked host pointer to `dest`;
//                D2H copies from `src` to the locked host pointer.
//   lockingPtr - the host-side pointer of the transfer; it is the one that is
//                checked with is_locked and, if needed, locked and unlocked.
//
// Returns the first failure among the lock query, the lock, the copy and the
// unlock. A lock taken here is always released, also when the copy fails; a
// lock the caller already held is left in place.
static hsa_status_t locking_async_memcpy(enum CopyDirection direction,
                                         hsa_signal_t signal, void *dest,
                                         hsa_agent_t agent, void *src,
                                         void *lockingPtr, size_t size) {
  void *lockedPtr = nullptr;
  hsa_status_t err = is_locked(lockingPtr, &lockedPtr);
  if (err != HSA_STATUS_SUCCESS)
    return err;

  // is_locked reports nullptr for memory that is not locked yet.
  const bool lockedHere = (lockedPtr == nullptr);
  if (lockedHere) {
    hsa_agent_t agents[1] = {agent};
    err = hsa_amd_memory_lock(lockingPtr, size, agents, /*num_agent=*/1,
                              &lockedPtr);
    if (err != HSA_STATUS_SUCCESS)
      return err;
    // size is a size_t, so it is printed with %zu.
    DP("locking_async_memcpy: lockingPtr=%p lockedPtr=%p Size = %zu\n",
       lockingPtr, lockedPtr, size);
  }

  switch (direction) {
  case H2D:
    err = invoke_hsa_copy(signal, dest, agent, lockedPtr, size);
    break;
  case D2H:
    err = invoke_hsa_copy(signal, lockedPtr, agent, src, size);
    break;
  }

  // Memory the user locked stays locked; just report the copy status.
  if (!lockedHere)
    return err;

  // Do not leak the lock taken above. If the copy failed, that error is the
  // one reported and a potential unlock error is discarded.
  const hsa_status_t unlockErr = hsa_amd_memory_unlock(lockingPtr);
  return err != HSA_STATUS_SUCCESS ? err : unlockErr;
}
/// Copy \p size bytes from host memory \p hostSrc to device memory
/// \p deviceDest. A direct copy from \p hostSrc is tried first; if it fails,
/// the data is staged through a temporary buffer allocated from
/// \p MemoryPool and the copy is retried from there.
hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
                             void *hostSrc, size_t size,
                             hsa_agent_t device_agent,
                             hsa_amd_memory_pool_t MemoryPool) {
  const hsa_status_t direct =
      locking_async_memcpy(CopyDirection::H2D, signal, deviceDest,
                           device_agent, hostSrc, hostSrc, size);
  if (direct == HSA_STATUS_SUCCESS)
    return direct;

  // async memcpy sometimes fails in situations where
  // allocate + copy succeeds. Looks like it might be related to
  // locking part of a read only segment. Fall back for now.
  void *staging;
  const hsa_status_t allocStatus =
      core::Runtime::HostMalloc(&staging, size, MemoryPool);
  if (allocStatus != HSA_STATUS_SUCCESS) {
    DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size);
    return allocStatus;
  }

  // The staging buffer is released when this owner goes out of scope.
  std::unique_ptr<void, implFreePtrDeletor> stagingOwner(staging);
  memcpy(staging, hostSrc, size);
  return locking_async_memcpy(CopyDirection::H2D, signal, deviceDest,
                              device_agent, staging, staging, size);
}
/// Copy \p size bytes from device memory \p deviceSrc to host memory
/// \p hostDest. A direct copy into \p hostDest is tried first; if it fails,
/// the data is copied into a temporary buffer allocated from \p MemoryPool
/// and then memcpy'd into \p hostDest. A failure of that second copy is
/// reported as HSA_STATUS_ERROR.
hsa_status_t impl_memcpy_d2h(hsa_signal_t signal, void *hostDest,
                             void *deviceSrc, size_t size,
                             hsa_agent_t deviceAgent,
                             hsa_amd_memory_pool_t MemoryPool) {
  // device has always visibility over both pointers, so use that
  const hsa_status_t direct =
      locking_async_memcpy(CopyDirection::D2H, signal, hostDest, deviceAgent,
                           deviceSrc, hostDest, size);
  if (direct == HSA_STATUS_SUCCESS)
    return direct;

  // hsa_memory_copy sometimes fails in situations where
  // allocate + copy succeeds. Looks like it might be related to
  // locking part of a read only segment. Fall back for now.
  void *staging;
  const hsa_status_t allocStatus =
      core::Runtime::HostMalloc(&staging, size, MemoryPool);
  if (allocStatus != HSA_STATUS_SUCCESS) {
    DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size);
    return allocStatus;
  }

  // The staging buffer is released when this owner goes out of scope.
  std::unique_ptr<void, implFreePtrDeletor> stagingOwner(staging);
  const hsa_status_t staged =
      locking_async_memcpy(CopyDirection::D2H, signal, staging, deviceAgent,
                           deviceSrc, staging, size);
  if (staged != HSA_STATUS_SUCCESS)
    return HSA_STATUS_ERROR;

  memcpy(hostDest, staging, size);
  return HSA_STATUS_SUCCESS;
}

View File

@ -1,34 +0,0 @@
//===--- amdgpu/impl/impl_runtime.h ------------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef INCLUDE_IMPL_RUNTIME_H_
#define INCLUDE_IMPL_RUNTIME_H_
#include "hsa_api.h"
extern "C" {
// Check if pointer ptr is already locked
hsa_status_t is_locked(void *ptr, void **agentBaseAddress);
hsa_status_t impl_module_register_from_memory_to_place(
void *module_bytes, size_t module_size, int DeviceId,
hsa_status_t (*on_deserialized_data)(void *data, size_t size,
void *cb_state),
void *cb_state);
hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
void *hostSrc, size_t size,
hsa_agent_t device_agent,
hsa_amd_memory_pool_t MemoryPool);
hsa_status_t impl_memcpy_d2h(hsa_signal_t sig, void *hostDest, void *deviceSrc,
size_t size, hsa_agent_t device_agent,
hsa_amd_memory_pool_t MemoryPool);
}
#endif // INCLUDE_IMPL_RUNTIME_H_

View File

@ -1,154 +0,0 @@
//===--- amdgpu/impl/internal.h ----------------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef SRC_RUNTIME_INCLUDE_INTERNAL_H_
#define SRC_RUNTIME_INCLUDE_INTERNAL_H_
#include <inttypes.h>
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <cstring>
#include <map>
#include <queue>
#include <string>
#include <utility>
#include <vector>
#include "hsa_api.h"
#include "impl_runtime.h"
#ifndef TARGET_NAME
#error "Missing TARGET_NAME macro"
#endif
#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
#include "Debug.h"
#define MAX_NUM_KERNELS (1024 * 16)
// ---------------------- Kernel Start -------------
// Plain record of per-kernel values.
// NOTE(review): judging by the field names this holds the kernel object
// handle, segment sizes, register and spill counts, and argument counts; the
// code that fills it in is not visible here, so confirm units and meaning
// against the producer.
typedef struct atl_kernel_info_s {
uint64_t kernel_object;
uint32_t group_segment_size;
uint32_t private_segment_size;
uint32_t sgpr_count;
uint32_t vgpr_count;
uint32_t sgpr_spill_count;
uint32_t vgpr_spill_count;
uint32_t kernel_segment_size;
uint32_t explicit_argument_count;
uint32_t implicit_argument_count;
} atl_kernel_info_t;
// Address/size pair describing a symbol.
// NOTE(review): presumably a device address and a size in bytes - confirm
// against the code that populates it.
typedef struct atl_symbol_info_s {
uint64_t addr;
uint32_t size;
} atl_symbol_info_t;
// ---------------------- Kernel End -------------
namespace core {
class TaskgroupImpl;
class TaskImpl;
class Kernel;
class KernelImpl;
} // namespace core
struct SignalPoolT {
SignalPoolT() {}
SignalPoolT(const SignalPoolT &) = delete;
SignalPoolT(SignalPoolT &&) = delete;
~SignalPoolT() {
size_t N = state.size();
for (size_t i = 0; i < N; i++) {
hsa_signal_t signal = state.front();
state.pop();
hsa_status_t rc = hsa_signal_destroy(signal);
if (rc != HSA_STATUS_SUCCESS) {
DP("Signal pool destruction failed\n");
}
}
}
size_t size() {
lock l(&mutex);
return state.size();
}
void push(hsa_signal_t s) {
lock l(&mutex);
state.push(s);
}
hsa_signal_t pop(void) {
lock l(&mutex);
if (!state.empty()) {
hsa_signal_t res = state.front();
state.pop();
return res;
}
// Pool empty, attempt to create another signal
hsa_signal_t new_signal;
hsa_status_t err = hsa_signal_create(0, 0, NULL, &new_signal);
if (err == HSA_STATUS_SUCCESS) {
return new_signal;
}
// Fail
return {0};
}
private:
static pthread_mutex_t mutex;
std::queue<hsa_signal_t> state;
struct lock {
lock(pthread_mutex_t *m) : m(m) { pthread_mutex_lock(m); }
~lock() { pthread_mutex_unlock(m); }
pthread_mutex_t *m;
};
};
namespace core {
hsa_status_t atl_init_gpu_context();
hsa_status_t init_hsa();
hsa_status_t finalize_hsa();
/*
* Generic utils
*/
// Alignment helpers. `alignment` must be a power of two: all four functions
// rely on `~(alignment - 1)` being a contiguous mask of the high bits.

// Round an integer value down to a multiple of `alignment`.
template <typename T> inline T alignDown(T value, size_t alignment) {
  return (T)(value & ~(alignment - 1));
}

// Round a pointer's address down to a multiple of `alignment` bytes.
template <typename T> inline T *alignDown(T *value, size_t alignment) {
  return reinterpret_cast<T *>(alignDown((intptr_t)value, alignment));
}

// Round an integer value up to a multiple of `alignment`.
template <typename T> inline T alignUp(T value, size_t alignment) {
  return alignDown((T)(value + alignment - 1), alignment);
}

// Round a pointer's address up to a multiple of `alignment` bytes.
template <typename T> inline T *alignUp(T *value, size_t alignment) {
  // Convert to an integer address *before* adding the bias. Adding to the
  // T* directly would advance by (alignment - 1) * sizeof(T) bytes and
  // overshoot for every T larger than one byte.
  return reinterpret_cast<T *>(
      alignDown((intptr_t)((intptr_t)value + alignment - 1), alignment));
}
extern bool atl_is_impl_initialized();
bool handle_group_signal(hsa_signal_value_t value, void *arg);
hsa_status_t allow_access_to_all_gpu_agents(void *ptr);
} // namespace core
// Translate an HSA status code into its description string. If
// hsa_status_string itself does not succeed, a fixed fallback is returned.
inline const char *get_error_string(hsa_status_t err) {
  const char *Description;
  if (hsa_status_string(err, &Description) != HSA_STATUS_SUCCESS) {
    return "HSA_STATUS UNKNOWN.";
  }
  return Description;
}
#endif // SRC_RUNTIME_INCLUDE_INTERNAL_H_

View File

@ -1,39 +0,0 @@
//===--- amdgpu/impl/interop_hsa.cpp ------------------------------ C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "interop_hsa.h"
#include "internal.h"
// Look `symbol` up in SymbolInfoTable and report its address and size.
//
// Returns HSA_STATUS_ERROR without touching the outputs when `symbol`,
// `var_addr` or `var_size` is null. Returns HSA_STATUS_ERROR with
// *var_addr = NULL and *var_size = 0 when the symbol is not in the table.
// Otherwise fills both outputs and returns HSA_STATUS_SUCCESS.
// `DeviceId` is not read by this function.
//
// Typical usage:
//   void *var_addr;
//   unsigned int var_size;
//   interop_hsa_get_symbol_info(table, device_id, "symbol_name",
//                               &var_addr, &var_size);
hsa_status_t interop_hsa_get_symbol_info(
    const std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
    int DeviceId, const char *symbol, void **var_addr, unsigned int *var_size) {
  if (symbol == nullptr || var_addr == nullptr || var_size == nullptr) {
    return HSA_STATUS_ERROR;
  }

  const auto Entry = SymbolInfoTable.find(std::string(symbol));
  if (Entry == SymbolInfoTable.end()) {
    *var_addr = NULL;
    *var_size = 0;
    return HSA_STATUS_ERROR;
  }

  const atl_symbol_info_t &Found = Entry->second;
  *var_addr = reinterpret_cast<void *>(Found.addr);
  *var_size = Found.size;
  return HSA_STATUS_SUCCESS;
}

View File

@ -1,26 +0,0 @@
//===--- amdgpu/impl/interop_hsa.h -------------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef INCLUDE_INTEROP_HSA_H_
#define INCLUDE_INTEROP_HSA_H_
#include "impl_runtime.h"
#include "hsa_api.h"
#include "internal.h"
#include <map>
#include <string>
extern "C" {
hsa_status_t interop_hsa_get_symbol_info(
const std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
int DeviceId, const char *symbol, void **var_addr, unsigned int *var_size);
}
#endif // INCLUDE_INTEROP_HSA_H_

View File

@ -1,271 +0,0 @@
//===--- amdgpu/impl/msgpack.cpp ---------------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <cassert>
#include <cstdint>
#include <cstring>
#include <functional>
#include <string>
#include "msgpack.h"
namespace msgpack {
// Print "internal error" on stdout and terminate the process with exit
// status 1. Never returns.
[[noreturn]] void internal_error() {
printf("internal error\n");
exit(1);
}
// Return the enumerator name of `ty` as a string literal. The X-macro expands
// to one `case NAME: return "NAME";` per row of msgpack.def; a value matching
// no case falls through to internal_error().
const char *type_name(type ty) {
switch (ty) {
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) \
case NAME: \
return #NAME;
#include "msgpack.def"
#undef X
}
internal_error();
}
// Return the WIDTH column of msgpack.def for `ty`. handle_msgpack_given_type
// uses it as the byte offset from the type byte to the start of the payload.
// A value matching no case falls through to internal_error().
unsigned bytes_used_fixed(msgpack::type ty) {
using namespace msgpack;
switch (ty) {
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) \
case NAME: \
return WIDTH;
#include "msgpack.def"
#undef X
}
internal_error();
}
// Classify a type byte. The X-macro expands to a chain of
// `if (x >= LOWER && x <= UPPER) { return NAME; } else` tests, one per row of
// msgpack.def in file order, closed by the final `{ internal_error(); }`
// block that runs only if no row matched.
msgpack::type parse_type(unsigned char x) {
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) \
if (x >= LOWER && x <= UPPER) { \
return NAME; \
} else
#include "msgpack.def"
#undef X
{ internal_error(); }
}
// Reinterpret the bytes of `x` as a value of type R. T and R must have the
// same size (checked at compile time); the bytes are copied, not converted.
template <typename T, typename R> R bitcast(T x) {
  static_assert(sizeof(T) == sizeof(R), "");
  R Result;
  memcpy(&Result, &x, sizeof(R));
  return Result;
}
// Explicit instantiation of the uint64_t -> int64_t form.
template int64_t bitcast<uint64_t, int64_t>(uint64_t);
} // namespace msgpack
// Helper functions for reading additional payload from the header
// Depending on the type, this can be a number of bytes, elements,
// key-value pairs or an embedded integer.
// Each takes a pointer to the start of the header and returns a uint64_t
namespace {
namespace payload {
// Reader for types that carry no count or value in the header.
uint64_t read_zero(const unsigned char *) { return 0; }

// The type byte itself is the value, zero-extended to 64 bits.
uint64_t read_embedded_u8(const unsigned char *start) { return *start; }

// The type byte itself is the value: reinterpret it as a signed 8-bit
// integer, sign-extend to 64 bits, and return those bits as unsigned.
uint64_t read_embedded_s8(const unsigned char *start) {
  const int64_t Widened = msgpack::bitcast<uint8_t, int8_t>(*start);
  return msgpack::bitcast<int64_t, uint64_t>(Widened);
}
// Each reader returns the low bits of the type byte selected by its mask.
uint64_t read_via_mask_0x1(const unsigned char *start) {
  return start[0] & 0x1u;
}
uint64_t read_via_mask_0xf(const unsigned char *start) {
  return start[0] & 0xfu;
}
uint64_t read_via_mask_0x1f(const unsigned char *start) {
  return start[0] & 0x1fu;
}
// Read 1/2/4/8 bytes immediately following the type byte and zero/sign extend
// Big endian format.
uint64_t read_size_field_u8(const unsigned char *from) {
  // from[0] is the type byte; the field is the single byte right after it.
  return from[1];
}
// TODO: detect whether host is little endian or not, and whether the intrinsic
// is available. And probably use the builtin to test the diy
const bool use_bswap = false;

// The readers below skip the type byte at from[0] and decode the big-endian
// unsigned integer that follows it.
//
// Every byte is widened to an unsigned type of the result width *before* it
// is shifted. Shifting the raw `unsigned char` would first promote it to
// `int`; a byte >= 0x80 moved into bit 31 then yields a negative int, which
// sign-extends when converted to uint64_t and corrupts the upper 32 bits.

// Decode a 2-byte big-endian field.
uint64_t read_size_field_u16(const unsigned char *from) {
  from++;
  if (use_bswap) {
    uint16_t b;
    memcpy(&b, from, 2);
    return __builtin_bswap16(b);
  } else {
    return ((uint32_t)from[0] << 8u) | (uint32_t)from[1];
  }
}

// Decode a 4-byte big-endian field.
uint64_t read_size_field_u32(const unsigned char *from) {
  from++;
  if (use_bswap) {
    uint32_t b;
    memcpy(&b, from, 4);
    return __builtin_bswap32(b);
  } else {
    return ((uint32_t)from[0] << 24u) | ((uint32_t)from[1] << 16u) |
           ((uint32_t)from[2] << 8u) | ((uint32_t)from[3] << 0u);
  }
}

// Decode an 8-byte big-endian field.
uint64_t read_size_field_u64(const unsigned char *from) {
  from++;
  if (use_bswap) {
    uint64_t b;
    memcpy(&b, from, 8);
    return __builtin_bswap64(b);
  } else {
    return ((uint64_t)from[0] << 56u) | ((uint64_t)from[1] << 48u) |
           ((uint64_t)from[2] << 40u) | ((uint64_t)from[3] << 32u) |
           ((uint64_t)from[4] << 24u) | ((uint64_t)from[5] << 16u) |
           ((uint64_t)from[6] << 8u) | ((uint64_t)from[7] << 0u);
  }
}
// Read the 1-byte field after the type byte as a signed value: reinterpret it
// as int8_t, sign-extend to 64 bits, and return those bits as unsigned.
uint64_t read_size_field_s8(const unsigned char *from) {
  const uint8_t Raw = read_size_field_u8(from);
  const int64_t Widened = msgpack::bitcast<uint8_t, int8_t>(Raw);
  return msgpack::bitcast<int64_t, uint64_t>(Widened);
}
// Read the 2-byte field after the type byte as a signed value: reinterpret it
// as int16_t, sign-extend to 64 bits, and return those bits as unsigned.
uint64_t read_size_field_s16(const unsigned char *from) {
  const uint16_t Raw = read_size_field_u16(from);
  const int64_t Widened = msgpack::bitcast<uint16_t, int16_t>(Raw);
  return msgpack::bitcast<int64_t, uint64_t>(Widened);
}
// Read the 4-byte field after the type byte as a signed value: reinterpret it
// as int32_t, sign-extend to 64 bits, and return those bits as unsigned.
uint64_t read_size_field_s32(const unsigned char *from) {
  const uint32_t Raw = read_size_field_u32(from);
  const int64_t Widened = msgpack::bitcast<uint32_t, int32_t>(Raw);
  return msgpack::bitcast<int64_t, uint64_t>(Widened);
}
// Read the 8-byte field after the type byte as a signed value and return its
// bits as unsigned (a round trip through int64_t).
uint64_t read_size_field_s64(const unsigned char *from) {
  const uint64_t Raw = read_size_field_u64(from);
  const int64_t AsSigned = msgpack::bitcast<uint64_t, int64_t>(Raw);
  return msgpack::bitcast<int64_t, uint64_t>(AsSigned);
}
} // namespace payload
} // namespace
namespace msgpack {
// Return the reader function named in the PAYLOAD column of msgpack.def for
// `ty` (looked up in namespace payload). A value matching no case falls
// through to internal_error().
payload_info_t payload_info(msgpack::type ty) {
using namespace msgpack;
switch (ty) {
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) \
case NAME: \
return payload::PAYLOAD;
#include "msgpack.def"
#undef X
}
internal_error();
}
} // namespace msgpack
const unsigned char *msgpack::skip_next_message(const unsigned char *start,
const unsigned char *end) {
class f : public functors_defaults<f> {};
return handle_msgpack({start, end}, f());
}
namespace msgpack {
// True when the message at `bytes` is a string whose length and contents both
// equal the NUL-terminated `needle`. Messages for which the string callback
// never fires yield false.
bool message_is_string(byte_range bytes, const char *needle) {
  const size_t needleN = strlen(needle);
  bool matched = false;
  foronly_string(bytes, [needle, needleN, &matched](size_t N,
                                                    const unsigned char *str) {
    if (N == needleN && memcmp(needle, str, N) == 0) {
      matched = true;
    }
  });
  return matched;
}
// Print the message at `bytes` to stdout in a rough JSON-like layout.
// Strings, signed and unsigned integers are printed; arrays and maps are
// printed recursively with indentation. Other scalar kinds print nothing.
void dump(byte_range bytes) {
  struct inner : functors_defaults<inner> {
    inner(unsigned indent) : indent(indent) {}
    const unsigned by = 2; // indentation step
    unsigned indent = 0;   // current indentation in columns

    void handle_string(size_t N, const unsigned char *bytes) {
      // std::string owns the NUL-terminated copy, so there is no unchecked
      // malloc and nothing to free.
      const std::string tmp(reinterpret_cast<const char *>(bytes), N);
      printf("\"%s\"", tmp.c_str());
    }

    // int64_t/uint64_t are not `long` on every platform; print through
    // long long with the matching conversion.
    void handle_signed(int64_t x) {
      printf("%lld", static_cast<long long>(x));
    }
    void handle_unsigned(uint64_t x) {
      printf("%llu", static_cast<unsigned long long>(x));
    }

    // Print N elements. Returns the position after the last element, or
    // nullptr if an element could not be parsed.
    const unsigned char *handle_array(uint64_t N, byte_range bytes) {
      // "%*s" consumes an int field width, hence the casts.
      printf("\n%*s[\n", static_cast<int>(indent), "");
      indent += by;
      for (uint64_t i = 0; i < N; i++) {
        indent += by;
        printf("%*s", static_cast<int>(indent), "");
        const unsigned char *next = handle_msgpack<inner>(bytes, {indent});
        printf(",\n");
        indent -= by;
        bytes.start = next;
        if (!next) {
          break;
        }
      }
      indent -= by;
      printf("%*s]", static_cast<int>(indent), "");
      return bytes.start;
    }

    // Print N key/value pairs. Returns the position after the last value, or
    // nullptr if a key or value could not be parsed (same convention as
    // handle_array, so an enclosing container stops instead of resuming at
    // the start of the unparsable key).
    const unsigned char *handle_map(uint64_t N, byte_range bytes) {
      printf("\n%*s{\n", static_cast<int>(indent), "");
      indent += by;
      for (uint64_t i = 0; i < N; i++) {
        const unsigned char *start_key = bytes.start;
        printf("%*s", static_cast<int>(indent), "");
        const unsigned char *end_key =
            handle_msgpack<inner>({start_key, bytes.end}, {indent});
        if (!end_key) {
          bytes.start = nullptr;
          break;
        }
        printf(" : ");
        const unsigned char *start_value = end_key;
        const unsigned char *end_value =
            handle_msgpack<inner>({start_value, bytes.end}, {indent});
        if (!end_value) {
          bytes.start = nullptr;
          break;
        }
        printf(",\n");
        bytes.start = end_value;
      }
      indent -= by;
      printf("%*s}", static_cast<int>(indent), "");
      return bytes.start;
    }
  };
  handle_msgpack<inner>(bytes, {0});
  printf("\n");
}
} // namespace msgpack

View File

@ -1,46 +0,0 @@
//===--- amdgpu/impl/msgpack.def ---------------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// X(name, header width in bytes counted from the type byte to the start of the
//   payload, payload reader function, lowest type byte, highest type byte)
X(posfixint, 1, read_embedded_u8, 0x00, 0x7f)
X(negfixint, 1, read_embedded_s8, 0xe0, 0xff)
X(fixmap, 1, read_via_mask_0xf, 0x80, 0x8f)
X(fixarray, 1, read_via_mask_0xf, 0x90, 0x9f)
X(fixstr, 1, read_via_mask_0x1f, 0xa0, 0xbf)
X(nil, 1, read_zero, 0xc0, 0xc0)
X(never_used, 1, read_zero, 0xc1, 0xc1)
X(f, 1, read_via_mask_0x1, 0xc2, 0xc2)
X(t, 1, read_via_mask_0x1, 0xc3, 0xc3)
X(bin8, 2, read_size_field_u8, 0xc4, 0xc4)
X(bin16, 3, read_size_field_u16, 0xc5, 0xc5)
X(bin32, 5, read_size_field_u32, 0xc6, 0xc6)
X(ext8, 3, read_size_field_u8, 0xc7, 0xc7)
X(ext16, 4, read_size_field_u16, 0xc8, 0xc8)
X(ext32, 6, read_size_field_u32, 0xc9, 0xc9)
X(float32, 5, read_zero, 0xca, 0xca)
X(float64, 9, read_zero, 0xcb, 0xcb)
X(uint8, 2, read_size_field_u8, 0xcc, 0xcc)
X(uint16, 3, read_size_field_u16, 0xcd, 0xcd)
X(uint32, 5, read_size_field_u32, 0xce, 0xce)
X(uint64, 9, read_size_field_u64, 0xcf, 0xcf)
X(int8, 2, read_size_field_s8, 0xd0, 0xd0)
X(int16, 3, read_size_field_s16, 0xd1, 0xd1)
X(int32, 5, read_size_field_s32, 0xd2, 0xd2)
X(int64, 9, read_size_field_s64, 0xd3, 0xd3)
X(fixext1, 3, read_zero, 0xd4, 0xd4)
X(fixext2, 4, read_zero, 0xd5, 0xd5)
X(fixext4, 6, read_zero, 0xd6, 0xd6)
X(fixext8, 10, read_zero, 0xd7, 0xd7)
X(fixext16, 18, read_zero, 0xd8, 0xd8)
X(str8, 2, read_size_field_u8, 0xd9, 0xd9)
X(str16, 3, read_size_field_u16, 0xda, 0xda)
X(str32, 5, read_size_field_u32, 0xdb, 0xdb)
X(array16, 3, read_size_field_u16, 0xdc, 0xdc)
X(array32, 5, read_size_field_u32, 0xdd, 0xdd)
X(map16, 3, read_size_field_u16, 0xde, 0xde)
X(map32, 5, read_size_field_u32, 0xdf, 0xdf)

View File

@ -1,282 +0,0 @@
//===--- amdgpu/impl/msgpack.h ------------------------------------ C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MSGPACK_H
#define MSGPACK_H
#include <functional>
namespace msgpack {
// The message pack format is dynamically typed, schema-less. Format is:
// message: [type][header][payload]
// where type is one byte, header length is a fixed length function of type
// payload is zero to N bytes, with the length encoded in [type][header]
// Scalar fields include boolean, signed integer, float, string etc
// Composite types are sequences of messages
// Array field is [header][element][element]...
// Map field is [header][key][value][key][value]...
// Multibyte integer fields are big endian encoded
// The map key can be any message type
// Maps may contain duplicate keys
// Data is not uniquely encoded, e.g. integer "8" may be stored as one byte or
// in as many as nine, as signed or unsigned. Implementation defined.
// Similarly "foo" may embed the length in the type field or in multiple bytes
// This parser is structured as an iterator over a sequence of bytes.
// It calls a user provided function on each message in order to extract fields
// The default implementation for each scalar type is to do nothing. For map or
// arrays, the default implementation returns just after that message to support
// iterating to the next message, but otherwise has no effect.
// A span of bytes to parse. The parser treats `end - start` as the number of
// available bytes, so `end` points one past the last byte.
struct byte_range {
const unsigned char *start;
const unsigned char *end;
};
const unsigned char *skip_next_message(const unsigned char *start,
const unsigned char *end);
// CRTP base for message visitors. The parser calls the public cb_* entry
// points; each forwards to the handle_* member of the same suffix looked up
// on Derived, so a visitor overrides only the handle_* functions it needs and
// inherits the defaults below for the rest.
template <typename Derived> class functors_defaults {
public:
  void cb_string(size_t N, const unsigned char *str) {
    derived().handle_string(N, str);
  }
  void cb_boolean(bool x) { derived().handle_boolean(x); }
  void cb_signed(int64_t x) { derived().handle_signed(x); }
  void cb_unsigned(uint64_t x) { derived().handle_unsigned(x); }
  void cb_array_elements(byte_range bytes) {
    derived().handle_array_elements(bytes);
  }
  void cb_map_elements(byte_range key, byte_range value) {
    derived().handle_map_elements(key, value);
  }
  const unsigned char *cb_array(uint64_t N, byte_range bytes) {
    return derived().handle_array(N, bytes);
  }
  const unsigned char *cb_map(uint64_t N, byte_range bytes) {
    return derived().handle_map(N, bytes);
  }

private:
  Derived &derived() { return *static_cast<Derived *>(this); }

  // Scalar and per-element defaults: ignore the value.
  void handle_string(size_t, const unsigned char *) {}
  void handle_boolean(bool) {}
  void handle_signed(int64_t) {}
  void handle_unsigned(uint64_t) {}
  void handle_array_elements(byte_range) {}
  void handle_map_elements(byte_range, byte_range) {}

  // Default array handling: step over N elements, reporting each one through
  // cb_array_elements. The range reported for an element starts at that
  // element and ends at the end of the enclosing range (not at the end of the
  // element). Returns the position after the last element, or nullptr if an
  // element cannot be skipped.
  const unsigned char *handle_array(uint64_t N, byte_range bytes) {
    for (uint64_t Seen = 0; Seen != N; ++Seen) {
      const unsigned char *const After =
          skip_next_message(bytes.start, bytes.end);
      if (After == nullptr) {
        return nullptr;
      }
      cb_array_elements(bytes);
      bytes.start = After;
    }
    return bytes.start;
  }

  // Default map handling: step over N key/value pairs, reporting each pair
  // through cb_map_elements with exact key and value ranges. Returns the
  // position after the last value, or nullptr if a key or value cannot be
  // skipped.
  const unsigned char *handle_map(uint64_t N, byte_range bytes) {
    for (uint64_t Seen = 0; Seen != N; ++Seen) {
      const unsigned char *const KeyBegin = bytes.start;
      const unsigned char *const KeyEnd =
          skip_next_message(KeyBegin, bytes.end);
      if (KeyEnd == nullptr) {
        return nullptr;
      }
      const unsigned char *const ValueEnd =
          skip_next_message(KeyEnd, bytes.end);
      if (ValueEnd == nullptr) {
        return nullptr;
      }
      cb_map_elements({KeyBegin, KeyEnd}, {KeyEnd, ValueEnd});
      bytes.start = ValueEnd;
    }
    return bytes.start;
  }
};
// One enumerator per row of msgpack.def, in file order, stored in a uint8_t.
typedef enum : uint8_t {
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) NAME,
#include "msgpack.def"
#undef X
} type;
[[noreturn]] void internal_error();
type parse_type(unsigned char x);
unsigned bytes_used_fixed(type ty);
typedef uint64_t (*payload_info_t)(const unsigned char *);
payload_info_t payload_info(msgpack::type ty);
template <typename T, typename R> R bitcast(T x);
template <typename F, msgpack::type ty>
const unsigned char *handle_msgpack_given_type(byte_range bytes, F f) {
const unsigned char *start = bytes.start;
const unsigned char *end = bytes.end;
const uint64_t available = end - start;
assert(available != 0);
assert(ty == parse_type(*start));
const uint64_t bytes_used = bytes_used_fixed(ty);
if (available < bytes_used) {
return 0;
}
const uint64_t available_post_header = available - bytes_used;
const payload_info_t info = payload_info(ty);
const uint64_t N = info(start);
switch (ty) {
case msgpack::t:
case msgpack::f: {
// t is 0b11000010, f is 0b11000011, masked with 0x1
f.cb_boolean(N);
return start + bytes_used;
}
case msgpack::posfixint:
case msgpack::uint8:
case msgpack::uint16:
case msgpack::uint32:
case msgpack::uint64: {
f.cb_unsigned(N);
return start + bytes_used;
}
case msgpack::negfixint:
case msgpack::int8:
case msgpack::int16:
case msgpack::int32:
case msgpack::int64: {
f.cb_signed(bitcast<uint64_t, int64_t>(N));
return start + bytes_used;
}
case msgpack::fixstr:
case msgpack::str8:
case msgpack::str16:
case msgpack::str32: {
if (available_post_header < N) {
return 0;
} else {
f.cb_string(N, start + bytes_used);
return start + bytes_used + N;
}
}
case msgpack::fixarray:
case msgpack::array16:
case msgpack::array32: {
return f.cb_array(N, {start + bytes_used, end});
}
case msgpack::fixmap:
case msgpack::map16:
case msgpack::map32: {
return f.cb_map(N, {start + bytes_used, end});
}
case msgpack::nil:
case msgpack::bin8:
case msgpack::bin16:
case msgpack::bin32:
case msgpack::float32:
case msgpack::float64:
case msgpack::ext8:
case msgpack::ext16:
case msgpack::ext32:
case msgpack::fixext1:
case msgpack::fixext2:
case msgpack::fixext4:
case msgpack::fixext8:
case msgpack::fixext16:
case msgpack::never_used: {
if (available_post_header < N) {
return 0;
}
return start + bytes_used + N;
}
}
internal_error();
}
template <typename F>
const unsigned char *handle_msgpack(byte_range bytes, F f) {
const unsigned char *start = bytes.start;
const unsigned char *end = bytes.end;
const uint64_t available = end - start;
if (available == 0) {
return 0;
}
const type ty = parse_type(*start);
switch (ty) {
#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) \
case msgpack::NAME: \
return handle_msgpack_given_type<F, msgpack::NAME>(bytes, f);
#include "msgpack.def"
#undef X
}
internal_error();
}
bool message_is_string(byte_range bytes, const char *str);
// Parse the message at `bytes`; if the parser reports a string, call
// callback(length, data). Every other callback keeps its default.
template <typename C> void foronly_string(byte_range bytes, C callback) {
  struct string_only : functors_defaults<string_only> {
    string_only(C &fn) : fn(fn) {}
    C &fn;
    void handle_string(size_t N, const unsigned char *str) { fn(N, str); }
  };
  handle_msgpack<string_only>(bytes, {callback});
}
// Parse the message at `bytes`; if the parser reports an unsigned integer,
// call callback(value). Every other callback keeps its default.
template <typename C> void foronly_unsigned(byte_range bytes, C callback) {
  struct unsigned_only : functors_defaults<unsigned_only> {
    unsigned_only(C &fn) : fn(fn) {}
    C &fn;
    void handle_unsigned(uint64_t x) { fn(x); }
  };
  handle_msgpack<unsigned_only>(bytes, {callback});
}
// Parse the message at `bytes`, calling callback(element_range) for each
// element reported through the array-element callback.
template <typename C> void foreach_array(byte_range bytes, C callback) {
  struct element_visitor : functors_defaults<element_visitor> {
    element_visitor(C &fn) : fn(fn) {}
    C &fn;
    void handle_array_elements(byte_range element) { fn(element); }
  };
  handle_msgpack<element_visitor>(bytes, {callback});
}
// Parse the message at `bytes`, calling callback(key_range, value_range) for
// each pair reported through the map-element callback.
template <typename C> void foreach_map(byte_range bytes, C callback) {
  struct pair_visitor : functors_defaults<pair_visitor> {
    pair_visitor(C &fn) : fn(fn) {}
    C &fn;
    void handle_map_elements(byte_range key, byte_range value) {
      fn(key, value);
    }
  };
  handle_msgpack<pair_visitor>(bytes, {callback});
}
// Crude approximation to json
void dump(byte_range);
} // namespace msgpack
#endif

View File

@ -1,34 +0,0 @@
//===--- amdgpu/impl/rt.h ----------------------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef SRC_RUNTIME_INCLUDE_RT_H_
#define SRC_RUNTIME_INCLUDE_RT_H_
#include "hsa_api.h"
#include "impl_runtime.h"
#include "internal.h"
#include <string>
namespace core {
namespace Runtime {
hsa_status_t Memfree(void *);
hsa_status_t HostMalloc(void **ptr, size_t size,
hsa_amd_memory_pool_t MemoryPool);
} // namespace Runtime
hsa_status_t RegisterModuleFromMemory(
std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
void *module_bytes, size_t module_size, hsa_agent_t agent,
hsa_status_t (*on_deserialized_data)(void *data, size_t size,
void *cb_state),
void *cb_state, std::vector<hsa_executable_t> &HSAExecutables);
} // namespace core
#endif // SRC_RUNTIME_INCLUDE_RT_H_

View File

@ -1,744 +0,0 @@
//===--- amdgpu/impl/system.cpp ----------------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
#include <cassert>
#include <sstream>
#include <string>
#include "internal.h"
#include "rt.h"
#include "msgpack.h"
using namespace llvm;
using namespace llvm::object;
using namespace llvm::ELF;
namespace hsa {
// Wrap HSA iterate API in a shim that allows passing general callables
// Call hsa_executable_iterate_symbols with an arbitrary callable. `cb` is
// passed through the API's void* argument and invoked as cb(executable,
// symbol) for each callback the API makes; the API's status is returned.
template <typename C>
hsa_status_t executable_iterate_symbols(hsa_executable_t executable, C cb) {
  // A captureless lambda serves as the plain callback; it recovers the
  // callable from the opaque pointer and forwards to it.
  auto Trampoline = [](hsa_executable_t exe, hsa_executable_symbol_t sym,
                       void *state) -> hsa_status_t {
    return (*static_cast<C *>(state))(exe, sym);
  };
  void *State = static_cast<void *>(&cb);
  return hsa_executable_iterate_symbols(executable, Trampoline, State);
}
} // namespace hsa
typedef unsigned char *address;
/*
* Note descriptors.
*/
// FreeBSD already declares Elf_Note (indirectly via <libelf.h>)
#if !defined(__FreeBSD__)
// Fixed-size leading part of a note record: three 32-bit fields, followed in
// the file by the name, optional padding and the descriptor as listed below.
typedef struct {
uint32_t n_namesz; /* Length of note's name. */
uint32_t n_descsz; /* Length of note's value. */
uint32_t n_type; /* Type of note. */
// then name
// then padding, optional
// then desc, at 4 byte alignment (not 8, despite being elf64)
} Elf_Note;
#endif
// Description of one kernel argument as read from the code object metadata
// (".name", ".size", ".offset" and ".value_kind" keys).
class KernelArgMD {
public:
  // Recognised hidden-argument kinds; anything else stays Unknown.
  enum class ValueKind {
    HiddenGlobalOffsetX,
    HiddenGlobalOffsetY,
    HiddenGlobalOffsetZ,
    HiddenNone,
    HiddenPrintfBuffer,
    HiddenDefaultQueue,
    HiddenCompletionAction,
    HiddenMultiGridSyncArg,
    HiddenHostcallBuffer,
    HiddenHeapV1,
    Unknown
  };

  // Starts with an empty name, zero size and offset, and kind Unknown.
  KernelArgMD() {}

  // fields
  std::string name_;
  uint32_t size_ = 0;
  uint32_t offset_ = 0;
  ValueKind valueKind_ = ValueKind::Unknown;
};
// Maps the ".value_kind" metadata string of a kernel argument to its
// ValueKind. Only the hidden_* kinds are listed; populate_kernelArgMD leaves
// valueKind_ untouched for any string that is not a key here. The entries
// commented out below have no corresponding ValueKind enumerator.
static const std::map<std::string, KernelArgMD::ValueKind> ArgValueKind = {
// v3
// {"by_value", KernelArgMD::ValueKind::ByValue},
// {"global_buffer", KernelArgMD::ValueKind::GlobalBuffer},
// {"dynamic_shared_pointer",
// KernelArgMD::ValueKind::DynamicSharedPointer},
// {"sampler", KernelArgMD::ValueKind::Sampler},
// {"image", KernelArgMD::ValueKind::Image},
// {"pipe", KernelArgMD::ValueKind::Pipe},
// {"queue", KernelArgMD::ValueKind::Queue},
{"hidden_global_offset_x", KernelArgMD::ValueKind::HiddenGlobalOffsetX},
{"hidden_global_offset_y", KernelArgMD::ValueKind::HiddenGlobalOffsetY},
{"hidden_global_offset_z", KernelArgMD::ValueKind::HiddenGlobalOffsetZ},
{"hidden_none", KernelArgMD::ValueKind::HiddenNone},
{"hidden_printf_buffer", KernelArgMD::ValueKind::HiddenPrintfBuffer},
{"hidden_default_queue", KernelArgMD::ValueKind::HiddenDefaultQueue},
{"hidden_completion_action",
KernelArgMD::ValueKind::HiddenCompletionAction},
{"hidden_multigrid_sync_arg",
KernelArgMD::ValueKind::HiddenMultiGridSyncArg},
{"hidden_hostcall_buffer", KernelArgMD::ValueKind::HiddenHostcallBuffer},
{"hidden_heap_v1", KernelArgMD::ValueKind::HiddenHeapV1}};
namespace core {
// System event handler registered by atl_init_gpu_context.
//
// For a GPU memory fault event, prints the faulting address and the decoded
// fault reasons to stderr and returns HSA_STATUS_ERROR. Every other event type
// is ignored and HSA_STATUS_SUCCESS is returned. `data` is not used.
hsa_status_t callbackEvent(const hsa_amd_event_t *event, void *data) {
  if (event->event_type == HSA_AMD_GPU_MEMORY_FAULT_EVENT) {
    hsa_amd_gpu_memory_fault_info_t memory_fault = event->memory_fault;

    // fault_reason_mask is a set of single-bit flags occupying consecutive
    // bits 0..5 (hsa_amd_memory_fault_reason_t). The masks are written as
    // shifts so the bit position is explicit; hex literals such as 0x00000010
    // select bit 4, not bit 1.
    struct FaultReason {
      uint32_t Bit;
      const char *Text;
    };
    static const FaultReason Reasons[] = {
        {1u << 0, "Page not present or supervisor privilege. "},
        {1u << 1, "Write access to a read-only page. "},
        {1u << 2, "Execute access to a page marked NX. "},
        {1u << 3, "Host access only. "},
        {1u << 4, "ECC failure (if supported by HW). "},
        {1u << 5, "Can't determine the exact fault address. "},
    };

    std::stringstream stream;
    stream << std::hex << (uintptr_t)memory_fault.virtual_address;
    std::string addr("0x" + stream.str());
    std::string err_string = "[GPU Memory Error] Addr: " + addr;
    err_string += " Reason: ";

    bool AnyKnown = false;
    for (const FaultReason &Reason : Reasons) {
      if (memory_fault.fault_reason_mask & Reason.Bit) {
        err_string += Reason.Text;
        AnyKnown = true;
      }
    }
    if (!AnyKnown) {
      err_string += "No Idea! ";
    }

    fprintf(stderr, "%s\n", err_string.c_str());
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
// Register callbackEvent as the system event handler. On failure a diagnostic
// with the HSA error text is printed to stdout and HSA_STATUS_ERROR is
// returned; otherwise HSA_STATUS_SUCCESS.
hsa_status_t atl_init_gpu_context() {
  const hsa_status_t Status =
      hsa_amd_register_system_event_handler(callbackEvent, NULL);
  if (Status == HSA_STATUS_SUCCESS) {
    return HSA_STATUS_SUCCESS;
  }
  printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
         "Registering the system for memory faults", get_error_string(Status));
  return HSA_STATUS_ERROR;
}
// True for each of the Hidden* value kinds; false for Unknown and for any
// value that is not one of the listed enumerators.
static bool isImplicit(KernelArgMD::ValueKind value_kind) {
  using VK = KernelArgMD::ValueKind;
  bool Hidden = false;
  switch (value_kind) {
  case VK::HiddenGlobalOffsetX:
  case VK::HiddenGlobalOffsetY:
  case VK::HiddenGlobalOffsetZ:
  case VK::HiddenNone:
  case VK::HiddenPrintfBuffer:
  case VK::HiddenDefaultQueue:
  case VK::HiddenCompletionAction:
  case VK::HiddenMultiGridSyncArg:
  case VK::HiddenHostcallBuffer:
  case VK::HiddenHeapV1:
    Hidden = true;
    break;
  default:
    break;
  }
  return Hidden;
}
// Locate the metadata note in an ELF object and return the [begin, end) byte
// range of its descriptor, or {nullptr, nullptr} on failure.
//
// Only PT_NOTE program headers are examined, and only the first note of the
// first such header is ever considered: each path through the inner loop body
// returns.
static std::pair<const unsigned char *, const unsigned char *>
findMetadata(const ELFObjectFile<ELF64LE> &ELFObj) {
constexpr std::pair<const unsigned char *, const unsigned char *> Failure = {
nullptr, nullptr};
const auto &Elf = ELFObj.getELFFile();
auto PhdrsOrErr = Elf.program_headers();
if (!PhdrsOrErr) {
// The error is discarded; the caller only learns that lookup failed.
consumeError(PhdrsOrErr.takeError());
return Failure;
}
for (auto Phdr : *PhdrsOrErr) {
if (Phdr.p_type != PT_NOTE)
continue;
Error Err = Error::success();
for (auto Note : Elf.notes(Phdr, Err)) {
// NOTE(review): note types 7 and 8 are rejected by number; which note
// kinds these are is not stated here -- confirm and name them.
if (Note.getType() == 7 || Note.getType() == 8)
return Failure;
// Code object v2 uses yaml metadata and is no longer supported.
if (Note.getType() == NT_AMD_HSA_METADATA && Note.getName() == "AMD")
return Failure;
// Code object v3 should have AMDGPU metadata.
if (Note.getType() == NT_AMDGPU_METADATA && Note.getName() != "AMDGPU")
return Failure;
// NOTE(review): any note that reaches this point is returned, including
// one whose type is not NT_AMDGPU_METADATA -- confirm this is intended.
ArrayRef<uint8_t> Desc = Note.getDesc(Phdr.p_align);
return {Desc.data(), Desc.data() + Desc.size()};
}
// Reached only when the loop body never ran for this header (no notes, or
// iteration reported an error through Err).
if (Err) {
consumeError(std::move(Err));
return Failure;
}
}
return Failure;
}
static std::pair<const unsigned char *, const unsigned char *>
find_metadata(void *binary, size_t binSize) {
constexpr std::pair<const unsigned char *, const unsigned char *> Failure = {
nullptr, nullptr};
StringRef Buffer = StringRef(static_cast<const char *>(binary), binSize);
auto ElfOrErr = ObjectFile::createELFObjectFile(MemoryBufferRef(Buffer, ""),
/*InitContent=*/false);
if (!ElfOrErr) {
consumeError(ElfOrErr.takeError());
return Failure;
}
if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(ElfOrErr->get()))
return findMetadata(*ELFObj);
return Failure;
}
namespace {
// Find the entry of map `message` whose key is the string `needle` and whose
// value is an array. On a match *res receives the value's byte range and
// *size the array's element count. Returns 0 only when exactly one
// key/array pair matched, 1 otherwise. *res is also written when the key
// matches but the value is not an array.
int map_lookup_array(msgpack::byte_range message, const char *needle,
                     msgpack::byte_range *res, uint64_t *size) {
  // Visitor that records how many arrays it saw and the latest element count.
  struct array_probe : msgpack::functors_defaults<array_probe> {
    array_probe(unsigned &hits, uint64_t *size) : hits(hits), size(size) {}
    unsigned &hits;
    uint64_t *size;
    const unsigned char *handle_array(uint64_t N, msgpack::byte_range bytes) {
      ++hits;
      *size = N;
      return bytes.end;
    }
  };

  unsigned hits = 0;
  auto on_pair = [&](msgpack::byte_range key, msgpack::byte_range value) {
    if (!msgpack::message_is_string(key, needle)) {
      return;
    }
    // If the value is an array, the probe records its element count.
    msgpack::handle_msgpack<array_probe>(value, {hits, size});
    // Hand back the whole value range.
    *res = value;
  };
  msgpack::foreach_map(message, on_pair);

  // Only claim success if exactly one key/array pair matched.
  return hits == 1 ? 0 : 1;
}
// Find the entry of map `message` whose key is the string `needle` and whose
// value is a string, and copy that string into *res. Returns 0 only when
// exactly one such pair matched, 1 otherwise.
int map_lookup_string(msgpack::byte_range message, const char *needle,
                      std::string *res) {
  // Visitor that copies each string it sees and counts them.
  struct string_probe : public msgpack::functors_defaults<string_probe> {
    string_probe(unsigned &hits, std::string *res) : hits(hits), res(res) {}
    unsigned &hits;
    std::string *res;
    void handle_string(size_t N, const unsigned char *str) {
      ++hits;
      *res = std::string(str, str + N);
    }
  };

  unsigned hits = 0;
  auto on_pair = [&](msgpack::byte_range key, msgpack::byte_range value) {
    if (msgpack::message_is_string(key, needle)) {
      msgpack::handle_msgpack<string_probe>(value, {hits, res});
    }
  };
  msgpack::foreach_map(message, on_pair);
  return hits == 1 ? 0 : 1;
}
// Find the entry of map `message` whose key is the string `needle` and whose
// value is an unsigned integer, and store that integer in *res. Returns 0
// only when exactly one such pair matched, 1 otherwise.
int map_lookup_uint64_t(msgpack::byte_range message, const char *needle,
                        uint64_t *res) {
  unsigned hits = 0;
  auto on_value = [&](uint64_t x) {
    ++hits;
    *res = x;
  };
  auto on_pair = [&](msgpack::byte_range key, msgpack::byte_range value) {
    if (msgpack::message_is_string(key, needle)) {
      msgpack::foronly_unsigned(value, on_value);
    }
  };
  msgpack::foreach_map(message, on_pair);
  return hits == 1 ? 0 : 1;
}
// Store the byte range reported for element number `elt` (zero-based) of
// array `message` in *res. Returns 0 if that element was visited, 1 if not.
int array_lookup_element(msgpack::byte_range message, uint64_t elt,
                         msgpack::byte_range *res) {
  bool found = false;
  uint64_t index = 0;
  msgpack::foreach_array(message, [&](msgpack::byte_range value) {
    if (index++ == elt) {
      *res = value;
      found = true;
    }
  });
  return found ? 0 : 1;
}
// Fill *kernelarg from the map `args_element`, using the ".name", ".size",
// ".offset" and ".value_kind" keys. Other keys are ignored, and a
// ".value_kind" string missing from ArgValueKind leaves valueKind_ unchanged.
// Always returns 0.
int populate_kernelArgMD(msgpack::byte_range args_element,
                         KernelArgMD *kernelarg) {
  using namespace msgpack;

  auto on_pair = [&](byte_range key, byte_range value) -> void {
    if (message_is_string(key, ".name")) {
      foronly_string(value, [&](size_t N, const unsigned char *str) {
        kernelarg->name_ = std::string(str, str + N);
      });
      return;
    }
    if (message_is_string(key, ".size")) {
      foronly_unsigned(value, [&](uint64_t x) { kernelarg->size_ = x; });
      return;
    }
    if (message_is_string(key, ".offset")) {
      foronly_unsigned(value, [&](uint64_t x) { kernelarg->offset_ = x; });
      return;
    }
    if (message_is_string(key, ".value_kind")) {
      foronly_string(value, [&](size_t N, const unsigned char *str) {
        const auto Known = ArgValueKind.find(std::string(str, str + N));
        if (Known != ArgValueKind.end()) {
          kernelarg->valueKind_ = Known->second;
        }
      });
    }
  };
  foreach_map(args_element, on_pair);

  return 0;
}
} // namespace
// Parses the msgpack metadata found in the code object `binary` (of `binSize`
// bytes) and records one atl_kernel_info_t per element of the "amdhsa.kernels"
// array in `KernelInfoTable`, keyed by the kernel's ".name" string.
//
// Filled per kernel: sgpr/vgpr counts, sgpr/vgpr spill counts, the number of
// explicit and implicit arguments, and kernel_segment_size.
//
// Returns HSA_STATUS_SUCCESS when every kernel was parsed, and
// HSA_STATUS_ERROR_INVALID_CODE_OBJECT (after printing a diagnostic, except
// when no metadata is found at all) as soon as any lookup fails or a kernel's
// ".symbol" is not its ".name" followed by ".kd". Entries inserted before the
// failure stay in the table.
static hsa_status_t get_code_object_custom_metadata(
void *binary, size_t binSize,
std::map<std::string, atl_kernel_info_t> &KernelInfoTable) {
// parse code object with different keys from v2
// also, the kernel name is not the same as the symbol name -- so a
// symbol->name map is needed
std::pair<const unsigned char *, const unsigned char *> metadata =
find_metadata(binary, binSize);
if (!metadata.first) {
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
uint64_t kernelsSize = 0;
// Running error counter: every lookup helper adds a non-zero value on
// failure, and the counter is checked (and the function left) right after.
int msgpack_errors = 0;
msgpack::byte_range kernel_array;
msgpack_errors =
map_lookup_array({metadata.first, metadata.second}, "amdhsa.kernels",
&kernel_array, &kernelsSize);
if (msgpack_errors != 0) {
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
"kernels lookup in program metadata");
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
for (size_t i = 0; i < kernelsSize; i++) {
// Holds because every failure above/below returns immediately.
assert(msgpack_errors == 0);
std::string kernelName;
std::string symbolName;
msgpack::byte_range element;
msgpack_errors += array_lookup_element(kernel_array, i, &element);
if (msgpack_errors != 0) {
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
"element lookup in kernel metadata");
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
msgpack_errors += map_lookup_string(element, ".name", &kernelName);
msgpack_errors += map_lookup_string(element, ".symbol", &symbolName);
if (msgpack_errors != 0) {
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
"strings lookup in kernel metadata");
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
// Make sure that kernelName + ".kd" == symbolName
if ((kernelName + ".kd") != symbolName) {
printf("[%s:%d] Kernel name mismatching symbol: %s != %s + .kd\n",
__FILE__, __LINE__, symbolName.c_str(), kernelName.c_str());
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// The four counters below are only read after their lookup succeeded, so
// leaving them uninitialised here is harmless.
uint64_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count;
msgpack_errors += map_lookup_uint64_t(element, ".sgpr_count", &sgpr_count);
if (msgpack_errors != 0) {
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
"sgpr count metadata lookup in kernel metadata");
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
info.sgpr_count = sgpr_count;
msgpack_errors += map_lookup_uint64_t(element, ".vgpr_count", &vgpr_count);
if (msgpack_errors != 0) {
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
"vgpr count metadata lookup in kernel metadata");
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
info.vgpr_count = vgpr_count;
msgpack_errors +=
map_lookup_uint64_t(element, ".sgpr_spill_count", &sgpr_spill_count);
if (msgpack_errors != 0) {
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
"sgpr spill count metadata lookup in kernel metadata");
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
info.sgpr_spill_count = sgpr_spill_count;
msgpack_errors +=
map_lookup_uint64_t(element, ".vgpr_spill_count", &vgpr_spill_count);
if (msgpack_errors != 0) {
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
"vgpr spill count metadata lookup in kernel metadata");
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
info.vgpr_spill_count = vgpr_spill_count;
// Sum of the sizes of the explicit arguments plus the gaps in front of every
// argument (explicit or implicit); see the loop below.
size_t kernel_explicit_args_size = 0;
uint64_t kernel_segment_size;
msgpack_errors += map_lookup_uint64_t(element, ".kernarg_segment_size",
&kernel_segment_size);
if (msgpack_errors != 0) {
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
"kernarg segment size metadata lookup in kernel metadata");
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
bool hasHiddenArgs = false;
// The ".args" array is only required when the kernel has a non-empty
// kernarg segment.
if (kernel_segment_size > 0) {
uint64_t argsSize;
// End offset (offset + size) of the previously visited argument.
size_t offset = 0;
msgpack::byte_range args_array;
msgpack_errors +=
map_lookup_array(element, ".args", &args_array, &argsSize);
if (msgpack_errors != 0) {
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
"kernel args metadata lookup in kernel metadata");
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
// Note: this index shadows the kernel index `i` of the enclosing loop.
for (size_t i = 0; i < argsSize; ++i) {
KernelArgMD lcArg;
msgpack::byte_range args_element;
msgpack_errors += array_lookup_element(args_array, i, &args_element);
if (msgpack_errors != 0) {
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
"iterate args map in kernel args metadata");
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
msgpack_errors += populate_kernelArgMD(args_element, &lcArg);
if (msgpack_errors != 0) {
printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
"iterate args map in kernel args metadata");
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}
// v3 has offset field and not align field
size_t new_offset = lcArg.offset_;
// Gap between the end of the previous argument and the start of this one.
// NOTE(review): unsigned subtraction; it wraps if an argument starts before
// the previous one ends -- confirm the metadata guarantees ascending,
// non-overlapping offsets.
size_t padding = new_offset - offset;
offset = new_offset;
DP("Arg[%lu] \"%s\" (%u, %u)\n", i, lcArg.name_.c_str(), lcArg.size_,
lcArg.offset_);
offset += lcArg.size_;
// check if the arg is a hidden/implicit arg
// this logic assumes that all hidden args are 8-byte aligned
if (!isImplicit(lcArg.valueKind_)) {
info.explicit_argument_count++;
kernel_explicit_args_size += lcArg.size_;
} else {
info.implicit_argument_count++;
hasHiddenArgs = true;
}
// The gap is counted for implicit arguments as well, only their size is
// left out.
kernel_explicit_args_size += padding;
}
}
// TODO: Probably don't want this arithmetic
// With hidden arguments the recorded size is the explicit-argument total
// computed above; otherwise it is the size reported by the metadata.
info.kernel_segment_size =
(hasHiddenArgs ? kernel_explicit_args_size : kernel_segment_size);
DP("[%s: kernarg seg size] (%lu --> %u)\n", kernelName.c_str(),
kernel_segment_size, info.kernel_segment_size);
// kernel received, now add it to the kernel info table
KernelInfoTable[kernelName] = info;
}
return HSA_STATUS_SUCCESS;
}
// Reads the name of `symbol` into `name`. The result holds exactly the number
// of characters reported by HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH.
// Returns the HSA status of the failing query (after printing a diagnostic),
// or HSA_STATUS_SUCCESS. `name` is only modified on success.
static hsa_status_t
fetch_executable_symbol_name(hsa_executable_symbol_t symbol,
                             std::string &name) {
  uint32_t name_length = 0;
  hsa_status_t err = hsa_executable_symbol_get_info(
      symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &name_length);
  if (err != HSA_STATUS_SUCCESS) {
    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
           "Symbol info extraction", get_error_string(err));
    return err;
  }

  // Owning buffer instead of malloc/free: nothing leaks on the error paths.
  // One spare, zero-filled character keeps the buffer non-empty and
  // NUL-terminated even for a zero-length name.
  std::string buffer(static_cast<size_t>(name_length) + 1, '\0');
  err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME,
                                       &buffer[0]);
  if (err != HSA_STATUS_SUCCESS) {
    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
           "Symbol info extraction", get_error_string(err));
    return err;
  }
  buffer.resize(name_length);
  name.swap(buffer);
  return HSA_STATUS_SUCCESS;
}

// Symbol-iteration callback body: records what the HSA runtime knows about
// `symbol` in the info tables.
//
//  - Kernel symbols: the trailing three characters (the ".kd" suffix) are
//    dropped from the symbol name, the matching KernelInfoTable entry -- which
//    must already exist -- is completed with the kernel object handle and the
//    group/private segment sizes. The entry is only written back when all
//    queries succeeded.
//  - Variable symbols: address and size are stored in SymbolInfoTable under
//    the symbol name.
//  - Any other symbol kind is ignored.
//
// Returns HSA_STATUS_SUCCESS, the status of a failing HSA query, or
// HSA_STATUS_ERROR when a kernel symbol has no KernelInfoTable entry or a name
// too short to carry the suffix.
static hsa_status_t
populate_InfoTables(hsa_executable_symbol_t symbol,
                    std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
                    std::map<std::string, atl_symbol_info_t> &SymbolInfoTable) {
  hsa_symbol_kind_t type;
  hsa_status_t err;

  err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE,
                                       &type);
  if (err != HSA_STATUS_SUCCESS) {
    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
           "Symbol info extraction", get_error_string(err));
    return err;
  }
  DP("Exec Symbol type: %d\n", type);

  if (type == HSA_SYMBOL_KIND_KERNEL) {
    std::string name;
    err = fetch_executable_symbol_name(symbol, name);
    if (err != HSA_STATUS_SUCCESS)
      return err;

    // remove the suffix .kd from symbol name.
    // Guarded: the unchecked index `name_length - 3` used previously was out
    // of bounds for names shorter than three characters.
    if (name.size() < 3) {
      printf("[%s:%d] Kernel symbol name is too short for a .kd suffix\n",
             __FILE__, __LINE__);
      return HSA_STATUS_ERROR;
    }
    name.resize(name.size() - 3);
    // c_str() keeps the C-string semantics of the previous implementation
    // (the name ends at the first NUL character).
    std::string kernelName(name.c_str());

    // by now, the kernel info table should already have an entry
    // because the non-ROCr custom code object parsing is called before
    // iterating over the code object symbols using ROCr
    auto entry = KernelInfoTable.find(kernelName);
    if (entry == KernelInfoTable.end()) {
      DP("amdgpu internal consistency error\n");
      return HSA_STATUS_ERROR;
    }
    // found, so work on a copy and only publish it once fully updated
    atl_kernel_info_t info = entry->second;

    /* Extract dispatch information from the symbol */
    err = hsa_executable_symbol_get_info(
        symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
        &(info.kernel_object));
    if (err != HSA_STATUS_SUCCESS) {
      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
             "Extracting the symbol from the executable",
             get_error_string(err));
      return err;
    }
    err = hsa_executable_symbol_get_info(
        symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
        &(info.group_segment_size));
    if (err != HSA_STATUS_SUCCESS) {
      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
             "Extracting the group segment size from the executable",
             get_error_string(err));
      return err;
    }
    err = hsa_executable_symbol_get_info(
        symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
        &(info.private_segment_size));
    if (err != HSA_STATUS_SUCCESS) {
      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
             "Extracting the private segment from the executable",
             get_error_string(err));
      return err;
    }

    DP("Kernel %s --> %lx symbol %u group segsize %u pvt segsize %u bytes "
       "kernarg\n",
       kernelName.c_str(), info.kernel_object, info.group_segment_size,
       info.private_segment_size, info.kernel_segment_size);

    // assign it back to the kernel info table
    entry->second = info;
  } else if (type == HSA_SYMBOL_KIND_VARIABLE) {
    std::string name;
    err = fetch_executable_symbol_name(symbol, name);
    if (err != HSA_STATUS_SUCCESS)
      return err;

    atl_symbol_info_t info;
    err = hsa_executable_symbol_get_info(
        symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &(info.addr));
    if (err != HSA_STATUS_SUCCESS) {
      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
             "Symbol info address extraction", get_error_string(err));
      return err;
    }
    err = hsa_executable_symbol_get_info(
        symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &(info.size));
    if (err != HSA_STATUS_SUCCESS) {
      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
             "Symbol info size extraction", get_error_string(err));
      return err;
    }

    DP("Symbol %s = %p (%u bytes)\n", name.c_str(), (void *)info.addr,
       info.size);
    // Same C-string semantics as the kernel branch.
    SymbolInfoTable[std::string(name.c_str())] = info;
  } else {
    DP("Symbol is an indirect function\n");
  }
  return HSA_STATUS_SUCCESS;
}
// Loads the code object stored in `module_bytes` (`module_size` bytes) for
// `agent` and fills the kernel and symbol info tables from it.
//
// Steps: create an executable, parse the custom code object metadata into
// KernelInfoTable, deserialize the code object, hand the deserialized data to
// `on_deserialized_data(data, module_size, cb_state)`, load the code object
// into the executable, freeze it, and visit every symbol with
// populate_InfoTables.
//
// On success the executable is appended to `HSAExecutables` (whose owner is
// expected to destroy it during finalization) and HSA_STATUS_SUCCESS is
// returned. On failure the executable created here is destroyed again and an
// error is returned: the callback's own status if the callback failed,
// HSA_STATUS_ERROR otherwise. The info tables may have been partially filled
// by then.
hsa_status_t RegisterModuleFromMemory(
    std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
    std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
    void *module_bytes, size_t module_size, hsa_agent_t agent,
    hsa_status_t (*on_deserialized_data)(void *data, size_t size,
                                         void *cb_state),
    void *cb_state, std::vector<hsa_executable_t> &HSAExecutables) {
  hsa_status_t err;
  hsa_executable_t executable = {0};
  hsa_profile_t agent_profile;

  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile);
  if (err != HSA_STATUS_SUCCESS) {
    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
           "Query the agent profile", get_error_string(err));
    return HSA_STATUS_ERROR;
  }
  // FIXME: Assume that every profile is FULL until we understand how to build
  // GCN with base profile
  agent_profile = HSA_PROFILE_FULL;

  /* Create the empty executable. */
  err = hsa_executable_create(agent_profile, HSA_EXECUTABLE_STATE_UNFROZEN, "",
                              &executable);
  if (err != HSA_STATUS_SUCCESS) {
    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
           "Create the executable", get_error_string(err));
    return HSA_STATUS_ERROR;
  }

  // Until the executable is stored in HSAExecutables it is owned by this
  // function, so every failure path below has to release it. Returns `status`
  // unchanged so it can be used directly in a return statement.
  auto discard_executable = [&](hsa_status_t status) -> hsa_status_t {
    hsa_status_t destroy_err = hsa_executable_destroy(executable);
    if (destroy_err != HSA_STATUS_SUCCESS) {
      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
             "Destroy the executable", get_error_string(destroy_err));
    }
    return status;
  };

  bool module_load_success = false;
  do // single pass; `break` leaves on the first failing step
  {
    // Some metadata info is not available through ROCr API, so use custom
    // code object metadata parsing to collect such metadata info
    err = get_code_object_custom_metadata(module_bytes, module_size,
                                          KernelInfoTable);
    if (err != HSA_STATUS_SUCCESS) {
      DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
         "Getting custom code object metadata", get_error_string(err));
      break;
    }

    // Deserialize code object.
    // NOTE(review): the code object handle is never passed to a destroy call
    // in this function -- confirm whether it is released elsewhere.
    hsa_code_object_t code_object = {0};
    err = hsa_code_object_deserialize(module_bytes, module_size, NULL,
                                      &code_object);
    if (err != HSA_STATUS_SUCCESS) {
      DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
         "Code Object Deserialization", get_error_string(err));
      break;
    }
    assert(0 != code_object.handle);

    // Mutating the device image here avoids another allocation & memcpy
    void *code_object_alloc_data =
        reinterpret_cast<void *>(code_object.handle);
    hsa_status_t impl_err =
        on_deserialized_data(code_object_alloc_data, module_size, cb_state);
    if (impl_err != HSA_STATUS_SUCCESS) {
      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
             "Error in deserialized_data callback",
             get_error_string(impl_err));
      // The callback's status is reported to the caller as-is.
      return discard_executable(impl_err);
    }

    /* Load the code object. */
    err =
        hsa_executable_load_code_object(executable, agent, code_object, NULL);
    if (err != HSA_STATUS_SUCCESS) {
      DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
         "Loading the code object", get_error_string(err));
      break;
    }
    // cannot iterate over symbols until executable is frozen
    module_load_success = true;
  } while (0);

  DP("Modules loaded successful? %d\n", module_load_success);
  if (!module_load_success)
    return discard_executable(HSA_STATUS_ERROR);

  /* Freeze the executable; it can now be queried for symbols. */
  err = hsa_executable_freeze(executable, "");
  if (err != HSA_STATUS_SUCCESS) {
    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
           "Freeze the executable", get_error_string(err));
    return discard_executable(HSA_STATUS_ERROR);
  }

  err = hsa::executable_iterate_symbols(
      executable,
      [&](hsa_executable_t, hsa_executable_symbol_t symbol) -> hsa_status_t {
        return populate_InfoTables(symbol, KernelInfoTable, SymbolInfoTable);
      });
  if (err != HSA_STATUS_SUCCESS) {
    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
           "Iterating over symbols for execuatable", get_error_string(err));
    return discard_executable(HSA_STATUS_ERROR);
  }

  // save the executable and destroy during finalize
  HSAExecutables.push_back(executable);
  return HSA_STATUS_SUCCESS;
}
} // namespace core

View File

@ -1,20 +0,0 @@
//===--- amdgpu/src/print_tracing.h ------------------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LIBOMPTARGET_PLUGINS_AMGGPU_SRC_PRINT_TRACING_H_INCLUDED
#define LIBOMPTARGET_PLUGINS_AMGGPU_SRC_PRINT_TRACING_H_INCLUDED
// Flags controlling the plugin's trace output. Every enumerator is a distinct
// single-bit value.
enum PrintTraceControlBits {
  // print a message to stderr for each kernel launch
  LAUNCH = 1 << 0,
  // Print timing info around each RTL step
  RTL_TIMING = 1 << 1,
  // Details around loading up kernel
  STARTUP_DETAILS = 1 << 2,
  // Redirect RTL tracing to stdout
  RTL_TO_STDOUT = 1 << 3
};
extern int print_kernel_trace; // set by environment variable
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,14 +0,0 @@
##===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
##===----------------------------------------------------------------------===##
#
# Common parts which can be used by all plugins
#
##===----------------------------------------------------------------------===##
# Add the helper libraries shared by the plugins, in this order.
foreach(libomptarget_common_subdir IN ITEMS elf_common MemoryManager)
  add_subdirectory(${libomptarget_common_subdir})
endforeach()

View File

@ -1,76 +0,0 @@
##===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
##===----------------------------------------------------------------------===##
#
# Build a plugin for a CUDA machine if available.
#
##===----------------------------------------------------------------------===##
# User switch for the whole plugin.
set(LIBOMPTARGET_BUILD_CUDA_PLUGIN TRUE CACHE BOOL
  "Whether to build CUDA plugin")

if (NOT LIBOMPTARGET_BUILD_CUDA_PLUGIN)
  libomptarget_say("Not building CUDA offloading plugin: LIBOMPTARGET_BUILD_CUDA_PLUGIN is false")
  return()
endif()

# The alternatives share one group so that the trailing `$` anchors all of
# them; written as "(a)|(b)|(c)$" the anchor would only apply to the last one.
if (NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64|ppc64le|aarch64)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux"))
  libomptarget_say("Not building CUDA offloading plugin: only support CUDA in Linux x86_64, ppc64le, or aarch64 hosts.")
  return()
endif()

libomptarget_say("Building CUDA offloading plugin.")

# Default for the option below: link against libcuda when it was found.
set(LIBOMPTARGET_DLOPEN_LIBCUDA OFF)
option(LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA "Build with dlopened libcuda" ${LIBOMPTARGET_DLOPEN_LIBCUDA})

add_llvm_library(omptarget.rtl.cuda SHARED
  src/rtl.cpp

  LINK_COMPONENTS
  Support
  Object

  LINK_LIBS PRIVATE
  elf_common
  MemoryManager
  ${OPENMP_PTHREAD_LIB}
  "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports,-z,defs"

  NO_INSTALL_RPATH
)

# Either link the CUDA driver directly, or build the dlopen-based wrapper from
# dynamic_cuda/ when the driver was not found or dlopen was forced.
if(LIBOMPTARGET_DEP_CUDA_FOUND AND NOT LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA)
  libomptarget_say("Building CUDA plugin linked against libcuda")
  target_link_libraries(omptarget.rtl.cuda PRIVATE CUDA::cuda_driver)
else()
  libomptarget_say("Building CUDA plugin for dlopened libcuda")
  target_include_directories(omptarget.rtl.cuda PRIVATE dynamic_cuda)
  target_sources(omptarget.rtl.cuda PRIVATE dynamic_cuda/cuda.cpp)
endif()

# Define the suffix for the runtime messaging dumps.
target_compile_definitions(omptarget.rtl.cuda PRIVATE TARGET_NAME="CUDA")
target_include_directories(omptarget.rtl.cuda PRIVATE ${LIBOMPTARGET_INCLUDE_DIR})

# Install plugin under the lib destination folder.
install(TARGETS omptarget.rtl.cuda LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
set_target_properties(omptarget.rtl.cuda PROPERTIES
  INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
  CXX_VISIBILITY_PRESET protected)

# Report to the parent scope that we are building a plugin for CUDA.
# This controls whether tests are run for the nvptx offloading target.
# Tests are enabled when LIBOMPTARGET_FOUND_NVIDIA_GPU is set, or when the user
# forces them with LIBOMPTARGET_FORCE_NVIDIA_TESTS; otherwise the plugin is
# built without registering any tests.
option(LIBOMPTARGET_FORCE_NVIDIA_TESTS "Build NVIDIA libomptarget tests" OFF)
if (LIBOMPTARGET_FOUND_NVIDIA_GPU OR LIBOMPTARGET_FORCE_NVIDIA_TESTS)
  libomptarget_say("Enable tests using CUDA plugin")
  set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} nvptx64-nvidia-cuda nvptx64-nvidia-cuda-LTO" PARENT_SCOPE)
  list(APPEND LIBOMPTARGET_TESTED_PLUGINS "omptarget.rtl.cuda")
  set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
else()
  libomptarget_say("Not generating NVIDIA tests, no supported devices detected. Use 'LIBOMPTARGET_FORCE_NVIDIA_TESTS' to override.")
endif()

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +0,0 @@
/* Linker version script: symbols matching __tgt_rtl* are given global
   visibility under the version node VERS1.0; every other symbol is local. */
VERS1.0 {
global:
__tgt_rtl*;
local:
*;
};

View File

@ -1,280 +0,0 @@
//===-RTLs/generic-64bit/src/rtl.cpp - Target RTLs Implementation - C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// RTL for generic 64-bit machine
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DynamicLibrary.h"
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ffi.h>
#include <link.h>
#include <list>
#include <string>
#include <unistd.h>
#include <vector>
#include "Debug.h"
#include "omptargetplugin.h"
using namespace llvm;
using namespace llvm::sys;
#ifndef TARGET_NAME
#define TARGET_NAME Generic ELF - 64bit
#endif
#define DEBUG_PREFIX "TARGET " GETNAME(TARGET_NAME) " RTL"
#ifndef TARGET_ELF_ID
#define TARGET_ELF_ID 0
#endif
#include "elf_common.h"
#define NUMBER_OF_DEVICES 4
#define OFFLOAD_SECTION_NAME "omp_offloading_entries"
/// One dynamic library loaded for this target: the file it was loaded from and
/// the handle obtained from loading it. Instances are aggregate-initialised
/// positionally, so the member order matters.
struct DynLibTy {
// Path of the file the library was loaded from.
std::string FileName;
// Handle of the loaded library.
std::unique_ptr<DynamicLibrary> DynLib;
};
/// Keep entries table per device.
/// `Entries` owns the offload entries; `Table` is the view on them that is
/// handed out to callers.
struct FuncOrGblEntryTy {
// Begin/end pointers of the entries table.
// NOTE(review): presumably these point into `Entries` and must be refreshed
// whenever `Entries` is reassigned -- confirm against the code filling them.
__tgt_target_table Table;
// Storage for the entries.
SmallVector<__tgt_offload_entry> Entries;
};
/// Class containing all the device information: the offload tables created
/// for each device and the dynamic libraries loaded so far.
class RTLDeviceInfoTy {
  /// One list of offload tables per device; the most recently created table
  /// of a device is at the back of its list.
  std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries;

  /// True if \p DeviceId indexes an existing per-device list.
  bool isValidDevice(int32_t DeviceId) const {
    return DeviceId >= 0 && DeviceId < (int32_t)FuncGblEntries.size();
  }

public:
  /// Libraries loaded through this plugin; their files are deleted when this
  /// object is destroyed.
  std::list<DynLibTy> DynLibs;

  /// Record entry point associated with device. Takes ownership of
  /// \p Entries and makes the new table the current one for \p DeviceId.
  void createOffloadTable(int32_t DeviceId,
                          SmallVector<__tgt_offload_entry> &&Entries) {
    assert(isValidDevice(DeviceId) && "Unexpected device id!");
    FuncGblEntries[DeviceId].emplace_back();
    FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();

    // Move instead of copy: the parameter is an rvalue reference.
    E.Entries = std::move(Entries);
    // The table pointers are taken after the assignment, so they refer to the
    // storage now owned by E (which lives in a std::list and never moves).
    E.Table.EntriesBegin = E.Entries.begin();
    E.Table.EntriesEnd = E.Entries.end();
  }

  /// Return true if \p Addr is the address of an entry in the current table
  /// of \p DeviceId. Returns false if no table has been created yet.
  bool findOffloadEntry(int32_t DeviceId, void *Addr) {
    assert(isValidDevice(DeviceId) && "Unexpected device id!");
    // back() on an empty list is undefined behaviour.
    if (FuncGblEntries[DeviceId].empty())
      return false;
    FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();

    for (__tgt_offload_entry *I = E.Table.EntriesBegin,
                             *End = E.Table.EntriesEnd;
         I < End; ++I) {
      if (I->addr == Addr)
        return true;
    }
    return false;
  }

  /// Return the pointer to the current target entries table of \p DeviceId,
  /// or nullptr if no table has been created for that device yet.
  __tgt_target_table *getOffloadEntriesTable(int32_t DeviceId) {
    assert(isValidDevice(DeviceId) && "Unexpected device id!");
    if (FuncGblEntries[DeviceId].empty())
      return nullptr;
    FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
    return &E.Table;
  }

  RTLDeviceInfoTy(int32_t NumDevices) { FuncGblEntries.resize(NumDevices); }

  ~RTLDeviceInfoTy() {
    // Delete the files backing the successfully loaded libraries. The
    // libraries themselves are not unloaded here.
    for (auto &Lib : DynLibs) {
      if (Lib.DynLib && Lib.DynLib->isValid())
        remove(Lib.FileName.c_str());
    }
  }
};
static RTLDeviceInfoTy DeviceInfo(NUMBER_OF_DEVICES);
#ifdef __cplusplus
extern "C" {
#endif
// Reports whether `Image` can be handled by this plugin: the result of
// elf_check_machine for TARGET_ELF_ID, or 0 when the plugin was built without
// a valid ELF machine id.
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
#if TARGET_ELF_ID >= 1
  return elf_check_machine(Image, TARGET_ELF_ID);
#else
  // If we don't have a valid ELF ID we can just fail.
  return 0;
#endif
}
// Returns the fixed device count this plugin is compiled with.
int32_t __tgt_rtl_number_of_devices() {
  const int32_t DeviceCount = NUMBER_OF_DEVICES;
  return DeviceCount;
}
// Nothing to initialise for this plugin; always reports success.
int32_t __tgt_rtl_init_device(int32_t DeviceId) {
  return OFFLOAD_SUCCESS;
}
// Loads the device image `Image` for device `DeviceId` and returns the table
// of offload entries resolved against it, or nullptr on failure.
//
// The image is written to a temporary file, loaded as a dynamic library, and
// each host entry is paired with the address of the symbol of the same name in
// that library. On success the library is recorded in DeviceInfo.DynLibs. On
// every failure path the temporary file is removed again.
__tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
                                          __tgt_device_image *Image) {
  DP("Dev %d: load binary from " DPxMOD " image\n", DeviceId,
     DPxPTR(Image->ImageStart));

  assert(DeviceId >= 0 && DeviceId < NUMBER_OF_DEVICES && "bad dev id");

  size_t ImageSize = (size_t)Image->ImageEnd - (size_t)Image->ImageStart;

  __tgt_offload_entry *HostBegin = Image->EntriesBegin;
  __tgt_offload_entry *HostEnd = Image->EntriesEnd;

  // Reject images with a null host entry before anything is written to disk
  // or loaded: the library is loaded permanently and cannot be undone later.
  for (__tgt_offload_entry *E = HostBegin; E != HostEnd; ++E) {
    if (!E->addr)
      return nullptr;
  }

  // load dynamic library and get the entry points. We use the dl library
  // to do the loading of the library, but we could do it directly to avoid the
  // dump to the temporary file.
  //
  // 1) Create tmp file with the library contents.
  // 2) Use dlopen to load the file and dlsym to retrieve the symbols.
  char TmpName[] = "/tmp/tmpfile_XXXXXX";
  int TmpFd = mkstemp(TmpName);
  if (TmpFd == -1)
    return nullptr;

  FILE *Ftmp = fdopen(TmpFd, "wb");
  if (!Ftmp) {
    // fdopen did not take ownership of the descriptor.
    close(TmpFd);
    remove(TmpName);
    return nullptr;
  }

  // A truncated file must not be handed to the loader.
  const bool Written =
      ImageSize == 0 || fwrite(Image->ImageStart, ImageSize, 1, Ftmp) == 1;
  const bool Closed = fclose(Ftmp) == 0;
  if (!Written || !Closed) {
    DP("Failed to write the device image to %s\n", TmpName);
    remove(TmpName);
    return nullptr;
  }

  std::string ErrMsg;
  auto DynLib = std::make_unique<sys::DynamicLibrary>(
      sys::DynamicLibrary::getPermanentLibrary(TmpName, &ErrMsg));
  DynLibTy Lib = {TmpName, std::move(DynLib)};

  if (!Lib.DynLib->isValid()) {
    DP("Target library loading error: %s\n", ErrMsg.c_str());
    // The library is not recorded in DynLibs, so nobody else would delete
    // the file.
    remove(TmpName);
    return NULL;
  }

  // Create a new offloading entry list using the device symbol address.
  SmallVector<__tgt_offload_entry> Entries;
  for (__tgt_offload_entry *E = HostBegin; E != HostEnd; ++E) {
    __tgt_offload_entry Entry = *E;

    void *DevAddr = Lib.DynLib->getAddressOfSymbol(E->name);
    Entry.addr = DevAddr;

    DP("Entry point " DPxMOD " maps to global %s (" DPxMOD ")\n",
       DPxPTR(E - HostBegin), E->name, DPxPTR(DevAddr));

    Entries.emplace_back(Entry);
  }

  DeviceInfo.createOffloadTable(DeviceId, std::move(Entries));
  DeviceInfo.DynLibs.emplace_back(std::move(Lib));

  return DeviceInfo.getOffloadEntriesTable(DeviceId);
}
// Prints a fixed one-line description of this device kind to stdout.
void __tgt_rtl_print_device_info(int32_t DeviceId) {
  fputs(" This is a generic-elf-64bit device\n", stdout);
}
// Sample implementation of explicit memory allocator. For this plugin all kinds
// are equivalent to each other.
// Allocates `Size` bytes with malloc for any of the known allocation kinds
// (device, host, shared, default). An unknown kind is reported and yields
// NULL. `DeviceId` and `HstPtr` are not used.
void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr,
                           int32_t Kind) {
  const bool IsKnownKind =
      Kind == TARGET_ALLOC_DEVICE || Kind == TARGET_ALLOC_HOST ||
      Kind == TARGET_ALLOC_SHARED || Kind == TARGET_ALLOC_DEFAULT;

  if (!IsKnownKind) {
    REPORT("Invalid target data allocation kind");
    return NULL;
  }

  return malloc(Size);
}
// Copies `Size` bytes from the host buffer `HstPtr` to the target buffer
// `TgtPtr`. Always reports success.
int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
                              int64_t Size) {
  void *Destination = TgtPtr;
  const void *Source = HstPtr;
  memcpy(Destination, Source, Size);
  return OFFLOAD_SUCCESS;
}
// Copies `Size` bytes from the target buffer `TgtPtr` back to the host buffer
// `HstPtr`. Always reports success.
int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
                                int64_t Size) {
  void *Destination = HstPtr;
  const void *Source = TgtPtr;
  memcpy(Destination, Source, Size);
  return OFFLOAD_SUCCESS;
}
// Releases a buffer with free(). The allocation kind (third parameter) is
// ignored. Always reports success.
int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t) {
  void *Allocation = TgtPtr;
  free(Allocation);
  return OFFLOAD_SUCCESS;
}
// Calls the target entry point `TgtEntryPtr` through libffi.
//
// Every kernel argument is passed as a pointer: argument I is
// TgtArgs[I] + TgtOffsets[I]. Team and thread-limit settings are ignored
// (only their first dimension may be set). `DeviceId` and `AsyncInfoPtr` are
// not used; the call completes before this function returns.
//
// Returns OFFLOAD_SUCCESS, or OFFLOAD_FAIL if the call interface could not be
// prepared.
int32_t __tgt_rtl_launch_kernel(int32_t DeviceId, void *TgtEntryPtr,
                                void **TgtArgs, ptrdiff_t *TgtOffsets,
                                KernelArgsTy *KernelArgs,
                                __tgt_async_info *AsyncInfoPtr) {
  assert(!KernelArgs->NumTeams[1] && !KernelArgs->NumTeams[2] &&
         !KernelArgs->ThreadLimit[1] && !KernelArgs->ThreadLimit[2] &&
         "Only one dimensional kernels supported.");
  // ignore team num and thread limit.

  // Use libffi to launch execution.
  ffi_cif Cif;

  // All args are references.
  std::vector<ffi_type *> ArgsTypes(KernelArgs->NumArgs, &ffi_type_pointer);
  std::vector<void *> Args(KernelArgs->NumArgs);
  std::vector<void *> Ptrs(KernelArgs->NumArgs);

  for (uint32_t I = 0; I < KernelArgs->NumArgs; ++I) {
    Ptrs[I] = (void *)((intptr_t)TgtArgs[I] + TgtOffsets[I]);
    // libffi expects the address of each argument value.
    Args[I] = &Ptrs[I];
  }

  // data() instead of &vec[0]: indexing an empty vector is undefined
  // behaviour, which happened for kernels without arguments.
  ffi_status Status = ffi_prep_cif(&Cif, FFI_DEFAULT_ABI, KernelArgs->NumArgs,
                                   &ffi_type_void, ArgsTypes.data());

  assert(Status == FFI_OK && "Unable to prepare target launch!");

  // Still checked at run time for builds without assertions.
  if (Status != FFI_OK)
    return OFFLOAD_FAIL;

  DP("Running entry point at " DPxMOD "...\n", DPxPTR(TgtEntryPtr));

  // Store the data pointer into a function pointer object.
  void (*Entry)(void);
  *((void **)&Entry) = TgtEntryPtr;
  ffi_call(&Cif, Entry, NULL, Args.data());
  return OFFLOAD_SUCCESS;
}
#ifdef __cplusplus
}
#endif

View File

@ -1,17 +0,0 @@
##===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
##===----------------------------------------------------------------------===##
#
# Build a plugin for a ppc64 machine if available.
#
##===----------------------------------------------------------------------===##
# Only Linux hosts are considered. Arguments of build_generic_elf64: processor
# name, display name, library-name suffix, target triple, ELF machine id.
if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux")
  libomptarget_say("Not building ppc64 offloading plugin: machine not found in the system.")
else()
  build_generic_elf64("ppc64" "PPC64" "ppc64" "powerpc64-ibm-linux-gnu" "21")
endif()

View File

@ -1,17 +0,0 @@
##===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
##===----------------------------------------------------------------------===##
#
# Build a plugin for a ppc64le machine if available.
#
##===----------------------------------------------------------------------===##
# Only Linux hosts are considered. Arguments of build_generic_elf64: processor
# name, display name, library-name suffix, target triple, ELF machine id.
# The library-name suffix passed here is "ppc64", not "ppc64le".
if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux")
  libomptarget_say("Not building ppc64le offloading plugin: machine not found in the system.")
else()
  build_generic_elf64("ppc64le" "PPC64le" "ppc64" "powerpc64le-ibm-linux-gnu" "21")
endif()

View File

@ -1,17 +0,0 @@
##===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
##===----------------------------------------------------------------------===##
#
# Build a plugin for a x86_64 machine if available.
#
##===----------------------------------------------------------------------===##
# Only Linux hosts are considered. Arguments of build_generic_elf64: processor
# name, display name, library-name suffix, target triple, ELF machine id.
if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux")
  libomptarget_say("Not building x86_64 offloading plugin: machine not found in the system.")
else()
  build_generic_elf64("x86_64" "x86_64" "x86_64" "x86_64-pc-linux-gnu" "62")
endif()

View File

@ -99,8 +99,6 @@ void RTLsTy::loadRTLs() {
DP("Loading RTLs...\n");
BoolEnvar NextGenPlugins("LIBOMPTARGET_NEXTGEN_PLUGINS", true);
// Attempt to open all the plugins and, if they exist, check if the interface
// is correct and if they are supporting any devices.
for (const char *Name : RTLNames) {
@ -109,13 +107,6 @@ void RTLsTy::loadRTLs() {
RTLInfoTy &RTL = AllRTLs.back();
const std::string BaseRTLName(Name);
if (NextGenPlugins) {
if (attemptLoadRTL(BaseRTLName + ".nextgen.so", RTL))
continue;
DP("Falling back to original plugin...\n");
}
if (!attemptLoadRTL(BaseRTLName + ".so", RTL))
AllRTLs.pop_back();
}