[OpenMP] Delete old plugins

It's time to remove the old plugins as the next-gen has already been set to default in LLVM 16. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D142820
2024-10-07 10:54:01 +00:00 · 2023-03-21 11:27:37 -05:00 · 2023-03-21 11:27:37 -05:00 · e90ab9148b
commit e90ab9148b
parent 70c08dbcfb
50 changed files with 35 additions and 7228 deletions
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@ -1378,7 +1378,7 @@ clause. Examples for both are given below.
 .. code-block:: console

    $ clang++ -fopenmp --offload-arch=gfx90a -O3 shared.c
-    $ env LIBOMPTARGET_NEXTGEN_PLUGINS=1 ./shared
+    $ ./shared


 .. _libomptarget_device_debugging:
--- a/openmp/libomptarget/CMakeLists.txt
+++ b/openmp/libomptarget/CMakeLists.txt
@ -105,7 +105,6 @@ set(LIBOMPTARGET_LLVM_LIBRARY_DIR "${LLVM_LIBRARY_DIR}" CACHE STRING
  "Path to folder containing llvm library libomptarget.so")

 # Build offloading plugins and device RTLs if they are available.
-add_subdirectory(plugins)
 add_subdirectory(plugins-nextgen)
 add_subdirectory(DeviceRTL)
 add_subdirectory(tools)
--- a/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/CMakeLists.txt
@ -12,12 +12,12 @@

 add_subdirectory(common)

-# void build_generic_elf64_nextgen(string tmachine, string tmachine_name, string tmachine_libname, string elf_machine_id);
+# void build_generic_elf64(string tmachine, string tmachine_name, string tmachine_libname, string tmachine_triple, string elf_machine_id);
 # - build a plugin for an ELF based generic 64-bit target based on libffi.
 # - tmachine: name of the machine processor as used in the cmake build system.
 # - tmachine_name: name of the machine to be printed with the debug messages.
 # - tmachine_libname: machine name to be appended to the plugin library name.
-macro(build_generic_elf64_nextgen tmachine tmachine_name tmachine_libname tmachine_triple elf_machine_id)
+macro(build_generic_elf64 tmachine tmachine_name tmachine_libname tmachine_triple elf_machine_id)
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
  if(LIBOMPTARGET_DEP_LIBFFI_FOUND)

@ -36,7 +36,7 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
    # Define target regiple
    add_definitions("-DLIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE=${tmachine}")

-    add_llvm_library("omptarget.rtl.${tmachine_libname}.nextgen"
+    add_llvm_library("omptarget.rtl.${tmachine_libname}"
      SHARED

      ${CMAKE_CURRENT_SOURCE_DIR}/../generic-elf-64bit/src/rtl.cpp
@ -58,23 +58,23 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
    )

    if (LIBOMP_HAVE_VERSION_SCRIPT_FLAG)
-      target_link_libraries("omptarget.rtl.${tmachine_libname}.nextgen" PRIVATE
+      target_link_libraries("omptarget.rtl.${tmachine_libname}" PRIVATE
        "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports")
    endif()

    # Install plugin under the lib destination folder.
-    install(TARGETS "omptarget.rtl.${tmachine_libname}.nextgen"
+    install(TARGETS "omptarget.rtl.${tmachine_libname}"
      LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
-    set_target_properties("omptarget.rtl.${tmachine_libname}.nextgen" PROPERTIES
+    set_target_properties("omptarget.rtl.${tmachine_libname}" PROPERTIES
      INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
      CXX_VISIBILITY_PRESET protected)

-    target_include_directories( "omptarget.rtl.${tmachine_libname}.nextgen" PRIVATE
+    target_include_directories( "omptarget.rtl.${tmachine_libname}" PRIVATE
      ${LIBOMPTARGET_INCLUDE_DIR}
      ${LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR})

    list(APPEND LIBOMPTARGET_TESTED_PLUGINS
-      "omptarget.rtl.${tmachine_libname}.nextgen")
+      "omptarget.rtl.${tmachine_libname}")

  else(LIBOMPTARGET_DEP_LIBFFI_FOUND)
    libomptarget_say("Not building ${tmachine_name} NextGen offloading plugin: libffi dependency not found.")
--- a/openmp/libomptarget/plugins-nextgen/aarch64/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/aarch64/CMakeLists.txt
@ -11,7 +11,7 @@
 ##===----------------------------------------------------------------------===##

 if(CMAKE_SYSTEM_NAME MATCHES "Linux")
-  build_generic_elf64_nextgen("aarch64" "aarch64" "aarch64" "aarch64-unknown-linux-gnu" "183")
+  build_generic_elf64("aarch64" "aarch64" "aarch64" "aarch64-unknown-linux-gnu" "183")
 else()
 libomptarget_say("Not building aarch64 NextGen offloading plugin: machine not found in the system.")
 endif()
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/CMakeLists.txt
@ -52,8 +52,8 @@ if (${hsa-runtime64_FOUND} AND NOT LIBOMPTARGET_FORCE_DLOPEN_LIBHSA)
  set(LIBOMPTARGET_DEP_LIBRARIES hsa-runtime64::hsa-runtime64)
 else()
  libomptarget_say("Building AMDGPU NextGen plugin for dlopened libhsa")
-  include_directories(../../plugins/amdgpu/dynamic_hsa)
-  set(LIBOMPTARGET_EXTRA_SOURCE ../../plugins/amdgpu/dynamic_hsa/hsa.cpp)
+  include_directories(dynamic_hsa)
+  set(LIBOMPTARGET_EXTRA_SOURCE dynamic_hsa/hsa.cpp)
  set(LIBOMPTARGET_DEP_LIBRARIES)
 endif()

@ -66,7 +66,7 @@ else()
  set(LDFLAGS_UNDEFINED "-Wl,-z,defs")
 endif()

-add_llvm_library(omptarget.rtl.amdgpu.nextgen SHARED
+add_llvm_library(omptarget.rtl.amdgpu SHARED
  src/rtl.cpp
  ${LIBOMPTARGET_EXTRA_SOURCE}

@ -91,16 +91,16 @@ add_llvm_library(omptarget.rtl.amdgpu.nextgen SHARED
 )

 if ((OMPT_TARGET_DEFAULT) AND (LIBOMPTARGET_OMPT_SUPPORT))
-  target_link_libraries(omptarget.rtl.amdgpu.nextgen PRIVATE OMPT)
+  target_link_libraries(omptarget.rtl.amdgpu PRIVATE OMPT)
 endif()

 if (LIBOMP_HAVE_VERSION_SCRIPT_FLAG)
-  target_link_libraries(omptarget.rtl.amdgpu.nextgen PRIVATE
+  target_link_libraries(omptarget.rtl.amdgpu PRIVATE
    "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports")
 endif()

 target_include_directories(
-  omptarget.rtl.amdgpu.nextgen
+  omptarget.rtl.amdgpu
  PRIVATE
  ${LIBOMPTARGET_INCLUDE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/utils
@ -108,7 +108,7 @@ target_include_directories(


 # Install plugin under the lib destination folder.
-install(TARGETS omptarget.rtl.amdgpu.nextgen LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
-set_target_properties(omptarget.rtl.amdgpu.nextgen PROPERTIES
+install(TARGETS omptarget.rtl.amdgpu LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
+set_target_properties(omptarget.rtl.amdgpu PROPERTIES
  INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
  CXX_VISIBILITY_PRESET protected)
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/dynamic_hsa/hsa.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/dynamic_hsa/hsa.cpp
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
--- a/openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/common/CMakeLists.txt
@ -12,3 +12,5 @@

 add_subdirectory(OMPT)
 add_subdirectory(PluginInterface)
+add_subdirectory(MemoryManager)
+add_subdirectory(elf_common)
--- a/openmp/libomptarget/plugins-nextgen/common/MemoryManager/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/common/MemoryManager/CMakeLists.txt
--- a/openmp/libomptarget/plugins-nextgen/common/MemoryManager/MemoryManager.h
+++ b/openmp/libomptarget/plugins-nextgen/common/MemoryManager/MemoryManager.h
--- a/openmp/libomptarget/plugins-nextgen/common/elf_common/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/common/elf_common/CMakeLists.txt
@ -11,7 +11,7 @@
 ##===----------------------------------------------------------------------===##

 # NOTE: Don't try to build `elf_common` using `add_llvm_library`.
-# See openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
+# See openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
 # for more explanation.
 add_library(elf_common OBJECT elf_common.cpp ELFSymbols.cpp)

--- a/openmp/libomptarget/plugins-nextgen/common/elf_common/ELFSymbols.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/elf_common/ELFSymbols.cpp
--- a/openmp/libomptarget/plugins-nextgen/common/elf_common/ELFSymbols.h
+++ b/openmp/libomptarget/plugins-nextgen/common/elf_common/ELFSymbols.h
--- a/openmp/libomptarget/plugins-nextgen/common/elf_common/elf_common.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/elf_common/elf_common.cpp
--- a/openmp/libomptarget/plugins-nextgen/common/elf_common/elf_common.h
+++ b/openmp/libomptarget/plugins-nextgen/common/elf_common/elf_common.h
--- a/openmp/libomptarget/plugins-nextgen/cuda/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/cuda/CMakeLists.txt
@ -26,7 +26,7 @@ libomptarget_say("Building CUDA NextGen offloading plugin.")
 set(LIBOMPTARGET_DLOPEN_LIBCUDA OFF)
 option(LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA "Build with dlopened libcuda" ${LIBOMPTARGET_DLOPEN_LIBCUDA})

-add_llvm_library(omptarget.rtl.cuda.nextgen SHARED
+add_llvm_library(omptarget.rtl.cuda SHARED
  src/rtl.cpp

  LINK_COMPONENTS
@ -43,33 +43,33 @@ add_llvm_library(omptarget.rtl.cuda.nextgen SHARED
 )

 if ((OMPT_TARGET_DEFAULT) AND (LIBOMPTARGET_OMPT_SUPPORT))
-  target_link_libraries(omptarget.rtl.cuda.nextgen PRIVATE OMPT)
+  target_link_libraries(omptarget.rtl.cuda PRIVATE OMPT)
 endif()

 if (LIBOMP_HAVE_VERSION_SCRIPT_FLAG)
-  target_link_libraries(omptarget.rtl.cuda.nextgen PRIVATE
+  target_link_libraries(omptarget.rtl.cuda PRIVATE
  "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports,-z,defs")
 endif()


 if(LIBOMPTARGET_DEP_CUDA_FOUND AND NOT LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA)
  libomptarget_say("Building CUDA plugin linked against libcuda")
-  target_link_libraries(omptarget.rtl.cuda.nextgen PRIVATE CUDA::cuda_driver)
+  target_link_libraries(omptarget.rtl.cuda PRIVATE CUDA::cuda_driver)
 else()
  libomptarget_say("Building CUDA plugin for dlopened libcuda")
-  target_include_directories(omptarget.rtl.cuda.nextgen PRIVATE ../../plugins/cuda/dynamic_cuda)
-  target_sources(omptarget.rtl.cuda.nextgen PRIVATE ../../plugins/cuda/dynamic_cuda/cuda.cpp)
+  target_include_directories(omptarget.rtl.cuda PRIVATE dynamic_cuda)
+  target_sources(omptarget.rtl.cuda PRIVATE dynamic_cuda/cuda.cpp)
 endif()

 # Define debug prefix. TODO: This should be automatized in the Debug.h but it
 # requires changing the original plugins.
-target_compile_definitions(omptarget.rtl.cuda.nextgen PRIVATE TARGET_NAME="CUDA")
-target_compile_definitions(omptarget.rtl.cuda.nextgen PRIVATE DEBUG_PREFIX="TARGET CUDA RTL")
+target_compile_definitions(omptarget.rtl.cuda PRIVATE TARGET_NAME="CUDA")
+target_compile_definitions(omptarget.rtl.cuda PRIVATE DEBUG_PREFIX="TARGET CUDA RTL")

-target_include_directories(omptarget.rtl.cuda.nextgen PRIVATE ${LIBOMPTARGET_INCLUDE_DIR})
+target_include_directories(omptarget.rtl.cuda PRIVATE ${LIBOMPTARGET_INCLUDE_DIR})

 # Install plugin under the lib destination folder.
-install(TARGETS omptarget.rtl.cuda.nextgen LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
-set_target_properties(omptarget.rtl.cuda.nextgen PROPERTIES
+install(TARGETS omptarget.rtl.cuda LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
+set_target_properties(omptarget.rtl.cuda PROPERTIES
  INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
  CXX_VISIBILITY_PRESET protected)
--- a/openmp/libomptarget/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp
--- a/openmp/libomptarget/plugins-nextgen/cuda/dynamic_cuda/cuda.h
+++ b/openmp/libomptarget/plugins-nextgen/cuda/dynamic_cuda/cuda.h
--- a/openmp/libomptarget/plugins-nextgen/ppc64/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/ppc64/CMakeLists.txt
@ -11,7 +11,7 @@
 ##===----------------------------------------------------------------------===##

 if(CMAKE_SYSTEM_NAME MATCHES "Linux")
-  build_generic_elf64_nextgen("ppc64" "PPC64" "ppc64" "powerpc64-ibm-linux-gnu" "21")
+  build_generic_elf64("ppc64" "PPC64" "ppc64" "powerpc64-ibm-linux-gnu" "21")
 else()
 libomptarget_say("Not building ppc64 NextGen offloading plugin: machine not found in the system.")
 endif()
--- a/openmp/libomptarget/plugins-nextgen/ppc64le/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/ppc64le/CMakeLists.txt
@ -11,7 +11,7 @@
 ##===----------------------------------------------------------------------===##

 if(CMAKE_SYSTEM_NAME MATCHES "Linux")
-  build_generic_elf64_nextgen("ppc64le" "PPC64le" "ppc64" "powerpc64le-ibm-linux-gnu" "21")
+  build_generic_elf64("ppc64le" "PPC64le" "ppc64" "powerpc64le-ibm-linux-gnu" "21")
 else()
 libomptarget_say("Not building ppc64le NextGen offloading plugin: machine not found in the system.")
 endif()
--- a/openmp/libomptarget/plugins-nextgen/x86_64/CMakeLists.txt
+++ b/openmp/libomptarget/plugins-nextgen/x86_64/CMakeLists.txt
@ -11,7 +11,7 @@
 ##===----------------------------------------------------------------------===##

 if(CMAKE_SYSTEM_NAME MATCHES "Linux")
-  build_generic_elf64_nextgen("x86_64" "x86_64" "x86_64" "x86_64-pc-linux-gnu" "62")
+  build_generic_elf64("x86_64" "x86_64" "x86_64" "x86_64-pc-linux-gnu" "62")
 else()
 libomptarget_say("Not building x86_64 NextGen offloading plugin: machine not found in the system.")
 endif()
--- a/openmp/libomptarget/plugins/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/CMakeLists.txt
@ -1,89 +0,0 @@
-##===----------------------------------------------------------------------===##
-#
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-##===----------------------------------------------------------------------===##
-#
-# Build plugins for the user system if available.
-#
-##===----------------------------------------------------------------------===##
-
-add_subdirectory(common)
-
-# void build_generic_elf64(string tmachine, string tmachine_name, string tmachine_libname, string elf_machine_id);
-# - build a plugin for an ELF based generic 64-bit target based on libffi.
-# - tmachine: name of the machine processor as used in the cmake build system.
-# - tmachine_name: name of the machine to be printed with the debug messages.
-# - tmachine_libname: machine name to be appended to the plugin library name.
-macro(build_generic_elf64 tmachine tmachine_name tmachine_libname tmachine_triple elf_machine_id)
-if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
-  if(LIBOMPTARGET_DEP_LIBFFI_FOUND)
-
-    libomptarget_say("Building ${tmachine_name} offloading plugin.")
-
-    # Define macro to be used as prefix of the runtime messages for this target.
-    add_definitions("-DTARGET_NAME=${tmachine_name}")
-
-    # Define macro with the ELF ID for this target.
-    add_definitions("-DTARGET_ELF_ID=${elf_machine_id}")
-
-    add_llvm_library("omptarget.rtl.${tmachine_libname}"
-      SHARED
-
-      ${CMAKE_CURRENT_SOURCE_DIR}/../generic-elf-64bit/src/rtl.cpp
-
-      ADDITIONAL_HEADER_DIRS
-      ${LIBOMPTARGET_INCLUDE_DIR}
-      ${LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR}
-
-        LINK_LIBS 
-        PRIVATE
-        elf_common
-        ${LIBOMPTARGET_DEP_LIBFFI_LIBRARIES}
-        ${OPENMP_PTHREAD_LIB}
-        "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports"
-
-      NO_INSTALL_RPATH
-    )
-
-    # Install plugin under the lib destination folder.
-    install(TARGETS "omptarget.rtl.${tmachine_libname}"
-      LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
-    set_target_properties("omptarget.rtl.${tmachine_libname}" PROPERTIES
-      INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
-      CXX_VISIBILITY_PRESET protected)
-
-    target_include_directories( "omptarget.rtl.${tmachine_libname}" PRIVATE
-      ${LIBOMPTARGET_INCLUDE_DIR}
-      ${LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR})
-
-    list(APPEND LIBOMPTARGET_TESTED_PLUGINS
-      "omptarget.rtl.${tmachine_libname}")
-
-    # Report to the parent scope that we are building a plugin.
-    set(LIBOMPTARGET_SYSTEM_TARGETS
-      "${LIBOMPTARGET_SYSTEM_TARGETS} ${tmachine_triple} ${tmachine_triple}-LTO" PARENT_SCOPE)
-    set(LIBOMPTARGET_TESTED_PLUGINS
-      "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
-
-  else(LIBOMPTARGET_DEP_LIBFFI_FOUND)
-    libomptarget_say("Not building ${tmachine_name} offloading plugin: libffi dependency not found.")
-  endif(LIBOMPTARGET_DEP_LIBFFI_FOUND)
-else()
-  libomptarget_say("Not building ${tmachine_name} offloading plugin: machine not found in the system.")
-endif()
-endmacro()
-
-add_subdirectory(aarch64)
-add_subdirectory(amdgpu)
-add_subdirectory(cuda)
-add_subdirectory(ppc64)
-add_subdirectory(ppc64le)
-add_subdirectory(x86_64)
-
-# Make sure the parent scope can see the plugins that will be created.
-set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}" PARENT_SCOPE)
-set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
-
--- a/openmp/libomptarget/plugins/aarch64/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/aarch64/CMakeLists.txt
@ -1,17 +0,0 @@
-##===----------------------------------------------------------------------===##
-#
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-##===----------------------------------------------------------------------===##
-#
-# Build a plugin for an aarch64 machine if available.
-#
-##===----------------------------------------------------------------------===##
-
-if(CMAKE_SYSTEM_NAME MATCHES "Linux")
-  build_generic_elf64("aarch64" "aarch64" "aarch64" "aarch64-unknown-linux-gnu" "183")
-else()
- libomptarget_say("Not building aarch64 offloading plugin: machine not found in the system.")
-endif()
--- a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
@ -1,122 +0,0 @@
-##===----------------------------------------------------------------------===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is dual licensed under the MIT and the University of Illinois Open
-# Source Licenses. See LICENSE.txt for details.
-#
-##===----------------------------------------------------------------------===##
-#
-# Build a plugin for an AMDGPU machine if available.
-#
-##===----------------------------------------------------------------------===##
-
-################################################################################
-set(LIBOMPTARGET_BUILD_AMDGPU_PLUGIN TRUE CACHE BOOL
-  "Whether to build AMDGPU plugin")
-if (NOT LIBOMPTARGET_BUILD_AMDGPU_PLUGIN)
-  libomptarget_say("Not building AMDGPU offloading plugin: LIBOMPTARGET_BUILD_AMDGPU_PLUGIN is false")
-  return()
-endif()
-
-# as of rocm-3.7, hsa is installed with cmake packages and kmt is found via hsa
-find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
-
-if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux"))
-  libomptarget_say("Not building AMDGPU plugin: only support AMDGPU in Linux x86_64, ppc64le, or aarch64 hosts")
-  return()
-endif()
-
-################################################################################
-# Define the suffix for the runtime messaging dumps.
-add_definitions(-DTARGET_NAME=AMDGPU)
-if(CMAKE_SYSTEM_PROCESSOR MATCHES "(ppc64le)|(aarch64)$")
-   add_definitions(-DLITTLEENDIAN_CPU=1)
-endif()
-
-if(CMAKE_BUILD_TYPE MATCHES Debug)
-  add_definitions(-DDEBUG)
-endif()
-
-set(LIBOMPTARGET_DLOPEN_LIBHSA OFF)
-option(LIBOMPTARGET_FORCE_DLOPEN_LIBHSA "Build with dlopened libhsa" ${LIBOMPTARGET_DLOPEN_LIBHSA})
-
-if (${hsa-runtime64_FOUND} AND NOT LIBOMPTARGET_FORCE_DLOPEN_LIBHSA)
-  libomptarget_say("Building AMDGPU plugin linked against libhsa")
-  set(LIBOMPTARGET_EXTRA_SOURCE)
-  set(LIBOMPTARGET_DEP_LIBRARIES hsa-runtime64::hsa-runtime64)
-else()
-  libomptarget_say("Building AMDGPU plugin for dlopened libhsa")
-  include_directories(dynamic_hsa)
-  set(LIBOMPTARGET_EXTRA_SOURCE dynamic_hsa/hsa.cpp)
-  set(LIBOMPTARGET_DEP_LIBRARIES)
-endif()
-
-if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
-  # On FreeBSD, the 'environ' symbol is undefined at link time, but resolved by
-  # the dynamic linker at runtime. Therefore, allow the symbol to be undefined
-  # when creating a shared library.
-  set(LDFLAGS_UNDEFINED "-Wl,--allow-shlib-undefined")
-else()
-  set(LDFLAGS_UNDEFINED "-Wl,-z,defs")
-endif()
-
-add_llvm_library(omptarget.rtl.amdgpu SHARED
-  impl/impl.cpp
-  impl/interop_hsa.cpp
-  impl/data.cpp
-  impl/get_elf_mach_gfx_name.cpp
-  impl/system.cpp
-  impl/msgpack.cpp
-  src/rtl.cpp
-  ${LIBOMPTARGET_EXTRA_SOURCE}
-
-  ADDITIONAL_HEADER_DIRS
-  ${LIBOMPTARGET_INCLUDE_DIR}
-  ${CMAKE_CURRENT_SOURCE_DIR}/impl
-  ${CMAKE_CURRENT_SOURCE_DIR}/../../plugins-nextgen/amdgpu/utils
-
-  LINK_COMPONENTS
-  Support
-  Object
-
-  LINK_LIBS 
-  PRIVATE
-  elf_common
-  ${LIBOMPTARGET_DEP_LIBRARIES}
-  ${OPENMP_PTHREAD_LIB}
-  "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports"
-  ${LDFLAGS_UNDEFINED}
-
-  NO_INSTALL_RPATH
-)
-
-target_include_directories(
-  omptarget.rtl.amdgpu
-  PRIVATE
-  ${LIBOMPTARGET_INCLUDE_DIR}
-  ${CMAKE_CURRENT_SOURCE_DIR}/impl
-  ${CMAKE_CURRENT_SOURCE_DIR}/../../plugins-nextgen/amdgpu/utils
-)
-
-
-# Install plugin under the lib destination folder.
-install(TARGETS omptarget.rtl.amdgpu LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
-set_target_properties(omptarget.rtl.amdgpu PROPERTIES 
-  INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
-  CXX_VISIBILITY_PRESET protected)
-
-# Report to the parent scope that we are building a plugin for hsa.
-# This controls whether tests are run for the nvptx offloading target
-# Run them if libhsa is available, or if the user explicitly asked for dlopen
-# Otherwise this plugin is being built speculatively and there may be no hsa available
-option(LIBOMPTARGET_FORCE_AMDGPU_TESTS "Build AMDGPU libomptarget tests" OFF)
-if (LIBOMPTARGET_FOUND_AMDGPU_GPU OR LIBOMPTARGET_FORCE_AMDGPU_TESTS)
-  # Report to the parent scope that we are building a plugin for amdgpu
-  set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa" PARENT_SCOPE)
-  list(APPEND LIBOMPTARGET_TESTED_PLUGINS "omptarget.rtl.amdgpu")
-  set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
-else()
-  libomptarget_say("Not generating AMDGPU tests, no supported devices detected. Use 'LIBOMPTARGET_FORCE_AMDGPU_TESTS' to override.")
-  return()
-endif()
--- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
@ -1,37 +0,0 @@
-//===--- amdgpu/impl/data.cpp ------------------------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#include "impl_runtime.h"
-#include "hsa_api.h"
-#include "internal.h"
-#include "rt.h"
-#include <cassert>
-#include <stdio.h>
-#include <string.h>
-#include <vector>
-
-using core::TaskImpl;
-
-namespace core {
-namespace Runtime {
-hsa_status_t HostMalloc(void **ptr, size_t size,
-                        hsa_amd_memory_pool_t MemoryPool) {
-  hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, size, 0, ptr);
-  DP("Malloced %p\n", *ptr);
-  if (err == HSA_STATUS_SUCCESS) {
-    err = core::allow_access_to_all_gpu_agents(*ptr);
-  }
-  return err;
-}
-
-hsa_status_t Memfree(void *ptr) {
-  hsa_status_t err = hsa_amd_memory_pool_free(ptr);
-  DP("Freed %p\n", ptr);
-  return err;
-}
-} // namespace Runtime
-} // namespace core
--- a/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp
@ -1,80 +0,0 @@
-//===--- amdgpu/impl/get_elf_mach_gfx_name.cpp -------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#include "get_elf_mach_gfx_name.h"
-
-// This header conflicts with the system elf.h (macros vs enums of the same
-// identifier) and contains more up to date values for the enum checked here.
-// rtl.cpp uses the system elf.h.
-#include "llvm/BinaryFormat/ELF.h"
-
-const char *get_elf_mach_gfx_name(uint32_t EFlags) {
-  using namespace llvm::ELF;
-  uint32_t Gfx = (EFlags & EF_AMDGPU_MACH);
-  switch (Gfx) {
-  case EF_AMDGPU_MACH_AMDGCN_GFX801:
-    return "gfx801";
-  case EF_AMDGPU_MACH_AMDGCN_GFX802:
-    return "gfx802";
-  case EF_AMDGPU_MACH_AMDGCN_GFX803:
-    return "gfx803";
-  case EF_AMDGPU_MACH_AMDGCN_GFX805:
-    return "gfx805";
-  case EF_AMDGPU_MACH_AMDGCN_GFX810:
-    return "gfx810";
-  case EF_AMDGPU_MACH_AMDGCN_GFX900:
-    return "gfx900";
-  case EF_AMDGPU_MACH_AMDGCN_GFX902:
-    return "gfx902";
-  case EF_AMDGPU_MACH_AMDGCN_GFX904:
-    return "gfx904";
-  case EF_AMDGPU_MACH_AMDGCN_GFX906:
-    return "gfx906";
-  case EF_AMDGPU_MACH_AMDGCN_GFX908:
-    return "gfx908";
-  case EF_AMDGPU_MACH_AMDGCN_GFX909:
-    return "gfx909";
-  case EF_AMDGPU_MACH_AMDGCN_GFX90A:
-    return "gfx90a";
-  case EF_AMDGPU_MACH_AMDGCN_GFX90C:
-    return "gfx90c";
-  case EF_AMDGPU_MACH_AMDGCN_GFX940:
-    return "gfx940";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1010:
-    return "gfx1010";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1011:
-    return "gfx1011";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1012:
-    return "gfx1012";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1013:
-    return "gfx1013";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1030:
-    return "gfx1030";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1031:
-    return "gfx1031";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1032:
-    return "gfx1032";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1033:
-    return "gfx1033";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1034:
-    return "gfx1034";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1035:
-    return "gfx1035";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1036:
-    return "gfx1036";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1100:
-    return "gfx1100";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1101:
-    return "gfx1101";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1102:
-    return "gfx1102";
-  case EF_AMDGPU_MACH_AMDGCN_GFX1103:
-    return "gfx1103";
-  default:
-    return "--unknown gfx";
-  }
-}
--- a/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h
@ -1,15 +0,0 @@
-//===--- amdgpu/impl/get_elf_mach_gfx_name.h ---------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef GET_ELF_MACH_GFX_NAME_H_INCLUDED
-#define GET_ELF_MACH_GFX_NAME_H_INCLUDED
-
-#include <stdint.h>
-
-const char *get_elf_mach_gfx_name(uint32_t EFlags);
-
-#endif
--- a/openmp/libomptarget/plugins/amdgpu/impl/hsa_api.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/hsa_api.h
@ -1,26 +0,0 @@
-//===--- amdgpu/impl/hsa_api.h ------------------------------------ C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef AMDGPU_HSA_API_H_INCLUDED
-#define AMDGPU_HSA_API_H_INCLUDED
-
-#if defined(__has_include)
-#if __has_include("hsa/hsa.h")
-#include "hsa/hsa.h"
-#include "hsa/hsa_ext_amd.h"
-#elif __has_include("hsa.h")
-#include "hsa.h"
-#include "hsa_ext_amd.h"
-#endif
-#else
-#include "hsa/hsa.h"
-#include "hsa_ext_amd.h"
-#endif
-
-
-
-#endif
--- a/openmp/libomptarget/plugins/amdgpu/impl/impl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/impl.cpp
@ -1,182 +0,0 @@
-//===--- amdgpu/impl/impl.cpp ------------------------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#include "rt.h"
-#include <memory>
-
-/*
- * Data
- */
-
-hsa_status_t is_locked(void *ptr, void **agentBaseAddress) {
-  hsa_status_t err = HSA_STATUS_SUCCESS;
-  hsa_amd_pointer_info_t info;
-  info.size = sizeof(hsa_amd_pointer_info_t);
-  err = hsa_amd_pointer_info(ptr, &info, /*alloc=*/nullptr,
-                             /*num_agents_accessible=*/nullptr,
-                             /*accessible=*/nullptr);
-  if (err != HSA_STATUS_SUCCESS) {
-    DP("Error when getting pointer info\n");
-    return err;
-  }
-
-  if (info.type == HSA_EXT_POINTER_TYPE_LOCKED) {
-    // When user passes in a basePtr+offset we need to fix the
-    // locked pointer to include the offset: ROCr always returns
-    // the base locked address, not the shifted one.
-    if ((char *)info.hostBaseAddress <= (char *)ptr &&
-        (char *)ptr < (char *)info.hostBaseAddress + info.sizeInBytes)
-      *agentBaseAddress =
-          (void *)((uint64_t)info.agentBaseAddress + (uint64_t)ptr -
-                   (uint64_t)info.hostBaseAddress);
-    else // address is already device-agent accessible, no need to compute
-         // offset
-      *agentBaseAddress = ptr;
-  } else
-    *agentBaseAddress = nullptr;
-
-  return HSA_STATUS_SUCCESS;
-}
-
-// host pointer (either src or dest) must be locked via hsa_amd_memory_lock
-static hsa_status_t invoke_hsa_copy(hsa_signal_t signal, void *dest,
-                                    hsa_agent_t agent, const void *src,
-                                    size_t size) {
-  const hsa_signal_value_t init = 1;
-  const hsa_signal_value_t success = 0;
-  hsa_signal_store_screlease(signal, init);
-
-  hsa_status_t err = hsa_amd_memory_async_copy(dest, agent, src, agent, size, 0,
-                                               nullptr, signal);
-  if (err != HSA_STATUS_SUCCESS)
-    return err;
-
-  // async_copy reports success by decrementing and failure by setting to < 0
-  hsa_signal_value_t got = init;
-  while (got == init)
-    got = hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_NE, init,
-                                    UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
-
-  if (got != success)
-    return HSA_STATUS_ERROR;
-
-  return err;
-}
-
-struct implFreePtrDeletor {
-  void operator()(void *p) {
-    core::Runtime::Memfree(p); // ignore failure to free
-  }
-};
-
-enum CopyDirection { H2D, D2H };
-
-static hsa_status_t locking_async_memcpy(enum CopyDirection direction,
-                                         hsa_signal_t signal, void *dest,
-                                         hsa_agent_t agent, void *src,
-                                         void *lockingPtr, size_t size) {
-  void *lockedPtr = nullptr;
-  hsa_status_t err = is_locked(lockingPtr, &lockedPtr);
-  bool HostPtrIsLocked = true;
-  if (err != HSA_STATUS_SUCCESS)
-    return err;
-  if (!lockedPtr) { // not locked
-    HostPtrIsLocked = false;
-    hsa_agent_t agents[1] = {agent};
-    err = hsa_amd_memory_lock(lockingPtr, size, agents, /*num_agent=*/1,
-                              (void **)&lockedPtr);
-    if (err != HSA_STATUS_SUCCESS)
-      return err;
-    DP("locking_async_memcpy: lockingPtr=%p lockedPtr=%p Size = %lu\n",
-       lockingPtr, lockedPtr, size);
-  }
-
-  switch (direction) {
-  case H2D:
-    err = invoke_hsa_copy(signal, dest, agent, lockedPtr, size);
-    break;
-  case D2H:
-    err = invoke_hsa_copy(signal, lockedPtr, agent, src, size);
-    break;
-  }
-
-  if (err != HSA_STATUS_SUCCESS && !HostPtrIsLocked) {
-    // do not leak locked host pointers, but discard potential error message
-    // because the initial error was in the copy function
-    hsa_amd_memory_unlock(lockingPtr);
-    return err;
-  }
-
-  // unlock only if not user locked
-  if (!HostPtrIsLocked)
-    err = hsa_amd_memory_unlock(lockingPtr);
-  if (err != HSA_STATUS_SUCCESS)
-    return err;
-
-  return HSA_STATUS_SUCCESS;
-}
-
-hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
-                             void *hostSrc, size_t size,
-                             hsa_agent_t device_agent,
-                             hsa_amd_memory_pool_t MemoryPool) {
-  hsa_status_t err;
-
-  err = locking_async_memcpy(CopyDirection::H2D, signal, deviceDest,
-                             device_agent, hostSrc, hostSrc, size);
-
-  if (err == HSA_STATUS_SUCCESS)
-    return err;
-
-  // async memcpy sometimes fails in situations where
-  // allocate + copy succeeds. Looks like it might be related to
-  // locking part of a read only segment. Fall back for now.
-  void *tempHostPtr;
-  hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
-  if (ret != HSA_STATUS_SUCCESS) {
-    DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size);
-    return ret;
-  }
-  std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr);
-  memcpy(tempHostPtr, hostSrc, size);
-
-  return locking_async_memcpy(CopyDirection::H2D, signal, deviceDest,
-                              device_agent, tempHostPtr, tempHostPtr, size);
-}
-
-hsa_status_t impl_memcpy_d2h(hsa_signal_t signal, void *hostDest,
-                             void *deviceSrc, size_t size,
-                             hsa_agent_t deviceAgent,
-                             hsa_amd_memory_pool_t MemoryPool) {
-  hsa_status_t err;
-
-  // device has always visibility over both pointers, so use that
-  err = locking_async_memcpy(CopyDirection::D2H, signal, hostDest, deviceAgent,
-                             deviceSrc, hostDest, size);
-
-  if (err == HSA_STATUS_SUCCESS)
-    return err;
-
-  // hsa_memory_copy sometimes fails in situations where
-  // allocate + copy succeeds. Looks like it might be related to
-  // locking part of a read only segment. Fall back for now.
-  void *tempHostPtr;
-  hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
-  if (ret != HSA_STATUS_SUCCESS) {
-    DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size);
-    return ret;
-  }
-  std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr);
-
-  err = locking_async_memcpy(CopyDirection::D2H, signal, tempHostPtr,
-                             deviceAgent, deviceSrc, tempHostPtr, size);
-  if (err != HSA_STATUS_SUCCESS)
-    return HSA_STATUS_ERROR;
-
-  memcpy(hostDest, tempHostPtr, size);
-  return HSA_STATUS_SUCCESS;
-}
--- a/openmp/libomptarget/plugins/amdgpu/impl/impl_runtime.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/impl_runtime.h
@ -1,34 +0,0 @@
-//===--- amdgpu/impl/impl_runtime.h ------------------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef INCLUDE_IMPL_RUNTIME_H_
-#define INCLUDE_IMPL_RUNTIME_H_
-
-#include "hsa_api.h"
-
-extern "C" {
-
-// Check if pointer ptr is already locked
-hsa_status_t is_locked(void *ptr, void **agentBaseAddress);
-
-hsa_status_t impl_module_register_from_memory_to_place(
-    void *module_bytes, size_t module_size, int DeviceId,
-    hsa_status_t (*on_deserialized_data)(void *data, size_t size,
-                                         void *cb_state),
-    void *cb_state);
-
-hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
-                             void *hostSrc, size_t size,
-                             hsa_agent_t device_agent,
-                             hsa_amd_memory_pool_t MemoryPool);
-
-hsa_status_t impl_memcpy_d2h(hsa_signal_t sig, void *hostDest, void *deviceSrc,
-                             size_t size, hsa_agent_t device_agent,
-                             hsa_amd_memory_pool_t MemoryPool);
-}
-
-#endif // INCLUDE_IMPL_RUNTIME_H_
--- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
@ -1,154 +0,0 @@
-//===--- amdgpu/impl/internal.h ----------------------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef SRC_RUNTIME_INCLUDE_INTERNAL_H_
-#define SRC_RUNTIME_INCLUDE_INTERNAL_H_
-#include <inttypes.h>
-#include <pthread.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <cstring>
-#include <map>
-#include <queue>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "hsa_api.h"
-
-#include "impl_runtime.h"
-
-#ifndef TARGET_NAME
-#error "Missing TARGET_NAME macro"
-#endif
-#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
-#include "Debug.h"
-
-#define MAX_NUM_KERNELS (1024 * 16)
-
-// ---------------------- Kernel Start -------------
-typedef struct atl_kernel_info_s {
-  uint64_t kernel_object;
-  uint32_t group_segment_size;
-  uint32_t private_segment_size;
-  uint32_t sgpr_count;
-  uint32_t vgpr_count;
-  uint32_t sgpr_spill_count;
-  uint32_t vgpr_spill_count;
-  uint32_t kernel_segment_size;
-  uint32_t explicit_argument_count;
-  uint32_t implicit_argument_count;
-} atl_kernel_info_t;
-
-typedef struct atl_symbol_info_s {
-  uint64_t addr;
-  uint32_t size;
-} atl_symbol_info_t;
-
-// ---------------------- Kernel End -------------
-
-namespace core {
-class TaskgroupImpl;
-class TaskImpl;
-class Kernel;
-class KernelImpl;
-} // namespace core
-
-struct SignalPoolT {
-  SignalPoolT() {}
-  SignalPoolT(const SignalPoolT &) = delete;
-  SignalPoolT(SignalPoolT &&) = delete;
-  ~SignalPoolT() {
-    size_t N = state.size();
-    for (size_t i = 0; i < N; i++) {
-      hsa_signal_t signal = state.front();
-      state.pop();
-      hsa_status_t rc = hsa_signal_destroy(signal);
-      if (rc != HSA_STATUS_SUCCESS) {
-        DP("Signal pool destruction failed\n");
-      }
-    }
-  }
-  size_t size() {
-    lock l(&mutex);
-    return state.size();
-  }
-  void push(hsa_signal_t s) {
-    lock l(&mutex);
-    state.push(s);
-  }
-  hsa_signal_t pop(void) {
-    lock l(&mutex);
-    if (!state.empty()) {
-      hsa_signal_t res = state.front();
-      state.pop();
-      return res;
-    }
-
-    // Pool empty, attempt to create another signal
-    hsa_signal_t new_signal;
-    hsa_status_t err = hsa_signal_create(0, 0, NULL, &new_signal);
-    if (err == HSA_STATUS_SUCCESS) {
-      return new_signal;
-    }
-
-    // Fail
-    return {0};
-  }
-
-private:
-  static pthread_mutex_t mutex;
-  std::queue<hsa_signal_t> state;
-  struct lock {
-    lock(pthread_mutex_t *m) : m(m) { pthread_mutex_lock(m); }
-    ~lock() { pthread_mutex_unlock(m); }
-    pthread_mutex_t *m;
-  };
-};
-
-namespace core {
-hsa_status_t atl_init_gpu_context();
-
-hsa_status_t init_hsa();
-hsa_status_t finalize_hsa();
-/*
- * Generic utils
- */
-template <typename T> inline T alignDown(T value, size_t alignment) {
-  return (T)(value & ~(alignment - 1));
-}
-
-template <typename T> inline T *alignDown(T *value, size_t alignment) {
-  return reinterpret_cast<T *>(alignDown((intptr_t)value, alignment));
-}
-
-template <typename T> inline T alignUp(T value, size_t alignment) {
-  return alignDown((T)(value + alignment - 1), alignment);
-}
-
-template <typename T> inline T *alignUp(T *value, size_t alignment) {
-  return reinterpret_cast<T *>(
-      alignDown((intptr_t)(value + alignment - 1), alignment));
-}
-
-extern bool atl_is_impl_initialized();
-
-bool handle_group_signal(hsa_signal_value_t value, void *arg);
-
-hsa_status_t allow_access_to_all_gpu_agents(void *ptr);
-} // namespace core
-
-inline const char *get_error_string(hsa_status_t err) {
-  const char *res;
-  hsa_status_t rc = hsa_status_string(err, &res);
-  return (rc == HSA_STATUS_SUCCESS) ? res : "HSA_STATUS UNKNOWN.";
-}
-
-#endif // SRC_RUNTIME_INCLUDE_INTERNAL_H_
--- a/openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.cpp
@ -1,39 +0,0 @@
-//===--- amdgpu/impl/interop_hsa.cpp ------------------------------ C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#include "interop_hsa.h"
-#include "internal.h"
-
-hsa_status_t interop_hsa_get_symbol_info(
-    const std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
-    int DeviceId, const char *symbol, void **var_addr, unsigned int *var_size) {
-  /*
-     // Typical usage:
-     void *var_addr;
-     size_t var_size;
-     interop_hsa_get_symbol_addr(gpu_place, "symbol_name", &var_addr,
-     &var_size);
-     impl_memcpy(signal, host_add, var_addr, var_size);
-  */
-
-  if (!symbol || !var_addr || !var_size)
-    return HSA_STATUS_ERROR;
-
-  // get the symbol info
-  std::string symbolStr = std::string(symbol);
-  auto It = SymbolInfoTable.find(symbolStr);
-  if (It != SymbolInfoTable.end()) {
-    atl_symbol_info_t info = It->second;
-    *var_addr = reinterpret_cast<void *>(info.addr);
-    *var_size = info.size;
-    return HSA_STATUS_SUCCESS;
-  } else {
-    *var_addr = NULL;
-    *var_size = 0;
-    return HSA_STATUS_ERROR;
-  }
-}
--- a/openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.h
@ -1,26 +0,0 @@
-//===--- amdgpu/impl/interop_hsa.h -------------------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef INCLUDE_INTEROP_HSA_H_
-#define INCLUDE_INTEROP_HSA_H_
-
-#include "impl_runtime.h"
-#include "hsa_api.h"
-#include "internal.h"
-
-#include <map>
-#include <string>
-
-extern "C" {
-
-hsa_status_t interop_hsa_get_symbol_info(
-    const std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
-    int DeviceId, const char *symbol, void **var_addr, unsigned int *var_size);
-
-}
-
-#endif // INCLUDE_INTEROP_HSA_H_
--- a/openmp/libomptarget/plugins/amdgpu/impl/msgpack.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/msgpack.cpp
@ -1,271 +0,0 @@
-//===--- amdgpu/impl/msgpack.cpp ---------------------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-#include <functional>
-#include <string>
-
-#include "msgpack.h"
-
-namespace msgpack {
-
-[[noreturn]] void internal_error() {
-  printf("internal error\n");
-  exit(1);
-}
-
-const char *type_name(type ty) {
-  switch (ty) {
-#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER)                                  \
-  case NAME:                                                                   \
-    return #NAME;
-#include "msgpack.def"
-#undef X
-  }
-  internal_error();
-}
-
-unsigned bytes_used_fixed(msgpack::type ty) {
-  using namespace msgpack;
-  switch (ty) {
-#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER)                                  \
-  case NAME:                                                                   \
-    return WIDTH;
-#include "msgpack.def"
-#undef X
-  }
-  internal_error();
-}
-
-msgpack::type parse_type(unsigned char x) {
-
-#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER)                                  \
-  if (x >= LOWER && x <= UPPER) {                                              \
-    return NAME;                                                               \
-  } else
-#include "msgpack.def"
-#undef X
-  { internal_error(); }
-}
-
-template <typename T, typename R> R bitcast(T x) {
-  static_assert(sizeof(T) == sizeof(R), "");
-  R tmp;
-  memcpy(&tmp, &x, sizeof(T));
-  return tmp;
-}
-template int64_t bitcast<uint64_t, int64_t>(uint64_t);
-} // namespace msgpack
-
-// Helper functions for reading additional payload from the header
-// Depending on the type, this can be a number of bytes, elements,
-// key-value pairs or an embedded integer.
-// Each takes a pointer to the start of the header and returns a uint64_t
-
-namespace {
-namespace payload {
-uint64_t read_zero(const unsigned char *) { return 0; }
-
-// Read the first byte and zero/sign extend it
-uint64_t read_embedded_u8(const unsigned char *start) { return start[0]; }
-uint64_t read_embedded_s8(const unsigned char *start) {
-  int64_t res = msgpack::bitcast<uint8_t, int8_t>(start[0]);
-  return msgpack::bitcast<int64_t, uint64_t>(res);
-}
-
-// Read a masked part of the first byte
-uint64_t read_via_mask_0x1(const unsigned char *start) { return *start & 0x1u; }
-uint64_t read_via_mask_0xf(const unsigned char *start) { return *start & 0xfu; }
-uint64_t read_via_mask_0x1f(const unsigned char *start) {
-  return *start & 0x1fu;
-}
-
-// Read 1/2/4/8 bytes immediately following the type byte and zero/sign extend
-// Big endian format.
-uint64_t read_size_field_u8(const unsigned char *from) {
-  from++;
-  return from[0];
-}
-
-// TODO: detect whether host is little endian or not, and whether the intrinsic
-// is available. And probably use the builtin to test the diy
-const bool use_bswap = false;
-
-uint64_t read_size_field_u16(const unsigned char *from) {
-  from++;
-  if (use_bswap) {
-    uint16_t b;
-    memcpy(&b, from, 2);
-    return __builtin_bswap16(b);
-  } else {
-    return (from[0] << 8u) | from[1];
-  }
-}
-uint64_t read_size_field_u32(const unsigned char *from) {
-  from++;
-  if (use_bswap) {
-    uint32_t b;
-    memcpy(&b, from, 4);
-    return __builtin_bswap32(b);
-  } else {
-    return (from[0] << 24u) | (from[1] << 16u) | (from[2] << 8u) |
-           (from[3] << 0u);
-  }
-}
-uint64_t read_size_field_u64(const unsigned char *from) {
-  from++;
-  if (use_bswap) {
-    uint64_t b;
-    memcpy(&b, from, 8);
-    return __builtin_bswap64(b);
-  } else {
-    return ((uint64_t)from[0] << 56u) | ((uint64_t)from[1] << 48u) |
-           ((uint64_t)from[2] << 40u) | ((uint64_t)from[3] << 32u) |
-           (from[4] << 24u) | (from[5] << 16u) | (from[6] << 8u) |
-           (from[7] << 0u);
-  }
-}
-
-uint64_t read_size_field_s8(const unsigned char *from) {
-  uint8_t u = read_size_field_u8(from);
-  int64_t res = msgpack::bitcast<uint8_t, int8_t>(u);
-  return msgpack::bitcast<int64_t, uint64_t>(res);
-}
-uint64_t read_size_field_s16(const unsigned char *from) {
-  uint16_t u = read_size_field_u16(from);
-  int64_t res = msgpack::bitcast<uint16_t, int16_t>(u);
-  return msgpack::bitcast<int64_t, uint64_t>(res);
-}
-uint64_t read_size_field_s32(const unsigned char *from) {
-  uint32_t u = read_size_field_u32(from);
-  int64_t res = msgpack::bitcast<uint32_t, int32_t>(u);
-  return msgpack::bitcast<int64_t, uint64_t>(res);
-}
-uint64_t read_size_field_s64(const unsigned char *from) {
-  uint64_t u = read_size_field_u64(from);
-  int64_t res = msgpack::bitcast<uint64_t, int64_t>(u);
-  return msgpack::bitcast<int64_t, uint64_t>(res);
-}
-} // namespace payload
-} // namespace
-
-namespace msgpack {
-
-payload_info_t payload_info(msgpack::type ty) {
-  using namespace msgpack;
-  switch (ty) {
-#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER)                                  \
-  case NAME:                                                                   \
-    return payload::PAYLOAD;
-#include "msgpack.def"
-#undef X
-  }
-  internal_error();
-}
-
-} // namespace msgpack
-
-const unsigned char *msgpack::skip_next_message(const unsigned char *start,
-                                                const unsigned char *end) {
-  class f : public functors_defaults<f> {};
-  return handle_msgpack({start, end}, f());
-}
-
-namespace msgpack {
-bool message_is_string(byte_range bytes, const char *needle) {
-  bool matched = false;
-  size_t needleN = strlen(needle);
-
-  foronly_string(bytes, [=, &matched](size_t N, const unsigned char *str) {
-    if (N == needleN) {
-      if (memcmp(needle, str, N) == 0) {
-        matched = true;
-      }
-    }
-  });
-  return matched;
-}
-
-void dump(byte_range bytes) {
-  struct inner : functors_defaults<inner> {
-    inner(unsigned indent) : indent(indent) {}
-    const unsigned by = 2;
-    unsigned indent = 0;
-
-    void handle_string(size_t N, const unsigned char *bytes) {
-      char *tmp = (char *)malloc(N + 1);
-      memcpy(tmp, bytes, N);
-      tmp[N] = '\0';
-      printf("\"%s\"", tmp);
-      free(tmp);
-    }
-
-    void handle_signed(int64_t x) { printf("%ld", x); }
-    void handle_unsigned(uint64_t x) { printf("%lu", x); }
-
-    const unsigned char *handle_array(uint64_t N, byte_range bytes) {
-      printf("\n%*s[\n", indent, "");
-      indent += by;
-
-      for (uint64_t i = 0; i < N; i++) {
-        indent += by;
-        printf("%*s", indent, "");
-        const unsigned char *next = handle_msgpack<inner>(bytes, {indent});
-        printf(",\n");
-        indent -= by;
-        bytes.start = next;
-        if (!next) {
-          break;
-        }
-      }
-      indent -= by;
-      printf("%*s]", indent, "");
-
-      return bytes.start;
-    }
-
-    const unsigned char *handle_map(uint64_t N, byte_range bytes) {
-      printf("\n%*s{\n", indent, "");
-      indent += by;
-
-      for (uint64_t i = 0; i < 2 * N; i += 2) {
-        const unsigned char *start_key = bytes.start;
-        printf("%*s", indent, "");
-        const unsigned char *end_key =
-            handle_msgpack<inner>({start_key, bytes.end}, {indent});
-        if (!end_key) {
-          break;
-        }
-
-        printf(" : ");
-
-        const unsigned char *start_value = end_key;
-        const unsigned char *end_value =
-            handle_msgpack<inner>({start_value, bytes.end}, {indent});
-
-        if (!end_value) {
-          break;
-        }
-
-        printf(",\n");
-        bytes.start = end_value;
-      }
-
-      indent -= by;
-      printf("%*s}", indent, "");
-
-      return bytes.start;
-    }
-  };
-
-  handle_msgpack<inner>(bytes, {0});
-  printf("\n");
-}
-
-} // namespace msgpack
--- a/openmp/libomptarget/plugins/amdgpu/impl/msgpack.def
+++ b/openmp/libomptarget/plugins/amdgpu/impl/msgpack.def
@ -1,46 +0,0 @@
-//===--- amdgpu/impl/msgpack.def ---------------------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// name, header width, reader, [lower, upper] encoding
-X(posfixint, 1, read_embedded_u8, 0x00, 0x7f)
-X(negfixint, 1, read_embedded_s8, 0xe0, 0xff)
-X(fixmap, 1, read_via_mask_0xf, 0x80, 0x8f)
-X(fixarray, 1, read_via_mask_0xf, 0x90, 0x9f)
-X(fixstr, 1, read_via_mask_0x1f, 0xa0, 0xbf)
-X(nil, 1, read_zero, 0xc0, 0xc0)
-X(never_used, 1, read_zero, 0xc1, 0xc1)
-X(f, 1, read_via_mask_0x1, 0xc2, 0xc2)
-X(t, 1, read_via_mask_0x1, 0xc3, 0xc3)
-X(bin8, 2, read_size_field_u8, 0xc4, 0xc4)
-X(bin16, 3, read_size_field_u16, 0xc5, 0xc5)
-X(bin32, 5, read_size_field_u32, 0xc6, 0xc6)
-X(ext8, 3, read_size_field_u8, 0xc7, 0xc7)
-X(ext16, 4, read_size_field_u16, 0xc8, 0xc8)
-X(ext32, 6, read_size_field_u32, 0xc9, 0xc9)
-X(float32, 5, read_zero, 0xca, 0xca)
-X(float64, 9, read_zero, 0xcb, 0xcb)
-X(uint8, 2, read_size_field_u8, 0xcc, 0xcc)
-X(uint16, 3, read_size_field_u16, 0xcd, 0xcd)
-X(uint32, 5, read_size_field_u32, 0xce, 0xce)
-X(uint64, 9, read_size_field_u64, 0xcf, 0xcf)
-X(int8, 2, read_size_field_s8, 0xd0, 0xd0)
-X(int16, 3, read_size_field_s16, 0xd1, 0xd1)
-X(int32, 5, read_size_field_s32, 0xd2, 0xd2)
-X(int64, 9, read_size_field_s64, 0xd3, 0xd3)
-X(fixext1, 3, read_zero, 0xd4, 0xd4)
-X(fixext2, 4, read_zero, 0xd5, 0xd5)
-X(fixext4, 6, read_zero, 0xd6, 0xd6)
-X(fixext8, 10, read_zero, 0xd7, 0xd7)
-X(fixext16, 18, read_zero, 0xd8, 0xd8)
-X(str8, 2, read_size_field_u8, 0xd9, 0xd9)
-X(str16, 3, read_size_field_u16, 0xda, 0xda)
-X(str32, 5, read_size_field_u32, 0xdb, 0xdb)
-X(array16, 3, read_size_field_u16, 0xdc, 0xdc)
-X(array32, 5, read_size_field_u32, 0xdd, 0xdd)
-X(map16, 3, read_size_field_u16, 0xde, 0xde)
-X(map32, 5, read_size_field_u32, 0xdf, 0xdf)
--- a/openmp/libomptarget/plugins/amdgpu/impl/msgpack.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/msgpack.h
@ -1,282 +0,0 @@
-//===--- amdgpu/impl/msgpack.h ------------------------------------ C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef MSGPACK_H
-#define MSGPACK_H
-
-#include <functional>
-
-namespace msgpack {
-
-// The message pack format is dynamically typed, schema-less. Format is:
-// message: [type][header][payload]
-// where type is one byte, header length is a fixed length function of type
-// payload is zero to N bytes, with the length encoded in [type][header]
-
-// Scalar fields include boolean, signed integer, float, string etc
-// Composite types are sequences of messages
-// Array field is [header][element][element]...
-// Map field is [header][key][value][key][value]...
-
-// Multibyte integer fields are big endian encoded
-// The map key can be any message type
-// Maps may contain duplicate keys
-// Data is not uniquely encoded, e.g. integer "8" may be stored as one byte or
-// in as many as nine, as signed or unsigned. Implementation defined.
-// Similarly "foo" may embed the length in the type field or in multiple bytes
-
-// This parser is structured as an iterator over a sequence of bytes.
-// It calls a user provided function on each message in order to extract fields
-// The default implementation for each scalar type is to do nothing. For map or
-// arrays, the default implementation returns just after that message to support
-// iterating to the next message, but otherwise has no effect.
-
-struct byte_range {
-  const unsigned char *start;
-  const unsigned char *end;
-};
-
-const unsigned char *skip_next_message(const unsigned char *start,
-                                       const unsigned char *end);
-
-template <typename Derived> class functors_defaults {
-public:
-  void cb_string(size_t N, const unsigned char *str) {
-    derived().handle_string(N, str);
-  }
-  void cb_boolean(bool x) { derived().handle_boolean(x); }
-  void cb_signed(int64_t x) { derived().handle_signed(x); }
-  void cb_unsigned(uint64_t x) { derived().handle_unsigned(x); }
-  void cb_array_elements(byte_range bytes) {
-    derived().handle_array_elements(bytes);
-  }
-  void cb_map_elements(byte_range key, byte_range value) {
-    derived().handle_map_elements(key, value);
-  }
-  const unsigned char *cb_array(uint64_t N, byte_range bytes) {
-    return derived().handle_array(N, bytes);
-  }
-  const unsigned char *cb_map(uint64_t N, byte_range bytes) {
-    return derived().handle_map(N, bytes);
-  }
-
-private:
-  Derived &derived() { return *static_cast<Derived *>(this); }
-
-  // Default implementations for scalar ops are no-ops
-  void handle_string(size_t, const unsigned char *) {}
-  void handle_boolean(bool) {}
-  void handle_signed(int64_t) {}
-  void handle_unsigned(uint64_t) {}
-  void handle_array_elements(byte_range) {}
-  void handle_map_elements(byte_range, byte_range) {}
-
-  // Default implementation for sequences is to skip over the messages
-  const unsigned char *handle_array(uint64_t N, byte_range bytes) {
-    for (uint64_t i = 0; i < N; i++) {
-      const unsigned char *next = skip_next_message(bytes.start, bytes.end);
-      if (!next) {
-        return nullptr;
-      }
-      cb_array_elements(bytes);
-      bytes.start = next;
-    }
-    return bytes.start;
-  }
-  const unsigned char *handle_map(uint64_t N, byte_range bytes) {
-    for (uint64_t i = 0; i < N; i++) {
-      const unsigned char *start_key = bytes.start;
-      const unsigned char *end_key = skip_next_message(start_key, bytes.end);
-      if (!end_key) {
-        return nullptr;
-      }
-      const unsigned char *start_value = end_key;
-      const unsigned char *end_value =
-          skip_next_message(start_value, bytes.end);
-      if (!end_value) {
-        return nullptr;
-      }
-      cb_map_elements({start_key, end_key}, {start_value, end_value});
-      bytes.start = end_value;
-    }
-    return bytes.start;
-  }
-};
-
-typedef enum : uint8_t {
-#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) NAME,
-#include "msgpack.def"
-#undef X
-} type;
-
-[[noreturn]] void internal_error();
-type parse_type(unsigned char x);
-unsigned bytes_used_fixed(type ty);
-
-typedef uint64_t (*payload_info_t)(const unsigned char *);
-payload_info_t payload_info(msgpack::type ty);
-
-template <typename T, typename R> R bitcast(T x);
-
-template <typename F, msgpack::type ty>
-const unsigned char *handle_msgpack_given_type(byte_range bytes, F f) {
-  const unsigned char *start = bytes.start;
-  const unsigned char *end = bytes.end;
-  const uint64_t available = end - start;
-  assert(available != 0);
-  assert(ty == parse_type(*start));
-
-  const uint64_t bytes_used = bytes_used_fixed(ty);
-  if (available < bytes_used) {
-    return 0;
-  }
-  const uint64_t available_post_header = available - bytes_used;
-
-  const payload_info_t info = payload_info(ty);
-  const uint64_t N = info(start);
-
-  switch (ty) {
-  case msgpack::t:
-  case msgpack::f: {
-    // t is 0b11000010, f is 0b11000011, masked with 0x1
-    f.cb_boolean(N);
-    return start + bytes_used;
-  }
-
-  case msgpack::posfixint:
-  case msgpack::uint8:
-  case msgpack::uint16:
-  case msgpack::uint32:
-  case msgpack::uint64: {
-    f.cb_unsigned(N);
-    return start + bytes_used;
-  }
-
-  case msgpack::negfixint:
-  case msgpack::int8:
-  case msgpack::int16:
-  case msgpack::int32:
-  case msgpack::int64: {
-    f.cb_signed(bitcast<uint64_t, int64_t>(N));
-    return start + bytes_used;
-  }
-
-  case msgpack::fixstr:
-  case msgpack::str8:
-  case msgpack::str16:
-  case msgpack::str32: {
-    if (available_post_header < N) {
-      return 0;
-    } else {
-      f.cb_string(N, start + bytes_used);
-      return start + bytes_used + N;
-    }
-  }
-
-  case msgpack::fixarray:
-  case msgpack::array16:
-  case msgpack::array32: {
-    return f.cb_array(N, {start + bytes_used, end});
-  }
-
-  case msgpack::fixmap:
-  case msgpack::map16:
-  case msgpack::map32: {
-    return f.cb_map(N, {start + bytes_used, end});
-  }
-
-  case msgpack::nil:
-  case msgpack::bin8:
-  case msgpack::bin16:
-  case msgpack::bin32:
-  case msgpack::float32:
-  case msgpack::float64:
-  case msgpack::ext8:
-  case msgpack::ext16:
-  case msgpack::ext32:
-  case msgpack::fixext1:
-  case msgpack::fixext2:
-  case msgpack::fixext4:
-  case msgpack::fixext8:
-  case msgpack::fixext16:
-  case msgpack::never_used: {
-    if (available_post_header < N) {
-      return 0;
-    }
-    return start + bytes_used + N;
-  }
-  }
-  internal_error();
-}
-
-template <typename F>
-const unsigned char *handle_msgpack(byte_range bytes, F f) {
-  const unsigned char *start = bytes.start;
-  const unsigned char *end = bytes.end;
-  const uint64_t available = end - start;
-  if (available == 0) {
-    return 0;
-  }
-  const type ty = parse_type(*start);
-
-  switch (ty) {
-#define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER)                                  \
-  case msgpack::NAME:                                                          \
-    return handle_msgpack_given_type<F, msgpack::NAME>(bytes, f);
-#include "msgpack.def"
-#undef X
-  }
-
-  internal_error();
-}
-
-bool message_is_string(byte_range bytes, const char *str);
-
-template <typename C> void foronly_string(byte_range bytes, C callback) {
-  struct inner : functors_defaults<inner> {
-    inner(C &cb) : cb(cb) {}
-    C &cb;
-    void handle_string(size_t N, const unsigned char *str) { cb(N, str); }
-  };
-  handle_msgpack<inner>(bytes, {callback});
-}
-
-template <typename C> void foronly_unsigned(byte_range bytes, C callback) {
-  struct inner : functors_defaults<inner> {
-    inner(C &cb) : cb(cb) {}
-    C &cb;
-    void handle_unsigned(uint64_t x) { cb(x); }
-  };
-  handle_msgpack<inner>(bytes, {callback});
-}
-
-template <typename C> void foreach_array(byte_range bytes, C callback) {
-  struct inner : functors_defaults<inner> {
-    inner(C &cb) : cb(cb) {}
-    C &cb;
-    void handle_array_elements(byte_range element) { cb(element); }
-  };
-  handle_msgpack<inner>(bytes, {callback});
-}
-
-template <typename C> void foreach_map(byte_range bytes, C callback) {
-  struct inner : functors_defaults<inner> {
-    inner(C &cb) : cb(cb) {}
-    C &cb;
-    void handle_map_elements(byte_range key, byte_range value) {
-      cb(key, value);
-    }
-  };
-  handle_msgpack<inner>(bytes, {callback});
-}
-
-// Crude approximation to json
-void dump(byte_range);
-
-} // namespace msgpack
-
-#endif
--- a/openmp/libomptarget/plugins/amdgpu/impl/rt.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
@ -1,34 +0,0 @@
-//===--- amdgpu/impl/rt.h ----------------------------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef SRC_RUNTIME_INCLUDE_RT_H_
-#define SRC_RUNTIME_INCLUDE_RT_H_
-
-#include "hsa_api.h"
-#include "impl_runtime.h"
-#include "internal.h"
-
-#include <string>
-
-namespace core {
-namespace Runtime {
-hsa_status_t Memfree(void *);
-hsa_status_t HostMalloc(void **ptr, size_t size,
-                        hsa_amd_memory_pool_t MemoryPool);
-
-} // namespace Runtime
-hsa_status_t RegisterModuleFromMemory(
-    std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
-    std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
-    void *module_bytes, size_t module_size, hsa_agent_t agent,
-    hsa_status_t (*on_deserialized_data)(void *data, size_t size,
-                                         void *cb_state),
-    void *cb_state, std::vector<hsa_executable_t> &HSAExecutables);
-
-} // namespace core
-
-#endif // SRC_RUNTIME_INCLUDE_RT_H_
--- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@ -1,744 +0,0 @@
-//===--- amdgpu/impl/system.cpp ----------------------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/Object/ELF.h"
-#include "llvm/Object/ELFObjectFile.h"
-
-#include <cassert>
-#include <sstream>
-#include <string>
-
-#include "internal.h"
-#include "rt.h"
-
-#include "msgpack.h"
-
-using namespace llvm;
-using namespace llvm::object;
-using namespace llvm::ELF;
-
-namespace hsa {
-// Wrap HSA iterate API in a shim that allows passing general callables
-template <typename C>
-hsa_status_t executable_iterate_symbols(hsa_executable_t executable, C cb) {
-  auto L = [](hsa_executable_t executable, hsa_executable_symbol_t symbol,
-              void *data) -> hsa_status_t {
-    C *unwrapped = static_cast<C *>(data);
-    return (*unwrapped)(executable, symbol);
-  };
-  return hsa_executable_iterate_symbols(executable, L,
-                                        static_cast<void *>(&cb));
-}
-} // namespace hsa
-
-typedef unsigned char *address;
-/*
- * Note descriptors.
- */
-// FreeBSD already declares Elf_Note (indirectly via <libelf.h>)
-#if !defined(__FreeBSD__)
-typedef struct {
-  uint32_t n_namesz; /* Length of note's name. */
-  uint32_t n_descsz; /* Length of note's value. */
-  uint32_t n_type;   /* Type of note. */
-  // then name
-  // then padding, optional
-  // then desc, at 4 byte alignment (not 8, despite being elf64)
-} Elf_Note;
-#endif
-
-class KernelArgMD {
-public:
-  enum class ValueKind {
-    HiddenGlobalOffsetX,
-    HiddenGlobalOffsetY,
-    HiddenGlobalOffsetZ,
-    HiddenNone,
-    HiddenPrintfBuffer,
-    HiddenDefaultQueue,
-    HiddenCompletionAction,
-    HiddenMultiGridSyncArg,
-    HiddenHostcallBuffer,
-    HiddenHeapV1,
-    Unknown
-  };
-
-  KernelArgMD()
-      : name_(std::string()), size_(0), offset_(0),
-        valueKind_(ValueKind::Unknown) {}
-
-  // fields
-  std::string name_;
-  uint32_t size_;
-  uint32_t offset_;
-  ValueKind valueKind_;
-};
-
-static const std::map<std::string, KernelArgMD::ValueKind> ArgValueKind = {
-    // v3
-    //    {"by_value", KernelArgMD::ValueKind::ByValue},
-    //    {"global_buffer", KernelArgMD::ValueKind::GlobalBuffer},
-    //    {"dynamic_shared_pointer",
-    //    KernelArgMD::ValueKind::DynamicSharedPointer},
-    //    {"sampler", KernelArgMD::ValueKind::Sampler},
-    //    {"image", KernelArgMD::ValueKind::Image},
-    //    {"pipe", KernelArgMD::ValueKind::Pipe},
-    //    {"queue", KernelArgMD::ValueKind::Queue},
-    {"hidden_global_offset_x", KernelArgMD::ValueKind::HiddenGlobalOffsetX},
-    {"hidden_global_offset_y", KernelArgMD::ValueKind::HiddenGlobalOffsetY},
-    {"hidden_global_offset_z", KernelArgMD::ValueKind::HiddenGlobalOffsetZ},
-    {"hidden_none", KernelArgMD::ValueKind::HiddenNone},
-    {"hidden_printf_buffer", KernelArgMD::ValueKind::HiddenPrintfBuffer},
-    {"hidden_default_queue", KernelArgMD::ValueKind::HiddenDefaultQueue},
-    {"hidden_completion_action",
-     KernelArgMD::ValueKind::HiddenCompletionAction},
-    {"hidden_multigrid_sync_arg",
-     KernelArgMD::ValueKind::HiddenMultiGridSyncArg},
-    {"hidden_hostcall_buffer", KernelArgMD::ValueKind::HiddenHostcallBuffer},
-    {"hidden_heap_v1", KernelArgMD::ValueKind::HiddenHeapV1}};
-
-namespace core {
-
-hsa_status_t callbackEvent(const hsa_amd_event_t *event, void *data) {
-  if (event->event_type == HSA_AMD_GPU_MEMORY_FAULT_EVENT) {
-    hsa_amd_gpu_memory_fault_info_t memory_fault = event->memory_fault;
-    // memory_fault.agent
-    // memory_fault.virtual_address
-    // memory_fault.fault_reason_mask
-    // fprintf("[GPU Error at %p: Reason is ", memory_fault.virtual_address);
-    std::stringstream stream;
-    stream << std::hex << (uintptr_t)memory_fault.virtual_address;
-    std::string addr("0x" + stream.str());
-
-    std::string err_string = "[GPU Memory Error] Addr: " + addr;
-    err_string += " Reason: ";
-    if (!(memory_fault.fault_reason_mask & 0x00111111)) {
-      err_string += "No Idea! ";
-    } else {
-      if (memory_fault.fault_reason_mask & 0x00000001)
-        err_string += "Page not present or supervisor privilege. ";
-      if (memory_fault.fault_reason_mask & 0x00000010)
-        err_string += "Write access to a read-only page. ";
-      if (memory_fault.fault_reason_mask & 0x00000100)
-        err_string += "Execute access to a page marked NX. ";
-      if (memory_fault.fault_reason_mask & 0x00001000)
-        err_string += "Host access only. ";
-      if (memory_fault.fault_reason_mask & 0x00010000)
-        err_string += "ECC failure (if supported by HW). ";
-      if (memory_fault.fault_reason_mask & 0x00100000)
-        err_string += "Can't determine the exact fault address. ";
-    }
-    fprintf(stderr, "%s\n", err_string.c_str());
-    return HSA_STATUS_ERROR;
-  }
-  return HSA_STATUS_SUCCESS;
-}
-
-hsa_status_t atl_init_gpu_context() {
-  hsa_status_t err = hsa_amd_register_system_event_handler(callbackEvent, NULL);
-  if (err != HSA_STATUS_SUCCESS) {
-    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-           "Registering the system for memory faults", get_error_string(err));
-    return HSA_STATUS_ERROR;
-  }
-
-  return HSA_STATUS_SUCCESS;
-}
-
-static bool isImplicit(KernelArgMD::ValueKind value_kind) {
-  switch (value_kind) {
-  case KernelArgMD::ValueKind::HiddenGlobalOffsetX:
-  case KernelArgMD::ValueKind::HiddenGlobalOffsetY:
-  case KernelArgMD::ValueKind::HiddenGlobalOffsetZ:
-  case KernelArgMD::ValueKind::HiddenNone:
-  case KernelArgMD::ValueKind::HiddenPrintfBuffer:
-  case KernelArgMD::ValueKind::HiddenDefaultQueue:
-  case KernelArgMD::ValueKind::HiddenCompletionAction:
-  case KernelArgMD::ValueKind::HiddenMultiGridSyncArg:
-  case KernelArgMD::ValueKind::HiddenHostcallBuffer:
-  case KernelArgMD::ValueKind::HiddenHeapV1:
-    return true;
-  default:
-    return false;
-  }
-}
-
-static std::pair<const unsigned char *, const unsigned char *>
-findMetadata(const ELFObjectFile<ELF64LE> &ELFObj) {
-  constexpr std::pair<const unsigned char *, const unsigned char *> Failure = {
-      nullptr, nullptr};
-  const auto &Elf = ELFObj.getELFFile();
-  auto PhdrsOrErr = Elf.program_headers();
-  if (!PhdrsOrErr) {
-    consumeError(PhdrsOrErr.takeError());
-    return Failure;
-  }
-
-  for (auto Phdr : *PhdrsOrErr) {
-    if (Phdr.p_type != PT_NOTE)
-      continue;
-
-    Error Err = Error::success();
-    for (auto Note : Elf.notes(Phdr, Err)) {
-      if (Note.getType() == 7 || Note.getType() == 8)
-        return Failure;
-
-      // Code object v2 uses yaml metadata and is no longer supported.
-      if (Note.getType() == NT_AMD_HSA_METADATA && Note.getName() == "AMD")
-        return Failure;
-      // Code object v3 should have AMDGPU metadata.
-      if (Note.getType() == NT_AMDGPU_METADATA && Note.getName() != "AMDGPU")
-        return Failure;
-
-      ArrayRef<uint8_t> Desc = Note.getDesc(Phdr.p_align);
-      return {Desc.data(), Desc.data() + Desc.size()};
-    }
-
-    if (Err) {
-      consumeError(std::move(Err));
-      return Failure;
-    }
-  }
-
-  return Failure;
-}
-
-static std::pair<const unsigned char *, const unsigned char *>
-find_metadata(void *binary, size_t binSize) {
-  constexpr std::pair<const unsigned char *, const unsigned char *> Failure = {
-      nullptr, nullptr};
-
-  StringRef Buffer = StringRef(static_cast<const char *>(binary), binSize);
-  auto ElfOrErr = ObjectFile::createELFObjectFile(MemoryBufferRef(Buffer, ""),
-                                                  /*InitContent=*/false);
-  if (!ElfOrErr) {
-    consumeError(ElfOrErr.takeError());
-    return Failure;
-  }
-
-  if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(ElfOrErr->get()))
-    return findMetadata(*ELFObj);
-  return Failure;
-}
-
-namespace {
-int map_lookup_array(msgpack::byte_range message, const char *needle,
-                     msgpack::byte_range *res, uint64_t *size) {
-  unsigned count = 0;
-  struct s : msgpack::functors_defaults<s> {
-    s(unsigned &count, uint64_t *size) : count(count), size(size) {}
-    unsigned &count;
-    uint64_t *size;
-    const unsigned char *handle_array(uint64_t N, msgpack::byte_range bytes) {
-      count++;
-      *size = N;
-      return bytes.end;
-    }
-  };
-
-  msgpack::foreach_map(message,
-                       [&](msgpack::byte_range key, msgpack::byte_range value) {
-                         if (msgpack::message_is_string(key, needle)) {
-                           // If the message is an array, record number of
-                           // elements in *size
-                           msgpack::handle_msgpack<s>(value, {count, size});
-                           // return the whole array
-                           *res = value;
-                         }
-                       });
-  // Only claim success if exactly one key/array pair matched
-  return count != 1;
-}
-
-int map_lookup_string(msgpack::byte_range message, const char *needle,
-                      std::string *res) {
-  unsigned count = 0;
-  struct s : public msgpack::functors_defaults<s> {
-    s(unsigned &count, std::string *res) : count(count), res(res) {}
-    unsigned &count;
-    std::string *res;
-    void handle_string(size_t N, const unsigned char *str) {
-      count++;
-      *res = std::string(str, str + N);
-    }
-  };
-  msgpack::foreach_map(message,
-                       [&](msgpack::byte_range key, msgpack::byte_range value) {
-                         if (msgpack::message_is_string(key, needle)) {
-                           msgpack::handle_msgpack<s>(value, {count, res});
-                         }
-                       });
-  return count != 1;
-}
-
-int map_lookup_uint64_t(msgpack::byte_range message, const char *needle,
-                        uint64_t *res) {
-  unsigned count = 0;
-  msgpack::foreach_map(message,
-                       [&](msgpack::byte_range key, msgpack::byte_range value) {
-                         if (msgpack::message_is_string(key, needle)) {
-                           msgpack::foronly_unsigned(value, [&](uint64_t x) {
-                             count++;
-                             *res = x;
-                           });
-                         }
-                       });
-  return count != 1;
-}
-
-int array_lookup_element(msgpack::byte_range message, uint64_t elt,
-                         msgpack::byte_range *res) {
-  int rc = 1;
-  uint64_t i = 0;
-  msgpack::foreach_array(message, [&](msgpack::byte_range value) {
-    if (i == elt) {
-      *res = value;
-      rc = 0;
-    }
-    i++;
-  });
-  return rc;
-}
-
-int populate_kernelArgMD(msgpack::byte_range args_element,
-                         KernelArgMD *kernelarg) {
-  using namespace msgpack;
-  int error = 0;
-  foreach_map(args_element, [&](byte_range key, byte_range value) -> void {
-    if (message_is_string(key, ".name")) {
-      foronly_string(value, [&](size_t N, const unsigned char *str) {
-        kernelarg->name_ = std::string(str, str + N);
-      });
-    } else if (message_is_string(key, ".size")) {
-      foronly_unsigned(value, [&](uint64_t x) { kernelarg->size_ = x; });
-    } else if (message_is_string(key, ".offset")) {
-      foronly_unsigned(value, [&](uint64_t x) { kernelarg->offset_ = x; });
-    } else if (message_is_string(key, ".value_kind")) {
-      foronly_string(value, [&](size_t N, const unsigned char *str) {
-        std::string s = std::string(str, str + N);
-        auto itValueKind = ArgValueKind.find(s);
-        if (itValueKind != ArgValueKind.end()) {
-          kernelarg->valueKind_ = itValueKind->second;
-        }
-      });
-    }
-  });
-  return error;
-}
-} // namespace
-
-static hsa_status_t get_code_object_custom_metadata(
-    void *binary, size_t binSize,
-    std::map<std::string, atl_kernel_info_t> &KernelInfoTable) {
-  // parse code object with different keys from v2
-  // also, the kernel name is not the same as the symbol name -- so a
-  // symbol->name map is needed
-
-  std::pair<const unsigned char *, const unsigned char *> metadata =
-      find_metadata(binary, binSize);
-  if (!metadata.first) {
-    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
-  }
-
-  uint64_t kernelsSize = 0;
-  int msgpack_errors = 0;
-  msgpack::byte_range kernel_array;
-  msgpack_errors =
-      map_lookup_array({metadata.first, metadata.second}, "amdhsa.kernels",
-                       &kernel_array, &kernelsSize);
-  if (msgpack_errors != 0) {
-    printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
-           "kernels lookup in program metadata");
-    return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
-  }
-
-  for (size_t i = 0; i < kernelsSize; i++) {
-    assert(msgpack_errors == 0);
-    std::string kernelName;
-    std::string symbolName;
-
-    msgpack::byte_range element;
-    msgpack_errors += array_lookup_element(kernel_array, i, &element);
-    if (msgpack_errors != 0) {
-      printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
-             "element lookup in kernel metadata");
-      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
-    }
-
-    msgpack_errors += map_lookup_string(element, ".name", &kernelName);
-    msgpack_errors += map_lookup_string(element, ".symbol", &symbolName);
-    if (msgpack_errors != 0) {
-      printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
-             "strings lookup in kernel metadata");
-      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
-    }
-
-    // Make sure that kernelName + ".kd" == symbolName
-    if ((kernelName + ".kd") != symbolName) {
-      printf("[%s:%d] Kernel name mismatching symbol: %s != %s + .kd\n",
-             __FILE__, __LINE__, symbolName.c_str(), kernelName.c_str());
-      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
-    }
-
-    atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-
-    uint64_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count;
-    msgpack_errors += map_lookup_uint64_t(element, ".sgpr_count", &sgpr_count);
-    if (msgpack_errors != 0) {
-      printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
-             "sgpr count metadata lookup in kernel metadata");
-      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
-    }
-
-    info.sgpr_count = sgpr_count;
-
-    msgpack_errors += map_lookup_uint64_t(element, ".vgpr_count", &vgpr_count);
-    if (msgpack_errors != 0) {
-      printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
-             "vgpr count metadata lookup in kernel metadata");
-      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
-    }
-
-    info.vgpr_count = vgpr_count;
-
-    msgpack_errors +=
-        map_lookup_uint64_t(element, ".sgpr_spill_count", &sgpr_spill_count);
-    if (msgpack_errors != 0) {
-      printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
-             "sgpr spill count metadata lookup in kernel metadata");
-      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
-    }
-
-    info.sgpr_spill_count = sgpr_spill_count;
-
-    msgpack_errors +=
-        map_lookup_uint64_t(element, ".vgpr_spill_count", &vgpr_spill_count);
-    if (msgpack_errors != 0) {
-      printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
-             "vgpr spill count metadata lookup in kernel metadata");
-      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
-    }
-
-    info.vgpr_spill_count = vgpr_spill_count;
-
-    size_t kernel_explicit_args_size = 0;
-    uint64_t kernel_segment_size;
-    msgpack_errors += map_lookup_uint64_t(element, ".kernarg_segment_size",
-                                          &kernel_segment_size);
-    if (msgpack_errors != 0) {
-      printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
-             "kernarg segment size metadata lookup in kernel metadata");
-      return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
-    }
-
-    bool hasHiddenArgs = false;
-    if (kernel_segment_size > 0) {
-      uint64_t argsSize;
-      size_t offset = 0;
-
-      msgpack::byte_range args_array;
-      msgpack_errors +=
-          map_lookup_array(element, ".args", &args_array, &argsSize);
-      if (msgpack_errors != 0) {
-        printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
-               "kernel args metadata lookup in kernel metadata");
-        return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
-      }
-
-      for (size_t i = 0; i < argsSize; ++i) {
-        KernelArgMD lcArg;
-
-        msgpack::byte_range args_element;
-        msgpack_errors += array_lookup_element(args_array, i, &args_element);
-        if (msgpack_errors != 0) {
-          printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
-                 "iterate args map in kernel args metadata");
-          return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
-        }
-
-        msgpack_errors += populate_kernelArgMD(args_element, &lcArg);
-        if (msgpack_errors != 0) {
-          printf("[%s:%d] %s failed\n", __FILE__, __LINE__,
-                 "iterate args map in kernel args metadata");
-          return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
-        }
-        // v3 has offset field and not align field
-        size_t new_offset = lcArg.offset_;
-        size_t padding = new_offset - offset;
-        offset = new_offset;
-        DP("Arg[%lu] \"%s\" (%u, %u)\n", i, lcArg.name_.c_str(), lcArg.size_,
-           lcArg.offset_);
-        offset += lcArg.size_;
-
-        // check if the arg is a hidden/implicit arg
-        // this logic assumes that all hidden args are 8-byte aligned
-        if (!isImplicit(lcArg.valueKind_)) {
-          info.explicit_argument_count++;
-          kernel_explicit_args_size += lcArg.size_;
-        } else {
-          info.implicit_argument_count++;
-          hasHiddenArgs = true;
-        }
-        kernel_explicit_args_size += padding;
-      }
-    }
-
-    // TODO: Probably don't want this arithmetic
-    info.kernel_segment_size =
-        (hasHiddenArgs ? kernel_explicit_args_size : kernel_segment_size);
-    DP("[%s: kernarg seg size] (%lu --> %u)\n", kernelName.c_str(),
-       kernel_segment_size, info.kernel_segment_size);
-
-    // kernel received, now add it to the kernel info table
-    KernelInfoTable[kernelName] = info;
-  }
-
-  return HSA_STATUS_SUCCESS;
-}
-
-static hsa_status_t
-populate_InfoTables(hsa_executable_symbol_t symbol,
-                    std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
-                    std::map<std::string, atl_symbol_info_t> &SymbolInfoTable) {
-  hsa_symbol_kind_t type;
-
-  uint32_t name_length;
-  hsa_status_t err;
-  err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE,
-                                       &type);
-  if (err != HSA_STATUS_SUCCESS) {
-    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-           "Symbol info extraction", get_error_string(err));
-    return err;
-  }
-  DP("Exec Symbol type: %d\n", type);
-  if (type == HSA_SYMBOL_KIND_KERNEL) {
-    err = hsa_executable_symbol_get_info(
-        symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &name_length);
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Symbol info extraction", get_error_string(err));
-      return err;
-    }
-    char *name = reinterpret_cast<char *>(malloc(name_length + 1));
-    err = hsa_executable_symbol_get_info(symbol,
-                                         HSA_EXECUTABLE_SYMBOL_INFO_NAME, name);
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Symbol info extraction", get_error_string(err));
-      return err;
-    }
-    // remove the suffix .kd from symbol name.
-    name[name_length - 3] = 0;
-
-    atl_kernel_info_t info;
-    std::string kernelName(name);
-    // by now, the kernel info table should already have an entry
-    // because the non-ROCr custom code object parsing is called before
-    // iterating over the code object symbols using ROCr
-    if (KernelInfoTable.find(kernelName) == KernelInfoTable.end()) {
-      DP("amdgpu internal consistency error\n");
-      return HSA_STATUS_ERROR;
-    }
-    // found, so assign and update
-    info = KernelInfoTable[kernelName];
-
-    /* Extract dispatch information from the symbol */
-    err = hsa_executable_symbol_get_info(
-        symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
-        &(info.kernel_object));
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Extracting the symbol from the executable",
-             get_error_string(err));
-      return err;
-    }
-    err = hsa_executable_symbol_get_info(
-        symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
-        &(info.group_segment_size));
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Extracting the group segment size from the executable",
-             get_error_string(err));
-      return err;
-    }
-    err = hsa_executable_symbol_get_info(
-        symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
-        &(info.private_segment_size));
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Extracting the private segment from the executable",
-             get_error_string(err));
-      return err;
-    }
-
-    DP("Kernel %s --> %lx symbol %u group segsize %u pvt segsize %u bytes "
-       "kernarg\n",
-       kernelName.c_str(), info.kernel_object, info.group_segment_size,
-       info.private_segment_size, info.kernel_segment_size);
-
-    // assign it back to the kernel info table
-    KernelInfoTable[kernelName] = info;
-    free(name);
-  } else if (type == HSA_SYMBOL_KIND_VARIABLE) {
-    err = hsa_executable_symbol_get_info(
-        symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &name_length);
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Symbol info extraction", get_error_string(err));
-      return err;
-    }
-    char *name = reinterpret_cast<char *>(malloc(name_length + 1));
-    err = hsa_executable_symbol_get_info(symbol,
-                                         HSA_EXECUTABLE_SYMBOL_INFO_NAME, name);
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Symbol info extraction", get_error_string(err));
-      return err;
-    }
-    name[name_length] = 0;
-
-    atl_symbol_info_t info;
-
-    err = hsa_executable_symbol_get_info(
-        symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &(info.addr));
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Symbol info address extraction", get_error_string(err));
-      return err;
-    }
-
-    err = hsa_executable_symbol_get_info(
-        symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &(info.size));
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Symbol info size extraction", get_error_string(err));
-      return err;
-    }
-
-    DP("Symbol %s = %p (%u bytes)\n", name, (void *)info.addr, info.size);
-    SymbolInfoTable[std::string(name)] = info;
-    free(name);
-  } else {
-    DP("Symbol is an indirect function\n");
-  }
-  return HSA_STATUS_SUCCESS;
-}
-
-hsa_status_t RegisterModuleFromMemory(
-    std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
-    std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
-    void *module_bytes, size_t module_size, hsa_agent_t agent,
-    hsa_status_t (*on_deserialized_data)(void *data, size_t size,
-                                         void *cb_state),
-    void *cb_state, std::vector<hsa_executable_t> &HSAExecutables) {
-  hsa_status_t err;
-  hsa_executable_t executable = {0};
-  hsa_profile_t agent_profile;
-
-  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile);
-  if (err != HSA_STATUS_SUCCESS) {
-    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-           "Query the agent profile", get_error_string(err));
-    return HSA_STATUS_ERROR;
-  }
-  // FIXME: Assume that every profile is FULL until we understand how to build
-  // GCN with base profile
-  agent_profile = HSA_PROFILE_FULL;
-  /* Create the empty executable.  */
-  err = hsa_executable_create(agent_profile, HSA_EXECUTABLE_STATE_UNFROZEN, "",
-                              &executable);
-  if (err != HSA_STATUS_SUCCESS) {
-    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-           "Create the executable", get_error_string(err));
-    return HSA_STATUS_ERROR;
-  }
-
-  bool module_load_success = false;
-  do // Existing control flow used continue, preserve that for this patch
-  {
-    {
-      // Some metadata info is not available through ROCr API, so use custom
-      // code object metadata parsing to collect such metadata info
-
-      err = get_code_object_custom_metadata(module_bytes, module_size,
-                                            KernelInfoTable);
-      if (err != HSA_STATUS_SUCCESS) {
-        DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-           "Getting custom code object metadata", get_error_string(err));
-        continue;
-      }
-
-      // Deserialize code object.
-      hsa_code_object_t code_object = {0};
-      err = hsa_code_object_deserialize(module_bytes, module_size, NULL,
-                                        &code_object);
-      if (err != HSA_STATUS_SUCCESS) {
-        DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-           "Code Object Deserialization", get_error_string(err));
-        continue;
-      }
-      assert(0 != code_object.handle);
-
-      // Mutating the device image here avoids another allocation & memcpy
-      void *code_object_alloc_data =
-          reinterpret_cast<void *>(code_object.handle);
-      hsa_status_t impl_err =
-          on_deserialized_data(code_object_alloc_data, module_size, cb_state);
-      if (impl_err != HSA_STATUS_SUCCESS) {
-        printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-               "Error in deserialized_data callback",
-               get_error_string(impl_err));
-        return impl_err;
-      }
-
-      /* Load the code object.  */
-      err =
-          hsa_executable_load_code_object(executable, agent, code_object, NULL);
-      if (err != HSA_STATUS_SUCCESS) {
-        DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-           "Loading the code object", get_error_string(err));
-        continue;
-      }
-
-      // cannot iterate over symbols until executable is frozen
-    }
-    module_load_success = true;
-  } while (0);
-  DP("Modules loaded successful? %d\n", module_load_success);
-  if (module_load_success) {
-    /* Freeze the executable; it can now be queried for symbols.  */
-    err = hsa_executable_freeze(executable, "");
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Freeze the executable", get_error_string(err));
-      return HSA_STATUS_ERROR;
-    }
-
-    err = hsa::executable_iterate_symbols(
-        executable,
-        [&](hsa_executable_t, hsa_executable_symbol_t symbol) -> hsa_status_t {
-          return populate_InfoTables(symbol, KernelInfoTable, SymbolInfoTable);
-        });
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Iterating over symbols for execuatable", get_error_string(err));
-      return HSA_STATUS_ERROR;
-    }
-
-    // save the executable and destroy during finalize
-    HSAExecutables.push_back(executable);
-    return HSA_STATUS_SUCCESS;
-  } else {
-    return HSA_STATUS_ERROR;
-  }
-}
-
-} // namespace core
--- a/openmp/libomptarget/plugins/amdgpu/src/print_tracing.h
+++ b/openmp/libomptarget/plugins/amdgpu/src/print_tracing.h
@ -1,20 +0,0 @@
-//===--- amdgpu/src/print_tracing.h ------------------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef LIBOMPTARGET_PLUGINS_AMGGPU_SRC_PRINT_TRACING_H_INCLUDED
-#define LIBOMPTARGET_PLUGINS_AMGGPU_SRC_PRINT_TRACING_H_INCLUDED
-
-enum PrintTraceControlBits {
-  LAUNCH = 1,          // print a message to stderr for each kernel launch
-  RTL_TIMING = 2,      // Print timing info around each RTL step
-  STARTUP_DETAILS = 4, // Details around loading up kernel
-  RTL_TO_STDOUT = 8    // Redirect RTL tracing to stdout
-};
-
-extern int print_kernel_trace; // set by environment variable
-
-#endif
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins/common/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/common/CMakeLists.txt
@ -1,14 +0,0 @@
-##===----------------------------------------------------------------------===##
-#
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-##===----------------------------------------------------------------------===##
-#
-# Common parts which can be used by all plugins
-#
-##===----------------------------------------------------------------------===##
-
-add_subdirectory(elf_common)
-add_subdirectory(MemoryManager)
--- a/openmp/libomptarget/plugins/cuda/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/cuda/CMakeLists.txt
@ -1,76 +0,0 @@
-##===----------------------------------------------------------------------===##
-#
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-##===----------------------------------------------------------------------===##
-#
-# Build a plugin for a CUDA machine if available.
-#
-##===----------------------------------------------------------------------===##
-set(LIBOMPTARGET_BUILD_CUDA_PLUGIN TRUE CACHE BOOL
-  "Whether to build CUDA plugin")
-if (NOT LIBOMPTARGET_BUILD_CUDA_PLUGIN)
-  libomptarget_say("Not building CUDA offloading plugin: LIBOMPTARGET_BUILD_CUDA_PLUGIN is false")
-  return()
-endif()
-
-if (NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux"))
-  libomptarget_say("Not building CUDA offloading plugin: only support CUDA in Linux x86_64, ppc64le, or aarch64 hosts.")
-  return()
-endif()
-
-libomptarget_say("Building CUDA offloading plugin.")
-
-set(LIBOMPTARGET_DLOPEN_LIBCUDA OFF)
-option(LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA "Build with dlopened libcuda" ${LIBOMPTARGET_DLOPEN_LIBCUDA})
-
-add_llvm_library(omptarget.rtl.cuda SHARED
-  src/rtl.cpp
-
-  LINK_COMPONENTS
-  Support
-  Object
-
-  LINK_LIBS PRIVATE
-  elf_common
-  MemoryManager
-  ${OPENMP_PTHREAD_LIB}
-  "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports,-z,defs"
-
-  NO_INSTALL_RPATH
-)
-
-if(LIBOMPTARGET_DEP_CUDA_FOUND AND NOT LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA)
-  libomptarget_say("Building CUDA plugin linked against libcuda")
-  target_link_libraries(omptarget.rtl.cuda PRIVATE CUDA::cuda_driver)
-else()
-  libomptarget_say("Building CUDA plugin for dlopened libcuda")
-  target_include_directories(omptarget.rtl.cuda PRIVATE dynamic_cuda)
-  target_sources(omptarget.rtl.cuda PRIVATE dynamic_cuda/cuda.cpp)
-endif()
-
-# Define the suffix for the runtime messaging dumps.
-target_compile_definitions(omptarget.rtl.cuda PRIVATE TARGET_NAME="CUDA")
-target_include_directories(omptarget.rtl.cuda PRIVATE ${LIBOMPTARGET_INCLUDE_DIR})
-
-# Install plugin under the lib destination folder.
-install(TARGETS omptarget.rtl.cuda LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
-set_target_properties(omptarget.rtl.cuda PROPERTIES 
-  INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
-  CXX_VISIBILITY_PRESET protected)
-
-# Report to the parent scope that we are building a plugin for CUDA.
-# This controls whether tests are run for the nvptx offloading target
-# Run them if libcuda is available, or if the user explicitly asked for dlopen
-# Otherwise this plugin is being built speculatively and there may be no cuda available
-option(LIBOMPTARGET_FORCE_NVIDIA_TESTS "Build NVIDIA libomptarget tests" OFF)
-if (LIBOMPTARGET_FOUND_NVIDIA_GPU OR LIBOMPTARGET_FORCE_NVIDIA_TESTS)
-  libomptarget_say("Enable tests using CUDA plugin")
-  set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} nvptx64-nvidia-cuda nvptx64-nvidia-cuda-LTO" PARENT_SCOPE)
-  list(APPEND LIBOMPTARGET_TESTED_PLUGINS "omptarget.rtl.cuda")
-  set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
-else()
-  libomptarget_say("Not generating NVIDIA tests, no supported devices detected. Use 'LIBOMPTARGET_FORCE_NVIDIA_TESTS' to override.")
-endif()
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
--- a/openmp/libomptarget/plugins/exports
+++ b/openmp/libomptarget/plugins/exports
@ -1,6 +0,0 @@
-VERS1.0 {
-  global:
-    __tgt_rtl*;
-  local:
-    *;
-};
--- a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
@ -1,280 +0,0 @@
-//===-RTLs/generic-64bit/src/rtl.cpp - Target RTLs Implementation - C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// RTL for generic 64-bit machine
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/DynamicLibrary.h"
-
-#include <cassert>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <ffi.h>
-#include <link.h>
-#include <list>
-#include <string>
-#include <vector>
-
-#include "Debug.h"
-#include "omptargetplugin.h"
-
-using namespace llvm;
-using namespace llvm::sys;
-
-#ifndef TARGET_NAME
-#define TARGET_NAME Generic ELF - 64bit
-#endif
-#define DEBUG_PREFIX "TARGET " GETNAME(TARGET_NAME) " RTL"
-
-#ifndef TARGET_ELF_ID
-#define TARGET_ELF_ID 0
-#endif
-
-#include "elf_common.h"
-
-#define NUMBER_OF_DEVICES 4
-#define OFFLOAD_SECTION_NAME "omp_offloading_entries"
-
-/// Array of Dynamic libraries loaded for this target.
-struct DynLibTy {
-  std::string FileName;
-  std::unique_ptr<DynamicLibrary> DynLib;
-};
-
-/// Keep entries table per device.
-struct FuncOrGblEntryTy {
-  __tgt_target_table Table;
-  SmallVector<__tgt_offload_entry> Entries;
-};
-
-/// Class containing all the device information.
-class RTLDeviceInfoTy {
-  std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries;
-
-public:
-  std::list<DynLibTy> DynLibs;
-
-  // Record entry point associated with device.
-  void createOffloadTable(int32_t DeviceId,
-                          SmallVector<__tgt_offload_entry> &&Entries) {
-    assert(DeviceId < (int32_t)FuncGblEntries.size() &&
-           "Unexpected device id!");
-    FuncGblEntries[DeviceId].emplace_back();
-    FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
-
-    E.Entries = Entries;
-    E.Table.EntriesBegin = E.Entries.begin();
-    E.Table.EntriesEnd = E.Entries.end();
-  }
-
-  // Return true if the entry is associated with device.
-  bool findOffloadEntry(int32_t DeviceId, void *Addr) {
-    assert(DeviceId < (int32_t)FuncGblEntries.size() &&
-           "Unexpected device id!");
-    FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
-
-    for (__tgt_offload_entry *I = E.Table.EntriesBegin,
-                             *End = E.Table.EntriesEnd;
-         I < End; ++I) {
-      if (I->addr == Addr)
-        return true;
-    }
-
-    return false;
-  }
-
-  // Return the pointer to the target entries table.
-  __tgt_target_table *getOffloadEntriesTable(int32_t DeviceId) {
-    assert(DeviceId < (int32_t)FuncGblEntries.size() &&
-           "Unexpected device id!");
-    FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
-
-    return &E.Table;
-  }
-
-  RTLDeviceInfoTy(int32_t NumDevices) { FuncGblEntries.resize(NumDevices); }
-
-  ~RTLDeviceInfoTy() {
-    // Close dynamic libraries
-    for (auto &Lib : DynLibs) {
-      if (Lib.DynLib->isValid())
-        remove(Lib.FileName.c_str());
-    }
-  }
-};
-
-static RTLDeviceInfoTy DeviceInfo(NUMBER_OF_DEVICES);
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
-// If we don't have a valid ELF ID we can just fail.
-#if TARGET_ELF_ID < 1
-  return 0;
-#else
-  return elf_check_machine(Image, TARGET_ELF_ID);
-#endif
-}
-
-int32_t __tgt_rtl_number_of_devices() { return NUMBER_OF_DEVICES; }
-
-int32_t __tgt_rtl_init_device(int32_t DeviceId) { return OFFLOAD_SUCCESS; }
-
-__tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
-                                          __tgt_device_image *Image) {
-
-  DP("Dev %d: load binary from " DPxMOD " image\n", DeviceId,
-     DPxPTR(Image->ImageStart));
-
-  assert(DeviceId >= 0 && DeviceId < NUMBER_OF_DEVICES && "bad dev id");
-
-  size_t ImageSize = (size_t)Image->ImageEnd - (size_t)Image->ImageStart;
-
-  // load dynamic library and get the entry points. We use the dl library
-  // to do the loading of the library, but we could do it directly to avoid the
-  // dump to the temporary file.
-  //
-  // 1) Create tmp file with the library contents.
-  // 2) Use dlopen to load the file and dlsym to retrieve the symbols.
-  char TmpName[] = "/tmp/tmpfile_XXXXXX";
-  int TmpFd = mkstemp(TmpName);
-
-  if (TmpFd == -1)
-    return nullptr;
-
-  FILE *Ftmp = fdopen(TmpFd, "wb");
-
-  if (!Ftmp)
-    return nullptr;
-
-  fwrite(Image->ImageStart, ImageSize, 1, Ftmp);
-  fclose(Ftmp);
-
-  std::string ErrMsg;
-  auto DynLib = std::make_unique<sys::DynamicLibrary>(
-      sys::DynamicLibrary::getPermanentLibrary(TmpName, &ErrMsg));
-  DynLibTy Lib = {TmpName, std::move(DynLib)};
-
-  if (!Lib.DynLib->isValid()) {
-    DP("Target library loading error: %s\n", ErrMsg.c_str());
-    return NULL;
-  }
-
-  __tgt_offload_entry *HostBegin = Image->EntriesBegin;
-  __tgt_offload_entry *HostEnd = Image->EntriesEnd;
-
-  // Create a new offloading entry list using the device symbol address.
-  SmallVector<__tgt_offload_entry> Entries;
-  for (__tgt_offload_entry *E = HostBegin; E != HostEnd; ++E) {
-    if (!E->addr)
-      return nullptr;
-
-    __tgt_offload_entry Entry = *E;
-
-    void *DevAddr = Lib.DynLib->getAddressOfSymbol(E->name);
-    Entry.addr = DevAddr;
-
-    DP("Entry point " DPxMOD " maps to global %s (" DPxMOD ")\n",
-       DPxPTR(E - HostBegin), E->name, DPxPTR(DevAddr));
-
-    Entries.emplace_back(Entry);
-  }
-
-  DeviceInfo.createOffloadTable(DeviceId, std::move(Entries));
-  DeviceInfo.DynLibs.emplace_back(std::move(Lib));
-
-  return DeviceInfo.getOffloadEntriesTable(DeviceId);
-}
-
-void __tgt_rtl_print_device_info(int32_t DeviceId) {
-  printf("    This is a generic-elf-64bit device\n");
-}
-
-// Sample implementation of explicit memory allocator. For this plugin all kinds
-// are equivalent to each other.
-void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr,
-                           int32_t Kind) {
-  void *Ptr = NULL;
-
-  switch (Kind) {
-  case TARGET_ALLOC_DEVICE:
-  case TARGET_ALLOC_HOST:
-  case TARGET_ALLOC_SHARED:
-  case TARGET_ALLOC_DEFAULT:
-    Ptr = malloc(Size);
-    break;
-  default:
-    REPORT("Invalid target data allocation kind");
-  }
-
-  return Ptr;
-}
-
-int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
-                              int64_t Size) {
-  memcpy(TgtPtr, HstPtr, Size);
-  return OFFLOAD_SUCCESS;
-}
-
-int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
-                                int64_t Size) {
-  memcpy(HstPtr, TgtPtr, Size);
-  return OFFLOAD_SUCCESS;
-}
-
-int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t) {
-  free(TgtPtr);
-  return OFFLOAD_SUCCESS;
-}
-
-int32_t __tgt_rtl_launch_kernel(int32_t DeviceId, void *TgtEntryPtr,
-                                void **TgtArgs, ptrdiff_t *TgtOffsets,
-                                KernelArgsTy *KernelArgs,
-                                __tgt_async_info *AsyncInfoPtr) {
-  assert(!KernelArgs->NumTeams[1] && !KernelArgs->NumTeams[2] &&
-         !KernelArgs->ThreadLimit[1] && !KernelArgs->ThreadLimit[2] &&
-         "Only one dimensional kernels supported.");
-  // ignore team num and thread limit.
-
-  // Use libffi to launch execution.
-  ffi_cif Cif;
-
-  // All args are references.
-  std::vector<ffi_type *> ArgsTypes(KernelArgs->NumArgs, &ffi_type_pointer);
-  std::vector<void *> Args(KernelArgs->NumArgs);
-  std::vector<void *> Ptrs(KernelArgs->NumArgs);
-
-  for (uint32_t I = 0; I < KernelArgs->NumArgs; ++I) {
-    Ptrs[I] = (void *)((intptr_t)TgtArgs[I] + TgtOffsets[I]);
-    Args[I] = &Ptrs[I];
-  }
-
-  ffi_status Status = ffi_prep_cif(&Cif, FFI_DEFAULT_ABI, KernelArgs->NumArgs,
-                                   &ffi_type_void, &ArgsTypes[0]);
-
-  assert(Status == FFI_OK && "Unable to prepare target launch!");
-
-  if (Status != FFI_OK)
-    return OFFLOAD_FAIL;
-
-  DP("Running entry point at " DPxMOD "...\n", DPxPTR(TgtEntryPtr));
-
-  void (*Entry)(void);
-  *((void **)&Entry) = TgtEntryPtr;
-  ffi_call(&Cif, Entry, NULL, &Args[0]);
-  return OFFLOAD_SUCCESS;
-}
-
-#ifdef __cplusplus
-}
-#endif
--- a/openmp/libomptarget/plugins/ppc64/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/ppc64/CMakeLists.txt
@ -1,17 +0,0 @@
-##===----------------------------------------------------------------------===##
-# 
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-# 
-##===----------------------------------------------------------------------===##
-#
-# Build a plugin for a ppc64 machine if available.
-#
-##===----------------------------------------------------------------------===##
-
-if(CMAKE_SYSTEM_NAME MATCHES "Linux")
-  build_generic_elf64("ppc64" "PPC64" "ppc64" "powerpc64-ibm-linux-gnu" "21")
-else()
- libomptarget_say("Not building ppc64 offloading plugin: machine not found in the system.")
-endif()
--- a/openmp/libomptarget/plugins/ppc64le/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/ppc64le/CMakeLists.txt
@ -1,17 +0,0 @@
-##===----------------------------------------------------------------------===##
-# 
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-# 
-##===----------------------------------------------------------------------===##
-#
-# Build a plugin for a ppc64le machine if available.
-#
-##===----------------------------------------------------------------------===##
-
-if(CMAKE_SYSTEM_NAME MATCHES "Linux")
-  build_generic_elf64("ppc64le" "PPC64le" "ppc64" "powerpc64le-ibm-linux-gnu" "21")
-else()
- libomptarget_say("Not building ppc64le offloading plugin: machine not found in the system.")
-endif()
--- a/openmp/libomptarget/plugins/x86_64/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/x86_64/CMakeLists.txt
@ -1,17 +0,0 @@
-##===----------------------------------------------------------------------===##
-# 
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-# 
-##===----------------------------------------------------------------------===##
-#
-# Build a plugin for a x86_64 machine if available.
-#
-##===----------------------------------------------------------------------===##
-
-if(CMAKE_SYSTEM_NAME MATCHES "Linux")
-  build_generic_elf64("x86_64" "x86_64" "x86_64" "x86_64-pc-linux-gnu" "62")
-else()
- libomptarget_say("Not building x86_64 offloading plugin: machine not found in the system.")
-endif()
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@ -99,8 +99,6 @@ void RTLsTy::loadRTLs() {

  DP("Loading RTLs...\n");

-  BoolEnvar NextGenPlugins("LIBOMPTARGET_NEXTGEN_PLUGINS", true);
-
  // Attempt to open all the plugins and, if they exist, check if the interface
  // is correct and if they are supporting any devices.
  for (const char *Name : RTLNames) {
@ -109,13 +107,6 @@ void RTLsTy::loadRTLs() {
    RTLInfoTy &RTL = AllRTLs.back();

    const std::string BaseRTLName(Name);
-    if (NextGenPlugins) {
-      if (attemptLoadRTL(BaseRTLName + ".nextgen.so", RTL))
-        continue;
-
-      DP("Falling back to original plugin...\n");
-    }
-
    if (!attemptLoadRTL(BaseRTLName + ".so", RTL))
      AllRTLs.pop_back();
  }