[libomptarget][devicertl][nfc] Simplify target_atomic abstraction

[libomptarget][devicertl][nfc] Simplify target_atomic abstraction Atomic functions were implemented as a shim around cuda's atomics, with amdgcn implementing those symbols as a shim around gcc style intrinsics. This patch folds target_atomic.h into target_impl.h and folds amdgcn. Further work is likely to be useful here, either changing to openmp's atomic interface or instantiating the templates on the few used types in order to move them into a cuda/c++ implementation file. This change is mostly to group the remaining uses of the cuda api under nvptx' target_impl abstraction. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D95062
2024-11-24 06:10:12 +00:00 · 2021-01-20 19:50:50 +00:00 · 2021-01-20 19:50:50 +00:00 · fbc1dcb946
commit fbc1dcb946
parent 599fdfc5db
11 changed files with 46 additions and 90 deletions
--- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@ -73,14 +73,12 @@ set(cuda_sources

 set(h_files
  ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h
-  ${CMAKE_CURRENT_SOURCE_DIR}/src/hip_atomics.h
  ${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h
  ${devicertl_base_directory}/common/debug.h
  ${devicertl_base_directory}/common/device_environment.h
  ${devicertl_base_directory}/common/omptarget.h
  ${devicertl_base_directory}/common/omptargeti.h
  ${devicertl_base_directory}/common/state-queue.h
-  ${devicertl_base_directory}/common/target_atomic.h
  ${devicertl_base_directory}/common/state-queuei.h
  ${devicertl_base_directory}/common/support.h)

--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h
@ -1,41 +0,0 @@
-//===---- hip_atomics.h - Declarations of hip atomic functions ---- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef OMPTARGET_AMDGCN_HIP_ATOMICS_H
-#define OMPTARGET_AMDGCN_HIP_ATOMICS_H
-
-#include "target_impl.h"
-
-namespace {
-
-template <typename T> DEVICE T atomicAdd(T *address, T val) {
-  return __atomic_fetch_add(address, val, __ATOMIC_SEQ_CST);
-}
-
-template <typename T> DEVICE T atomicMax(T *address, T val) {
-  return __atomic_fetch_max(address, val, __ATOMIC_SEQ_CST);
-}
-
-template <typename T> DEVICE T atomicExch(T *address, T val) {
-  T r;
-  __atomic_exchange(address, &val, &r, __ATOMIC_SEQ_CST);
-  return r;
-}
-
-template <typename T> DEVICE T atomicCAS(T *address, T compare, T val) {
-  (void)__atomic_compare_exchange(address, &compare, &val, false,
-                                  __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
-  return compare;
-}
-
-INLINE uint32_t atomicInc(uint32_t *address, uint32_t max) {
-  return __builtin_amdgcn_atomic_inc32(address, max, __ATOMIC_SEQ_CST, "");
-}
-
-} // namespace
-#endif
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@ -29,8 +29,6 @@
 #define SHARED __attribute__((shared))
 #define ALIGN(N) __attribute__((aligned(N)))

-#include "hip_atomics.h"
-
 ////////////////////////////////////////////////////////////////////////////////
 // Kernel options
 ////////////////////////////////////////////////////////////////////////////////
@ -127,6 +125,31 @@ DEVICE int GetNumberOfThreadsInBlock();
 DEVICE unsigned GetWarpId();
 DEVICE unsigned GetLaneId();

+// Atomics
+template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
+  return __atomic_fetch_add(address, val, __ATOMIC_SEQ_CST);
+}
+
+INLINE uint32_t __kmpc_atomic_inc(uint32_t *address, uint32_t max) {
+  return __builtin_amdgcn_atomic_inc32(address, max, __ATOMIC_SEQ_CST, "");
+}
+
+template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
+  return __atomic_fetch_max(address, val, __ATOMIC_SEQ_CST);
+}
+
+template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
+  T r;
+  __atomic_exchange(address, &val, &r, __ATOMIC_SEQ_CST);
+  return r;
+}
+
+template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
+  (void)__atomic_compare_exchange(address, &compare, &val, false,
+                                  __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
+  return compare;
+}
+
 // Locks
 DEVICE void __kmpc_impl_init_lock(omp_lock_t *lock);
 DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *lock);
--- a/openmp/libomptarget/deviceRTLs/common/omptargeti.h
+++ b/openmp/libomptarget/deviceRTLs/common/omptargeti.h
@ -11,8 +11,6 @@
 //
 //===----------------------------------------------------------------------===//

-#include "common/target_atomic.h"
-
 ////////////////////////////////////////////////////////////////////////////////
 // Task Descriptor
 ////////////////////////////////////////////////////////////////////////////////
--- a/openmp/libomptarget/deviceRTLs/common/src/libcall.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/libcall.cu
@ -13,7 +13,6 @@
 #pragma omp declare target

 #include "common/omptarget.h"
-#include "common/target_atomic.h"
 #include "target_impl.h"

 EXTERN double omp_get_wtick(void) {
--- a/openmp/libomptarget/deviceRTLs/common/src/loop.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
@ -15,7 +15,6 @@

 #include "common/omptarget.h"
 #include "target_impl.h"
-#include "common/target_atomic.h"

 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
--- a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu
@ -12,7 +12,6 @@
 #pragma omp declare target

 #include "common/omptarget.h"
-#include "common/target_atomic.h"
 #include "target_impl.h"

 EXTERN
--- a/openmp/libomptarget/deviceRTLs/common/state-queuei.h
+++ b/openmp/libomptarget/deviceRTLs/common/state-queuei.h
@ -17,7 +17,6 @@
 //===----------------------------------------------------------------------===//

 #include "state-queue.h"
-#include "common/target_atomic.h"

 template <typename ElementType, uint32_t SIZE>
 INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ENQUEUE_TICKET() {
--- a/openmp/libomptarget/deviceRTLs/common/target_atomic.h
+++ b/openmp/libomptarget/deviceRTLs/common/target_atomic.h
@ -1,38 +0,0 @@
-//===---- target_atomic.h - OpenMP GPU target atomic functions ---- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Declarations of atomic functions provided by each target
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef OMPTARGET_TARGET_ATOMIC_H
-#define OMPTARGET_TARGET_ATOMIC_H
-
-#include "target_impl.h"
-
-template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
-  return atomicAdd(address, val);
-}
-
-template <typename T> INLINE T __kmpc_atomic_inc(T *address, T val) {
-  return atomicInc(address, val);
-}
-
-template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
-  return atomicMax(address, val);
-}
-
-template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
-  return atomicExch(address, val);
-}
-
-template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
-  return atomicCAS(address, compare, val);
-}
-
-#endif
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
@ -13,7 +13,6 @@

 #include "target_impl.h"
 #include "common/debug.h"
-#include "common/target_atomic.h"

 #include <cuda.h>

--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@ -130,6 +130,27 @@ DEVICE int GetNumberOfThreadsInBlock();
 DEVICE unsigned GetWarpId();
 DEVICE unsigned GetLaneId();

+// Atomics
+template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
+  return atomicAdd(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_inc(T *address, T val) {
+  return atomicInc(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
+  return atomicMax(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
+  return atomicExch(address, val);
+}
+
+template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
+  return atomicCAS(address, compare, val);
+}
+
 // Locks
 DEVICE void __kmpc_impl_init_lock(omp_lock_t *lock);
 DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *lock);