[libomptarget][devicertl][nfc] Simplify target_atomic abstraction

[libomptarget][devicertl][nfc] Simplify target_atomic abstraction

Atomic functions were implemented as a shim around cuda's atomics, with
amdgcn implementing those symbols as a shim around gcc style intrinsics.

This patch folds target_atomic.h into target_impl.h and folds the amdgcn shim (hip_atomics.h) into the amdgcn target_impl.h.

Further work is likely to be useful here, either changing to openmp's atomic
interface or instantiating the templates on the few used types in order to
move them into a cuda/c++ implementation file. This change is mostly to
group the remaining uses of the cuda api under nvptx' target_impl abstraction.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95062
This commit is contained in:
Jon Chesterfield 2021-01-20 19:50:50 +00:00
parent 599fdfc5db
commit fbc1dcb946
11 changed files with 46 additions and 90 deletions

View File

@ -73,14 +73,12 @@ set(cuda_sources
# Header list stored in h_files: headers from ${CMAKE_CURRENT_SOURCE_DIR}/src
# come first, followed by the shared headers under
# ${devicertl_base_directory}/common.
set(h_files
${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h
${CMAKE_CURRENT_SOURCE_DIR}/src/hip_atomics.h
${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h
${devicertl_base_directory}/common/debug.h
${devicertl_base_directory}/common/device_environment.h
${devicertl_base_directory}/common/omptarget.h
${devicertl_base_directory}/common/omptargeti.h
${devicertl_base_directory}/common/state-queue.h
${devicertl_base_directory}/common/target_atomic.h
${devicertl_base_directory}/common/state-queuei.h
${devicertl_base_directory}/common/support.h)

View File

@ -1,41 +0,0 @@
//===---- hip_atomics.h - Declarations of hip atomic functions ---- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef OMPTARGET_AMDGCN_HIP_ATOMICS_H
#define OMPTARGET_AMDGCN_HIP_ATOMICS_H
#include "target_impl.h"
namespace {

// Sequentially consistent fetch-and-add on *address.
// Returns the value stored at *address before `val` was added.
template <typename T> DEVICE T atomicAdd(T *address, T val) {
  T previous = __atomic_fetch_add(address, val, __ATOMIC_SEQ_CST);
  return previous;
}

// Sequentially consistent fetch-and-max on *address.
// Returns the value stored at *address before the maximum was applied.
template <typename T> DEVICE T atomicMax(T *address, T val) {
  T previous = __atomic_fetch_max(address, val, __ATOMIC_SEQ_CST);
  return previous;
}

// Sequentially consistent exchange: stores `val` into *address and returns
// the value that was replaced.
template <typename T> DEVICE T atomicExch(T *address, T val) {
  T previous;
  __atomic_exchange(address, &val, &previous, __ATOMIC_SEQ_CST);
  return previous;
}

// Strong compare-and-swap. If *address equals `compare`, `val` is stored
// (sequentially consistent); otherwise nothing is written (relaxed load).
// Either way the value observed at *address is returned, so the caller can
// detect success by comparing the result against `compare`.
template <typename T> DEVICE T atomicCAS(T *address, T compare, T val) {
  // The builtin overwrites `observed` with the current contents of *address
  // when the comparison fails and leaves it untouched on success.
  T observed = compare;
  (void)__atomic_compare_exchange(address, &observed, &val, /*weak=*/false,
                                  __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
  return observed;
}

// Forwards to the amdgcn 32-bit atomic increment builtin with sequentially
// consistent ordering and returns the builtin's result.
// NOTE(review): the empty string is the builtin's scope argument and
// presumably selects the default scope -- confirm against the clang docs.
INLINE uint32_t atomicInc(uint32_t *address, uint32_t max) {
  uint32_t result =
      __builtin_amdgcn_atomic_inc32(address, max, __ATOMIC_SEQ_CST, "");
  return result;
}

} // namespace
#endif

View File

@ -29,8 +29,6 @@
#define SHARED __attribute__((shared))
#define ALIGN(N) __attribute__((aligned(N)))
#include "hip_atomics.h"
////////////////////////////////////////////////////////////////////////////////
// Kernel options
////////////////////////////////////////////////////////////////////////////////
@ -127,6 +125,31 @@ DEVICE int GetNumberOfThreadsInBlock();
DEVICE unsigned GetWarpId();
DEVICE unsigned GetLaneId();
// Atomics
// Sequentially consistent fetch-and-add on *address.
// Returns the value stored at *address before `val` was added.
template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
  T previous = __atomic_fetch_add(address, val, __ATOMIC_SEQ_CST);
  return previous;
}
// Forwards to the amdgcn 32-bit atomic increment builtin with sequentially
// consistent ordering and returns the builtin's result.
// NOTE(review): the empty string is the builtin's scope argument and
// presumably selects the default scope -- confirm against the clang docs.
INLINE uint32_t __kmpc_atomic_inc(uint32_t *address, uint32_t max) {
  uint32_t result =
      __builtin_amdgcn_atomic_inc32(address, max, __ATOMIC_SEQ_CST, "");
  return result;
}
// Sequentially consistent fetch-and-max on *address.
// Returns the value stored at *address before the maximum was applied.
template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
  T previous = __atomic_fetch_max(address, val, __ATOMIC_SEQ_CST);
  return previous;
}
// Sequentially consistent exchange: stores `val` into *address and returns
// the value that was replaced.
template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
  T previous;
  __atomic_exchange(address, &val, &previous, __ATOMIC_SEQ_CST);
  return previous;
}
// Strong compare-and-swap. If *address equals `compare`, `val` is stored
// (sequentially consistent); otherwise nothing is written (relaxed load).
// Either way the value observed at *address is returned, so the caller can
// detect success by comparing the result against `compare`.
template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
  // The builtin overwrites `observed` with the current contents of *address
  // when the comparison fails and leaves it untouched on success.
  T observed = compare;
  (void)__atomic_compare_exchange(address, &observed, &val, /*weak=*/false,
                                  __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
  return observed;
}
// Locks
DEVICE void __kmpc_impl_init_lock(omp_lock_t *lock);
DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *lock);

View File

@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
#include "common/target_atomic.h"
////////////////////////////////////////////////////////////////////////////////
// Task Descriptor
////////////////////////////////////////////////////////////////////////////////

View File

@ -13,7 +13,6 @@
#pragma omp declare target
#include "common/omptarget.h"
#include "common/target_atomic.h"
#include "target_impl.h"
EXTERN double omp_get_wtick(void) {

View File

@ -15,7 +15,6 @@
#include "common/omptarget.h"
#include "target_impl.h"
#include "common/target_atomic.h"
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

View File

@ -12,7 +12,6 @@
#pragma omp declare target
#include "common/omptarget.h"
#include "common/target_atomic.h"
#include "target_impl.h"
EXTERN

View File

@ -17,7 +17,6 @@
//===----------------------------------------------------------------------===//
#include "state-queue.h"
#include "common/target_atomic.h"
template <typename ElementType, uint32_t SIZE>
INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ENQUEUE_TICKET() {

View File

@ -1,38 +0,0 @@
//===---- target_atomic.h - OpenMP GPU target atomic functions ---- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Declarations of atomic functions provided by each target
//
//===----------------------------------------------------------------------===//
#ifndef OMPTARGET_TARGET_ATOMIC_H
#define OMPTARGET_TARGET_ATOMIC_H
#include "target_impl.h"
// Target-independent entry point for atomic add: forwards both arguments to
// the target-provided atomicAdd and returns its result converted to T.
template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
  T result = atomicAdd(address, val);
  return result;
}
// Target-independent entry point for atomic increment: forwards both
// arguments to the target-provided atomicInc and returns its result
// converted to T.
template <typename T> INLINE T __kmpc_atomic_inc(T *address, T val) {
  T result = atomicInc(address, val);
  return result;
}
// Target-independent entry point for atomic max: forwards both arguments to
// the target-provided atomicMax and returns its result converted to T.
template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
  T result = atomicMax(address, val);
  return result;
}
// Target-independent entry point for atomic exchange: forwards both
// arguments to the target-provided atomicExch and returns its result
// converted to T.
template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
  T result = atomicExch(address, val);
  return result;
}
// Target-independent entry point for compare-and-swap: forwards the address,
// the expected value and the replacement value to the target-provided
// atomicCAS and returns its result converted to T.
template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
  T result = atomicCAS(address, compare, val);
  return result;
}
#endif

View File

@ -13,7 +13,6 @@
#include "target_impl.h"
#include "common/debug.h"
#include "common/target_atomic.h"
#include <cuda.h>

View File

@ -130,6 +130,27 @@ DEVICE int GetNumberOfThreadsInBlock();
DEVICE unsigned GetWarpId();
DEVICE unsigned GetLaneId();
// Atomics
// Atomic add: hands `address` and `val` to atomicAdd and returns its result
// converted to T.
template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
  T result = atomicAdd(address, val);
  return result;
}
// Atomic increment: hands `address` and `val` to atomicInc and returns its
// result converted to T.
// NOTE(review): CUDA documents atomicInc for unsigned int only, so T is
// presumably always a 32-bit unsigned type here -- confirm against callers.
template <typename T> INLINE T __kmpc_atomic_inc(T *address, T val) {
  T result = atomicInc(address, val);
  return result;
}
// Atomic max: hands `address` and `val` to atomicMax and returns its result
// converted to T.
template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
  T result = atomicMax(address, val);
  return result;
}
// Atomic exchange: hands `address` and `val` to atomicExch and returns its
// result converted to T.
template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
  T result = atomicExch(address, val);
  return result;
}
// Atomic compare-and-swap: hands the address, the expected value and the
// replacement value to atomicCAS and returns its result converted to T.
template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
  T result = atomicCAS(address, compare, val);
  return result;
}
// Locks
DEVICE void __kmpc_impl_init_lock(omp_lock_t *lock);
DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *lock);