mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-07 08:34:59 +00:00
[libomptarget][nfc] Move omp locks under target_impl
Summary: [libomptarget][nfc] Move omp locks under target_impl These are likely to be target specific, even down to the lock_t which is correspondingly moved out of interface.h. The alternative is to include interface.h in target_impl which substantiatially increases the scope of those symbols. The current nvptx implementation deadlocks on amdgcn. The preferred implementation for that arch is still under discussion - this change leaves declarations in target_impl. The functions could be inline for nvptx. I'd prefer to keep the internals hidden in the target_impl translation unit, but will add the (possibly renamed) macros to target_impl.h if preferred. Reviewers: ABataev, jdoerfert, grokos Reviewed By: jdoerfert Subscribers: jvesely, mgorny, jfb, openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D71574
This commit is contained in:
parent
5666b70fd0
commit
3d3e4076cd
@ -13,5 +13,6 @@
|
||||
|
||||
#define EXTERN extern "C" __attribute__((device))
|
||||
typedef uint64_t __kmpc_impl_lanemask_t;
|
||||
typedef uint32_t omp_lock_t; /* arbitrary type of the right length */
|
||||
|
||||
#endif
|
||||
|
@ -155,6 +155,13 @@ INLINE int GetBlockIdInKernel() { return __builtin_amdgcn_workgroup_id_x(); }
|
||||
INLINE int GetNumberOfBlocksInKernel() { return __ockl_get_num_groups(0); }
|
||||
INLINE int GetNumberOfThreadsInBlock() { return __ockl_get_local_size(0); }
|
||||
|
||||
// Locks
|
||||
EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock);
|
||||
EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock);
|
||||
EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock);
|
||||
EXTERN void __kmpc_impl_unset_lock(omp_lock_t *lock);
|
||||
EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock);
|
||||
|
||||
// DEVICE versions of part of libc
|
||||
extern "C" {
|
||||
DEVICE __attribute__((noreturn)) void
|
||||
|
@ -30,7 +30,6 @@
|
||||
// OpenMP interface
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef uint32_t omp_lock_t; /* arbitrary type of the right length */
|
||||
typedef uint64_t omp_nest_lock_t; /* arbitrary type of the right length */
|
||||
|
||||
typedef enum omp_sched_t {
|
||||
|
@ -55,6 +55,7 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
|
||||
${devicertl_common_directory}/src/critical.cu
|
||||
src/data_sharing.cu
|
||||
src/libcall.cu
|
||||
src/target_impl.cu
|
||||
${devicertl_common_directory}/src/loop.cu
|
||||
${devicertl_common_directory}/src/omptarget.cu
|
||||
${devicertl_common_directory}/src/parallel.cu
|
||||
|
@ -336,54 +336,30 @@ EXTERN int omp_get_max_task_priority(void) {
|
||||
// locks
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define __OMP_SPIN 1000
|
||||
#define UNSET 0
|
||||
#define SET 1
|
||||
|
||||
EXTERN void omp_init_lock(omp_lock_t *lock) {
|
||||
omp_unset_lock(lock);
|
||||
__kmpc_impl_init_lock(lock);
|
||||
PRINT0(LD_IO, "call omp_init_lock()\n");
|
||||
}
|
||||
|
||||
EXTERN void omp_destroy_lock(omp_lock_t *lock) {
|
||||
omp_unset_lock(lock);
|
||||
__kmpc_impl_destroy_lock(lock);
|
||||
PRINT0(LD_IO, "call omp_destroy_lock()\n");
|
||||
}
|
||||
|
||||
EXTERN void omp_set_lock(omp_lock_t *lock) {
|
||||
// int atomicCAS(int* address, int compare, int val);
|
||||
// (old == compare ? val : old)
|
||||
|
||||
// TODO: not sure spinning is a good idea here..
|
||||
while (atomicCAS(lock, UNSET, SET) != UNSET) {
|
||||
clock_t start = clock();
|
||||
clock_t now;
|
||||
for (;;) {
|
||||
now = clock();
|
||||
clock_t cycles = now > start ? now - start : now + (0xffffffff - start);
|
||||
if (cycles >= __OMP_SPIN * GetBlockIdInKernel()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} // wait for 0 to be the read value
|
||||
|
||||
__kmpc_impl_set_lock(lock);
|
||||
PRINT0(LD_IO, "call omp_set_lock()\n");
|
||||
}
|
||||
|
||||
EXTERN void omp_unset_lock(omp_lock_t *lock) {
|
||||
(void)atomicExch(lock, UNSET);
|
||||
|
||||
__kmpc_impl_unset_lock(lock);
|
||||
PRINT0(LD_IO, "call omp_unset_lock()\n");
|
||||
}
|
||||
|
||||
EXTERN int omp_test_lock(omp_lock_t *lock) {
|
||||
// int atomicCAS(int* address, int compare, int val);
|
||||
// (old == compare ? val : old)
|
||||
int ret = atomicAdd(lock, 0);
|
||||
|
||||
int rc = __kmpc_impl_test_lock(lock);
|
||||
PRINT(LD_IO, "call omp_test_lock() return %d\n", ret);
|
||||
|
||||
return ret;
|
||||
return rc;
|
||||
}
|
||||
|
||||
// for xlf Fotran
|
||||
|
@ -13,5 +13,6 @@
|
||||
|
||||
#define EXTERN extern "C" __device__
|
||||
typedef uint32_t __kmpc_impl_lanemask_t;
|
||||
typedef uint32_t omp_lock_t; /* arbitrary type of the right length */
|
||||
|
||||
#endif
|
||||
|
54
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
Normal file
54
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
Normal file
@ -0,0 +1,54 @@
|
||||
//===---------- target_impl.cu - NVPTX OpenMP GPU options ------- CUDA -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Definitions of target specific functions
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "target_impl.h"
|
||||
#include "common/debug.h"
|
||||
|
||||
#define __OMP_SPIN 1000
|
||||
#define UNSET 0
|
||||
#define SET 1
|
||||
|
||||
EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock) {
|
||||
omp_unset_lock(lock);
|
||||
}
|
||||
|
||||
EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock) {
|
||||
omp_unset_lock(lock);
|
||||
}
|
||||
|
||||
EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock) {
|
||||
// int atomicCAS(int* address, int compare, int val);
|
||||
// (old == compare ? val : old)
|
||||
|
||||
// TODO: not sure spinning is a good idea here..
|
||||
while (atomicCAS(lock, UNSET, SET) != UNSET) {
|
||||
clock_t start = clock();
|
||||
clock_t now;
|
||||
for (;;) {
|
||||
now = clock();
|
||||
clock_t cycles = now > start ? now - start : now + (0xffffffff - start);
|
||||
if (cycles >= __OMP_SPIN * GetBlockIdInKernel()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} // wait for 0 to be the read value
|
||||
}
|
||||
|
||||
EXTERN void __kmpc_impl_unset_lock(omp_lock_t *lock) {
|
||||
(void)atomicExch(lock, UNSET);
|
||||
}
|
||||
|
||||
EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock) {
|
||||
// int atomicCAS(int* address, int compare, int val);
|
||||
// (old == compare ? val : old)
|
||||
return atomicAdd(lock, 0);
|
||||
}
|
@ -188,4 +188,11 @@ INLINE int GetBlockIdInKernel() { return blockIdx.x; }
|
||||
INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; }
|
||||
INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; }
|
||||
|
||||
// Locks
|
||||
EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock);
|
||||
EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock);
|
||||
EXTERN void __kmpc_impl_set_lock(omp_lock_t *lock);
|
||||
EXTERN void __kmpc_impl_unset_lock(omp_lock_t *lock);
|
||||
EXTERN int __kmpc_impl_test_lock(omp_lock_t *lock);
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user