[Libomptarget] Add an external interface to dynamic shared memory

This patch adds an external interface to access the dynamic shared
memory buffer in the device runtime. The function introduced is
``llvm_omp_get_dynamic_shared``. This includes a host-side
definition that only returns a null pointer so that it can be used when
host-fallback is enabled without crashing. Support for dynamic shared
memory was also ported to the old device runtime.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D110957
This commit is contained in:
Joseph Huber 2021-10-01 14:37:02 -04:00
parent 007d98f520
commit 208f900527
8 changed files with 32 additions and 11 deletions

View File

@ -132,6 +132,8 @@ int omp_get_team_num();
int omp_get_initial_device(void);
void *llvm_omp_get_dynamic_shared();
/// Synchronization
///
///{

View File

@ -503,9 +503,9 @@ __attribute__((noinline)) void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
memory::freeShared(Ptr, Bytes, "Frontend free shared");
}
__attribute__((noinline)) void *__kmpc_get_dynamic_shared() {
return memory::getDynamicBuffer();
}
void *__kmpc_get_dynamic_shared() { return memory::getDynamicBuffer(); }
void *llvm_omp_get_dynamic_shared() { return __kmpc_get_dynamic_shared(); }
/// Allocate storage in shared memory to communicate arguments from the main
/// thread to the workers in generic mode. If we exceed

View File

@ -21,6 +21,18 @@
static constexpr unsigned MinBytes = 8;
static constexpr unsigned Alignment = 8;
/// External symbol to access dynamic shared memory.
extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment)));
#pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc)
EXTERN void *__kmpc_get_dynamic_shared() { return DynamicSharedBuffer; }
EXTERN void *llvm_omp_get_dynamic_shared() {
return __kmpc_get_dynamic_shared();
}
template <unsigned BPerThread, unsigned NThreads = MAX_THREADS_PER_TEAM>
struct alignas(32) ThreadStackTy {
static constexpr unsigned BytesPerThread = BPerThread;

View File

@ -92,6 +92,8 @@ EXTERN int omp_get_team_num(void);
EXTERN int omp_get_initial_device(void);
EXTERN int omp_get_max_task_priority(void);
EXTERN void *llvm_omp_get_dynamic_shared();
////////////////////////////////////////////////////////////////////////////////
// file below is swiped from kmpc host interface
////////////////////////////////////////////////////////////////////////////////
@ -499,4 +501,7 @@ EXTERN void *__kmpc_alloc_shared(uint64_t Bytes);
/// paired allocation to make memory management easier.
EXTERN void __kmpc_free_shared(void *Ptr, size_t Bytes);
/// Get a pointer to the dynamic shared memory buffer in the device.
EXTERN void *__kmpc_get_dynamic_shared();
#endif

View File

@ -220,6 +220,9 @@ void *llvm_omp_target_alloc_device(size_t size, int device_num);
void *llvm_omp_target_alloc_host(size_t size, int device_num);
void *llvm_omp_target_alloc_shared(size_t size, int device_num);
/// Dummy target so we have a symbol for generating host fallback.
void *llvm_omp_get_dynamic_shared();
/// add the clauses of the requires directives in a given file
void __tgt_register_requires(int64_t flags);

View File

@ -53,6 +53,8 @@ EXTERN void *llvm_omp_target_alloc_shared(size_t size, int device_num) {
return targetAllocExplicit(size, device_num, TARGET_ALLOC_SHARED, __func__);
}
EXTERN void *llvm_omp_get_dynamic_shared() { return nullptr; }
EXTERN void omp_target_free(void *device_ptr, int device_num) {
TIMESCOPE();
DP("Call to omp_target_free for device %d and address " DPxMOD "\n",

View File

@ -40,6 +40,7 @@ VERS1.0 {
llvm_omp_target_alloc_host;
llvm_omp_target_alloc_shared;
llvm_omp_target_alloc_device;
llvm_omp_get_dynamic_shared;
__tgt_set_info_flag;
__tgt_print_device_info;
local:

View File

@ -1,22 +1,18 @@
// RUN: %libomptarget-compile-nvptx64-nvidia-cuda -fopenmp-target-new-runtime
// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=4 \
// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 \
// RUN: %libomptarget-run-nvptx64-nvidia-cuda | %fcheck-nvptx64-nvidia-cuda
// REQUIRES: nvptx64-nvidia-cuda
#include <omp.h>
#include <stdio.h>
void *get_dynamic_shared() { return NULL; }
#pragma omp begin declare variant match(device = {arch(nvptx64)})
extern void *__kmpc_get_dynamic_shared();
void *get_dynamic_shared() { return __kmpc_get_dynamic_shared(); }
#pragma omp end declare variant
void *llvm_omp_get_dynamic_shared();
int main() {
int x;
#pragma omp target parallel map(from : x)
{
int *buf = get_dynamic_shared();
int *buf = llvm_omp_get_dynamic_shared() + 252;
#pragma omp barrier
if (omp_get_thread_num() == 0)
*buf = 1;
@ -26,6 +22,6 @@ int main() {
}
// CHECK: PASS
if (x == 1)
if (x == 1 && llvm_omp_get_dynamic_shared() == NULL)
printf("PASS\n");
}