mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-16 13:08:42 +00:00
[OpenMP] libomp: runtime part of omp_all_memory task dependence implementation.
New omp_all_memory task dependence type is implemented. Library recognizes the new type via either (dependence_address == NULL && dependence_flag == 0x80) or (dependence_address == SIZE_MAX). A task with new dependence type depends on each preceding task with any dependence type (kind of a dependence barrier). Differential Revision: https://reviews.llvm.org/D108574
This commit is contained in:
parent
2c6d90d741
commit
d40108e0af
@ -2255,22 +2255,26 @@ typedef union kmp_depnode kmp_depnode_t;
|
||||
typedef struct kmp_depnode_list kmp_depnode_list_t;
|
||||
typedef struct kmp_dephash_entry kmp_dephash_entry_t;
|
||||
|
||||
// macros for checking dep flag as an integer
|
||||
#define KMP_DEP_IN 0x1
|
||||
#define KMP_DEP_OUT 0x2
|
||||
#define KMP_DEP_INOUT 0x3
|
||||
#define KMP_DEP_MTX 0x4
|
||||
#define KMP_DEP_SET 0x8
|
||||
#define KMP_DEP_ALL 0x80
|
||||
// Compiler sends us this info:
|
||||
typedef struct kmp_depend_info {
|
||||
kmp_intptr_t base_addr;
|
||||
size_t len;
|
||||
union {
|
||||
kmp_uint8 flag;
|
||||
struct {
|
||||
kmp_uint8 flag; // flag as an unsigned char
|
||||
struct { // flag as a set of 8 bits
|
||||
unsigned in : 1;
|
||||
unsigned out : 1;
|
||||
unsigned mtx : 1;
|
||||
unsigned set : 1;
|
||||
unsigned unused : 3;
|
||||
unsigned all : 1;
|
||||
} flags;
|
||||
};
|
||||
} kmp_depend_info_t;
|
||||
@ -2316,6 +2320,7 @@ struct kmp_dephash_entry {
|
||||
typedef struct kmp_dephash {
|
||||
kmp_dephash_entry_t **buckets;
|
||||
size_t size;
|
||||
kmp_depnode_t *last_all;
|
||||
size_t generation;
|
||||
kmp_uint32 nelements;
|
||||
kmp_uint32 nconflicts;
|
||||
|
@ -86,6 +86,7 @@ static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread,
|
||||
h->buckets = (kmp_dephash_entry **)(h + 1);
|
||||
h->generation = gen;
|
||||
h->nconflicts = 0;
|
||||
h->last_all = current_dephash->last_all;
|
||||
|
||||
// make sure buckets are properly initialized
|
||||
for (size_t i = 0; i < new_size; i++) {
|
||||
@ -142,6 +143,7 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
|
||||
h->nelements = 0;
|
||||
h->nconflicts = 0;
|
||||
h->buckets = (kmp_dephash_entry **)(h + 1);
|
||||
h->last_all = NULL;
|
||||
|
||||
for (size_t i = 0; i < h_size; i++)
|
||||
h->buckets[i] = 0;
|
||||
@ -174,7 +176,10 @@ static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
|
||||
thread, sizeof(kmp_dephash_entry_t));
|
||||
#endif
|
||||
entry->addr = addr;
|
||||
entry->last_out = NULL;
|
||||
if (!h->last_all) // no predecessor task with omp_all_memory dependence
|
||||
entry->last_out = NULL;
|
||||
else // else link the omp_all_memory depnode to the new entry
|
||||
entry->last_out = __kmp_node_ref(h->last_all);
|
||||
entry->last_set = NULL;
|
||||
entry->prev_set = NULL;
|
||||
entry->last_flag = 0;
|
||||
@ -290,6 +295,63 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
|
||||
return npredecessors;
|
||||
}
|
||||
|
||||
static inline kmp_int32
|
||||
__kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
|
||||
bool dep_barrier, kmp_task_t *task) {
|
||||
KA_TRACE(30, ("__kmp_process_dep_all: T#%d processing dep_all, "
|
||||
"dep_barrier = %d\n",
|
||||
gtid, dep_barrier));
|
||||
kmp_info_t *thread = __kmp_threads[gtid];
|
||||
kmp_int32 npredecessors = 0;
|
||||
|
||||
// process previous omp_all_memory node if any
|
||||
npredecessors +=
|
||||
__kmp_depnode_link_successor(gtid, thread, task, node, h->last_all);
|
||||
__kmp_node_deref(thread, h->last_all);
|
||||
if (!dep_barrier) {
|
||||
h->last_all = __kmp_node_ref(node);
|
||||
} else {
|
||||
// if this is a sync point in the serial sequence, then the previous
|
||||
// outputs are guaranteed to be completed after the execution of this
|
||||
// task so the previous output nodes can be cleared.
|
||||
h->last_all = NULL;
|
||||
}
|
||||
|
||||
// process all regular dependences
|
||||
for (size_t i = 0; i < h->size; i++) {
|
||||
kmp_dephash_entry_t *info = h->buckets[i];
|
||||
if (!info) // skip empty slots in dephash
|
||||
continue;
|
||||
for (; info; info = info->next_in_bucket) {
|
||||
// for each entry the omp_all_memory works as OUT dependence
|
||||
kmp_depnode_t *last_out = info->last_out;
|
||||
kmp_depnode_list_t *last_set = info->last_set;
|
||||
kmp_depnode_list_t *prev_set = info->prev_set;
|
||||
if (last_set) {
|
||||
npredecessors +=
|
||||
__kmp_depnode_link_successor(gtid, thread, task, node, last_set);
|
||||
__kmp_depnode_list_free(thread, last_set);
|
||||
__kmp_depnode_list_free(thread, prev_set);
|
||||
info->last_set = NULL;
|
||||
info->prev_set = NULL;
|
||||
info->last_flag = 0; // no sets in this dephash entry
|
||||
} else {
|
||||
npredecessors +=
|
||||
__kmp_depnode_link_successor(gtid, thread, task, node, last_out);
|
||||
}
|
||||
__kmp_node_deref(thread, last_out);
|
||||
if (!dep_barrier) {
|
||||
info->last_out = __kmp_node_ref(node);
|
||||
} else {
|
||||
info->last_out = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
KA_TRACE(30, ("__kmp_process_dep_all: T#%d found %d predecessors\n", gtid,
|
||||
npredecessors));
|
||||
return npredecessors;
|
||||
}
|
||||
|
||||
template <bool filter>
|
||||
static inline kmp_int32
|
||||
__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
|
||||
@ -417,7 +479,7 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
|
||||
kmp_depend_info_t *dep_list,
|
||||
kmp_int32 ndeps_noalias,
|
||||
kmp_depend_info_t *noalias_dep_list) {
|
||||
int i, n_mtxs = 0;
|
||||
int i, n_mtxs = 0, dep_all = 0;
|
||||
#if KMP_DEBUG
|
||||
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
|
||||
#endif
|
||||
@ -429,7 +491,7 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
|
||||
// Filter deps in dep_list
|
||||
// TODO: Different algorithm for large dep_list ( > 10 ? )
|
||||
for (i = 0; i < ndeps; i++) {
|
||||
if (dep_list[i].base_addr != 0) {
|
||||
if (dep_list[i].base_addr != 0 && dep_list[i].base_addr != KMP_SIZE_T_MAX) {
|
||||
KMP_DEBUG_ASSERT(
|
||||
dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
|
||||
dep_list[i].flag == KMP_DEP_INOUT ||
|
||||
@ -451,6 +513,13 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
|
||||
dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
|
||||
}
|
||||
}
|
||||
} else if (dep_list[i].flag == KMP_DEP_ALL ||
|
||||
dep_list[i].base_addr == KMP_SIZE_T_MAX) {
|
||||
// omp_all_memory dependence can be marked by compiler by either
|
||||
// (addr=0 && flag=0x80) (flag KMP_DEP_ALL), or (addr=-1).
|
||||
// omp_all_memory overrides all other dependences if any
|
||||
dep_all = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -464,10 +533,14 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
|
||||
// the end
|
||||
int npredecessors;
|
||||
|
||||
npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps,
|
||||
dep_list, task);
|
||||
npredecessors += __kmp_process_deps<false>(
|
||||
gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
|
||||
if (!dep_all) { // regular dependences
|
||||
npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
|
||||
ndeps, dep_list, task);
|
||||
npredecessors += __kmp_process_deps<false>(
|
||||
gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
|
||||
} else { // omp_all_memory dependence
|
||||
npredecessors = __kmp_process_dep_all(gtid, node, *hash, dep_barrier, task);
|
||||
}
|
||||
|
||||
node->dn.task = task;
|
||||
KMP_MB();
|
||||
|
@ -73,6 +73,8 @@ static inline void __kmp_dephash_free_entries(kmp_info_t *thread,
|
||||
h->buckets[i] = 0;
|
||||
}
|
||||
}
|
||||
__kmp_node_deref(thread, h->last_all);
|
||||
h->last_all = NULL;
|
||||
}
|
||||
|
||||
static inline void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) {
|
||||
|
298
openmp/runtime/test/tasking/kmp_task_depend_all.c
Normal file
298
openmp/runtime/test/tasking/kmp_task_depend_all.c
Normal file
@ -0,0 +1,298 @@
|
||||
// RUN: %libomp-compile-and-run
|
||||
// The runtime currently does not get dependency information from GCC.
|
||||
// UNSUPPORTED: gcc
|
||||
|
||||
// Tests OMP 5.x task dependence "omp_all_memory",
|
||||
// emulates compiler codegen versions for new dep kind
|
||||
//
|
||||
// Task tree created:
|
||||
// task0 - task1 (in: i1, i2)
|
||||
// \
|
||||
// task2 (inoutset: i2), (in: i1)
|
||||
// /
|
||||
// task3 (omp_all_memory) via flag=0x80
|
||||
// /
|
||||
// task4 - task5 (in: i1, i2)
|
||||
// /
|
||||
// task6 (omp_all_memory) via addr=-1
|
||||
// /
|
||||
// task7 (omp_all_memory) via flag=0x80
|
||||
// /
|
||||
// task8 (in: i3)
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <omp.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#define mysleep(n) Sleep(n)
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#define mysleep(n) usleep((n)*1000)
|
||||
#endif
|
||||
|
||||
// to check the # of concurrent tasks (must be 1 for MTX, <3 for other kinds)
|
||||
static int checker = 0;
|
||||
static int err = 0;
|
||||
#ifndef DELAY
|
||||
#define DELAY 100
|
||||
#endif
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// internal data to emulate compiler codegen
|
||||
typedef struct DEP {
|
||||
size_t addr;
|
||||
size_t len;
|
||||
unsigned char flags;
|
||||
} dep;
|
||||
#define DEP_ALL_MEM 0x80
|
||||
typedef struct task {
|
||||
void** shareds;
|
||||
void* entry;
|
||||
int part_id;
|
||||
void* destr_thunk;
|
||||
int priority;
|
||||
long long device_id;
|
||||
int f_priv;
|
||||
} task_t;
|
||||
#define TIED 1
|
||||
typedef int(*entry_t)(int, task_t*);
|
||||
typedef struct ID {
|
||||
int reserved_1;
|
||||
int flags;
|
||||
int reserved_2;
|
||||
int reserved_3;
|
||||
char *psource;
|
||||
} id;
|
||||
// thunk routine for tasks with ALL dependency
|
||||
int thunk_m(int gtid, task_t* ptask) {
|
||||
int lcheck, th;
|
||||
#pragma omp atomic capture
|
||||
lcheck = ++checker;
|
||||
th = omp_get_thread_num();
|
||||
printf("task m_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
|
||||
if (lcheck != 1) { // no more than 1 task at a time
|
||||
err++;
|
||||
printf("Error m1, checker %d != 1\n", lcheck);
|
||||
}
|
||||
mysleep(DELAY);
|
||||
#pragma omp atomic read
|
||||
lcheck = checker; // must still be equal to 1
|
||||
if (lcheck != 1) {
|
||||
err++;
|
||||
printf("Error m2, checker %d != 1\n", lcheck);
|
||||
}
|
||||
#pragma omp atomic
|
||||
--checker;
|
||||
return 0;
|
||||
}
|
||||
// thunk routine for tasks with inoutset dependency
|
||||
int thunk_s(int gtid, task_t* ptask) {
|
||||
int lcheck, th;
|
||||
#pragma omp atomic capture
|
||||
lcheck = ++checker; // 1
|
||||
th = omp_get_thread_num();
|
||||
printf("task 2_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
|
||||
if (lcheck != 1) { // no more than 1 task at a time
|
||||
err++;
|
||||
printf("Error s1, checker %d != 1\n", lcheck);
|
||||
}
|
||||
mysleep(DELAY);
|
||||
#pragma omp atomic read
|
||||
lcheck = checker; // must still be equal to 1
|
||||
if (lcheck != 1) {
|
||||
err++;
|
||||
printf("Error s2, checker %d != 1\n", lcheck);
|
||||
}
|
||||
#pragma omp atomic
|
||||
--checker;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int __kmpc_global_thread_num(id*);
|
||||
task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags,
|
||||
size_t sz, size_t shar, entry_t rtn);
|
||||
int __kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int ndeps,
|
||||
dep *dep_lst, int nd_noalias, dep *noalias_lst);
|
||||
static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
// End of internal data
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
int main()
|
||||
{
|
||||
int i1,i2,i3;
|
||||
omp_set_num_threads(8);
|
||||
omp_set_dynamic(0);
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp single nowait
|
||||
{
|
||||
dep sdep[2];
|
||||
task_t *ptr;
|
||||
int gtid = __kmpc_global_thread_num(&loc);
|
||||
int t = omp_get_thread_num();
|
||||
#pragma omp task depend(in: i1, i2)
|
||||
{ // task 0
|
||||
int lcheck, th;
|
||||
#pragma omp atomic capture
|
||||
lcheck = ++checker; // 1 or 2
|
||||
th = omp_get_thread_num();
|
||||
printf("task 0_%d, th %d, checker %d\n", t, th, lcheck);
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
err++; // no more than 2 tasks concurrently
|
||||
printf("Error1, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
mysleep(DELAY);
|
||||
#pragma omp atomic read
|
||||
lcheck = checker; // 1 or 2
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
#pragma omp atomic
|
||||
err++;
|
||||
printf("Error2, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
#pragma omp atomic
|
||||
--checker;
|
||||
}
|
||||
#pragma omp task depend(in: i1, i2)
|
||||
{ // task 1
|
||||
int lcheck, th;
|
||||
#pragma omp atomic capture
|
||||
lcheck = ++checker; // 1 or 2
|
||||
th = omp_get_thread_num();
|
||||
printf("task 1_%d, th %d, checker %d\n", t, th, lcheck);
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
err++; // no more than 2 tasks concurrently
|
||||
printf("Error3, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
mysleep(DELAY);
|
||||
#pragma omp atomic read
|
||||
lcheck = checker; // 1 or 2
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
err++;
|
||||
printf("Error4, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
#pragma omp atomic
|
||||
--checker;
|
||||
}
|
||||
// compiler codegen start
|
||||
// task2
|
||||
ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
|
||||
sdep[0].addr = (size_t)&i1;
|
||||
sdep[0].len = 0; // not used
|
||||
sdep[0].flags = 1; // IN
|
||||
sdep[1].addr = (size_t)&i2;
|
||||
sdep[1].len = 0; // not used
|
||||
sdep[1].flags = 8; // INOUTSET
|
||||
ptr->f_priv = t + 10; // init single first-private variable
|
||||
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
|
||||
|
||||
// task3
|
||||
ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
|
||||
sdep[0].addr = (size_t)&i1; // to be ignored
|
||||
sdep[0].len = 0; // not used
|
||||
sdep[0].flags = 1; // IN
|
||||
sdep[1].addr = 0;
|
||||
sdep[1].len = 0; // not used
|
||||
sdep[1].flags = DEP_ALL_MEM; // omp_all_memory
|
||||
ptr->f_priv = t + 20; // init single first-private variable
|
||||
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
|
||||
// compiler codegen end
|
||||
#pragma omp task depend(in: i1, i2)
|
||||
{ // task 4
|
||||
int lcheck, th;
|
||||
#pragma omp atomic capture
|
||||
lcheck = ++checker; // 1 or 2
|
||||
th = omp_get_thread_num();
|
||||
printf("task 4_%d, th %d, checker %d\n", t, th, lcheck);
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
err++; // no more than 2 tasks concurrently
|
||||
printf("Error5, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
mysleep(DELAY);
|
||||
#pragma omp atomic read
|
||||
lcheck = checker; // 1 or 2
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
err++;
|
||||
printf("Error6, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
#pragma omp atomic
|
||||
--checker;
|
||||
}
|
||||
#pragma omp task depend(in: i1, i2)
|
||||
{ // task 5
|
||||
int lcheck, th;
|
||||
#pragma omp atomic capture
|
||||
lcheck = ++checker; // 1 or 2
|
||||
th = omp_get_thread_num();
|
||||
printf("task 5_%d, th %d, checker %d\n", t, th, lcheck);
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
err++; // no more than 2 tasks concurrently
|
||||
printf("Error7, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
mysleep(DELAY);
|
||||
#pragma omp atomic read
|
||||
lcheck = checker; // 1 or 2
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
err++;
|
||||
printf("Error8, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
#pragma omp atomic
|
||||
--checker;
|
||||
}
|
||||
// compiler codegen start
|
||||
// task6
|
||||
ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
|
||||
sdep[0].addr = (size_t)(-1); // omp_all_memory
|
||||
sdep[0].len = 0; // not used
|
||||
sdep[0].flags = 2; // OUT
|
||||
ptr->f_priv = t + 30; // init single first-private variable
|
||||
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 1, sdep, 0, 0);
|
||||
|
||||
// task7
|
||||
ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
|
||||
sdep[0].addr = 0;
|
||||
sdep[0].len = 0; // not used
|
||||
sdep[0].flags = DEP_ALL_MEM; // omp_all_memory
|
||||
sdep[1].addr = (size_t)&i3; // to be ignored
|
||||
sdep[1].len = 0; // not used
|
||||
sdep[1].flags = 4; // MUTEXINOUTSET
|
||||
ptr->f_priv = t + 40; // init single first-private variable
|
||||
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
|
||||
// compiler codegen end
|
||||
#pragma omp task depend(in: i3)
|
||||
{ // task 8
|
||||
int lcheck, th;
|
||||
#pragma omp atomic capture
|
||||
lcheck = ++checker; // 1
|
||||
th = omp_get_thread_num();
|
||||
printf("task 8_%d, th %d, checker %d\n", t, th, lcheck);
|
||||
if (lcheck != 1) {
|
||||
err++;
|
||||
printf("Error9, checker %d, != 1\n", lcheck);
|
||||
}
|
||||
mysleep(DELAY);
|
||||
#pragma omp atomic read
|
||||
lcheck = checker;
|
||||
if (lcheck != 1) {
|
||||
err++;
|
||||
printf("Error10, checker %d, != 1\n", lcheck);
|
||||
}
|
||||
#pragma omp atomic
|
||||
--checker;
|
||||
}
|
||||
} // single
|
||||
} // parallel
|
||||
if (err == 0 && checker == 0) {
|
||||
printf("passed\n");
|
||||
return 0;
|
||||
} else {
|
||||
printf("failed, err = %d, checker = %d\n", err, checker);
|
||||
return 1;
|
||||
}
|
||||
}
|
334
openmp/runtime/test/tasking/kmp_taskwait_depend_all.c
Normal file
334
openmp/runtime/test/tasking/kmp_taskwait_depend_all.c
Normal file
@ -0,0 +1,334 @@
|
||||
// RUN: %libomp-compile-and-run
|
||||
// The runtime currently does not get dependency information from GCC.
|
||||
// UNSUPPORTED: gcc
|
||||
|
||||
// Tests OMP 5.x task dependence "omp_all_memory",
|
||||
// emulates compiler codegen versions for new dep kind
|
||||
//
|
||||
// Task tree created:
|
||||
// task0 - task1 (in: i1, i2)
|
||||
// \
|
||||
// task2 (inoutset: i2), (in: i1)
|
||||
// /
|
||||
// task3 (omp_all_memory) via flag=0x80
|
||||
// /
|
||||
// task4 - task5 (in: i1, i2)
|
||||
// /
|
||||
// task6 (omp_all_memory) via addr=-1
|
||||
// /
|
||||
// task7 (omp_all_memory) via flag=0x80
|
||||
// /
|
||||
// task8 (in: i3)
|
||||
// /
|
||||
// task9 - no dependences
|
||||
// /
|
||||
// taskwait (omp_all_memory) (should not wait for task9, see prints)
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <omp.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#define mysleep(n) Sleep(n)
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#define mysleep(n) usleep((n)*1000)
|
||||
#endif
|
||||
|
||||
// to check the # of concurrent tasks (must be 1 for MTX, <3 for other kinds)
|
||||
static int checker = 0;
|
||||
static int err = 0;
|
||||
static int taskwait_flag = 0;
|
||||
#ifndef DELAY
|
||||
// set delay interval in ms for dependent tasks
|
||||
#define DELAY 100
|
||||
#endif
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// internal data to emulate compiler codegen
|
||||
typedef struct DEP {
|
||||
size_t addr;
|
||||
size_t len;
|
||||
unsigned char flags;
|
||||
} dep;
|
||||
#define DEP_ALL_MEM 0x80
|
||||
typedef struct task {
|
||||
void** shareds;
|
||||
void* entry;
|
||||
int part_id;
|
||||
void* destr_thunk;
|
||||
int priority;
|
||||
long long device_id;
|
||||
int f_priv;
|
||||
} task_t;
|
||||
#define TIED 1
|
||||
typedef int(*entry_t)(int, task_t*);
|
||||
typedef struct ID {
|
||||
int reserved_1;
|
||||
int flags;
|
||||
int reserved_2;
|
||||
int reserved_3;
|
||||
char *psource;
|
||||
} id;
|
||||
// thunk routine for tasks with ALL dependency
|
||||
int thunk_m(int gtid, task_t* ptask) {
|
||||
int lcheck, th;
|
||||
#pragma omp atomic capture
|
||||
lcheck = ++checker;
|
||||
th = omp_get_thread_num();
|
||||
printf("task m_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
|
||||
if (lcheck != 1) { // no more than 1 task at a time
|
||||
err++;
|
||||
printf("Error m1, checker %d != 1\n", lcheck);
|
||||
}
|
||||
mysleep(DELAY);
|
||||
#pragma omp atomic read
|
||||
lcheck = checker; // must still be equal to 1
|
||||
if (lcheck != 1) {
|
||||
err++;
|
||||
printf("Error m2, checker %d != 1\n", lcheck);
|
||||
}
|
||||
#pragma omp atomic
|
||||
--checker;
|
||||
return 0;
|
||||
}
|
||||
// thunk routine for tasks with inoutset dependency
|
||||
int thunk_s(int gtid, task_t* ptask) {
|
||||
int lcheck, th;
|
||||
#pragma omp atomic capture
|
||||
lcheck = ++checker; // 1
|
||||
th = omp_get_thread_num();
|
||||
printf("task 2_%d, th %d, checker %d\n", ptask->f_priv, th, lcheck);
|
||||
if (lcheck != 1) { // no more than 1 task at a time
|
||||
err++;
|
||||
printf("Error s1, checker %d != 1\n", lcheck);
|
||||
}
|
||||
mysleep(DELAY);
|
||||
#pragma omp atomic read
|
||||
lcheck = checker; // must still be equal to 1
|
||||
if (lcheck != 1) {
|
||||
err++;
|
||||
printf("Error s2, checker %d != 1\n", lcheck);
|
||||
}
|
||||
#pragma omp atomic
|
||||
--checker;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
int __kmpc_global_thread_num(id*);
|
||||
task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags,
|
||||
size_t sz, size_t shar, entry_t rtn);
|
||||
int __kmpc_omp_task_with_deps(id *loc, int gtid, task_t *task, int ndeps,
|
||||
dep *dep_lst, int nd_noalias, dep *noalias_lst);
|
||||
void __kmpc_omp_wait_deps(id *loc, int gtid, int ndeps, dep *dep_lst,
|
||||
int ndeps_noalias, dep *noalias_dep_lst);
|
||||
static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
// End of internal data
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
int main()
|
||||
{
|
||||
int i1,i2,i3;
|
||||
omp_set_num_threads(8);
|
||||
omp_set_dynamic(0);
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp single nowait
|
||||
{
|
||||
dep sdep[2];
|
||||
task_t *ptr;
|
||||
int gtid = __kmpc_global_thread_num(&loc);
|
||||
int t = omp_get_thread_num();
|
||||
// Create longest task first to ensure it is stolen.
|
||||
// The test may hang if the task created last and
|
||||
// executed by a thread which executes taskwait.
|
||||
#pragma omp task
|
||||
{ // task 9 - long running task
|
||||
int flag;
|
||||
int th = omp_get_thread_num();
|
||||
printf("signalled independent task 9_%d, th %d started....\n", t, th);
|
||||
// Wait for taskwait depend() to finish
|
||||
// If the taskwait depend() improperly depends on this task
|
||||
// to finish, then the test will hang and a timeout should trigger
|
||||
while (1) {
|
||||
#pragma omp atomic read
|
||||
flag = taskwait_flag;
|
||||
if (flag == 1)
|
||||
break;
|
||||
}
|
||||
printf("signalled independent task 9_%d, th %d finished....\n", t, th);
|
||||
}
|
||||
#pragma omp task depend(in: i1, i2)
|
||||
{ // task 0
|
||||
int lcheck, th;
|
||||
#pragma omp atomic capture
|
||||
lcheck = ++checker; // 1 or 2
|
||||
th = omp_get_thread_num();
|
||||
printf("task 0_%d, th %d, checker %d\n", t, th, lcheck);
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
err++; // no more than 2 tasks concurrently
|
||||
printf("Error1, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
mysleep(DELAY);
|
||||
#pragma omp atomic read
|
||||
lcheck = checker; // 1 or 2
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
#pragma omp atomic
|
||||
err++;
|
||||
printf("Error2, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
#pragma omp atomic
|
||||
--checker;
|
||||
}
|
||||
#pragma omp task depend(in: i1, i2)
|
||||
{ // task 1
|
||||
int lcheck, th;
|
||||
#pragma omp atomic capture
|
||||
lcheck = ++checker; // 1 or 2
|
||||
th = omp_get_thread_num();
|
||||
printf("task 1_%d, th %d, checker %d\n", t, th, lcheck);
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
err++; // no more than 2 tasks concurrently
|
||||
printf("Error3, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
mysleep(DELAY);
|
||||
#pragma omp atomic read
|
||||
lcheck = checker; // 1 or 2
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
err++;
|
||||
printf("Error4, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
#pragma omp atomic
|
||||
--checker;
|
||||
}
|
||||
// compiler codegen start
|
||||
// task2
|
||||
ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_s);
|
||||
sdep[0].addr = (size_t)&i1;
|
||||
sdep[0].len = 0; // not used
|
||||
sdep[0].flags = 1; // IN
|
||||
sdep[1].addr = (size_t)&i2;
|
||||
sdep[1].len = 0; // not used
|
||||
sdep[1].flags = 8; // INOUTSET
|
||||
ptr->f_priv = t + 10; // init single first-private variable
|
||||
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
|
||||
|
||||
// task3
|
||||
ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
|
||||
sdep[0].addr = (size_t)&i1; // to be ignored
|
||||
sdep[0].len = 0; // not used
|
||||
sdep[0].flags = 1; // IN
|
||||
sdep[1].addr = 0;
|
||||
sdep[1].len = 0; // not used
|
||||
sdep[1].flags = DEP_ALL_MEM; // omp_all_memory
|
||||
ptr->f_priv = t + 20; // init single first-private variable
|
||||
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
|
||||
// compiler codegen end
|
||||
#pragma omp task depend(in: i1, i2)
|
||||
{ // task 4
|
||||
int lcheck, th;
|
||||
#pragma omp atomic capture
|
||||
lcheck = ++checker; // 1 or 2
|
||||
th = omp_get_thread_num();
|
||||
printf("task 4_%d, th %d, checker %d\n", t, th, lcheck);
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
err++; // no more than 2 tasks concurrently
|
||||
printf("Error5, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
mysleep(DELAY);
|
||||
#pragma omp atomic read
|
||||
lcheck = checker; // 1 or 2
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
err++;
|
||||
printf("Error6, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
#pragma omp atomic
|
||||
--checker;
|
||||
}
|
||||
#pragma omp task depend(in: i1, i2)
|
||||
{ // task 5
|
||||
int lcheck, th;
|
||||
#pragma omp atomic capture
|
||||
lcheck = ++checker; // 1 or 2
|
||||
th = omp_get_thread_num();
|
||||
printf("task 5_%d, th %d, checker %d\n", t, th, lcheck);
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
err++; // no more than 2 tasks concurrently
|
||||
printf("Error7, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
mysleep(DELAY);
|
||||
#pragma omp atomic read
|
||||
lcheck = checker; // 1 or 2
|
||||
if (lcheck > 2 || lcheck < 1) {
|
||||
err++;
|
||||
printf("Error8, checker %d, not 1 or 2\n", lcheck);
|
||||
}
|
||||
#pragma omp atomic
|
||||
--checker;
|
||||
}
|
||||
// compiler codegen start
|
||||
// task6
|
||||
ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
|
||||
sdep[0].addr = (size_t)(-1); // omp_all_memory
|
||||
sdep[0].len = 0; // not used
|
||||
sdep[0].flags = 2; // OUT
|
||||
ptr->f_priv = t + 30; // init single first-private variable
|
||||
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 1, sdep, 0, 0);
|
||||
|
||||
// task7
|
||||
ptr = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(task_t), 0, thunk_m);
|
||||
sdep[0].addr = 0;
|
||||
sdep[0].len = 0; // not used
|
||||
sdep[0].flags = DEP_ALL_MEM; // omp_all_memory
|
||||
sdep[1].addr = (size_t)&i3; // to be ignored
|
||||
sdep[1].len = 0; // not used
|
||||
sdep[1].flags = 4; // MUTEXINOUTSET
|
||||
ptr->f_priv = t + 40; // init single first-private variable
|
||||
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
|
||||
// compiler codegen end
|
||||
#pragma omp task depend(in: i3)
|
||||
{ // task 8
|
||||
int lcheck, th;
|
||||
#pragma omp atomic capture
|
||||
lcheck = ++checker; // 1
|
||||
th = omp_get_thread_num();
|
||||
printf("task 8_%d, th %d, checker %d\n", t, th, lcheck);
|
||||
if (lcheck != 1) {
|
||||
err++;
|
||||
printf("Error9, checker %d, != 1\n", lcheck);
|
||||
}
|
||||
mysleep(DELAY);
|
||||
#pragma omp atomic read
|
||||
lcheck = checker;
|
||||
if (lcheck != 1) {
|
||||
err++;
|
||||
printf("Error10, checker %d, != 1\n", lcheck);
|
||||
}
|
||||
#pragma omp atomic
|
||||
--checker;
|
||||
}
|
||||
mysleep(1); // wait a bit to ensure at least first task is stolen
|
||||
// #pragma omp taskwait depend(omp_all_memory: out)
|
||||
printf("all 10 tasks generated;\n"
|
||||
"taskwait depend(omp_all_memory: out) started, th %d\n", t);
|
||||
__kmpc_omp_wait_deps(&loc, gtid, 1, sdep, 0, 0);
|
||||
#pragma omp atomic write
|
||||
taskwait_flag = 1;
|
||||
printf("taskwait depend(omp_all_memory: out) passed, th %d\n", t);
|
||||
fflush(0);
|
||||
} // single
|
||||
} // parallel
|
||||
if (err == 0 && checker == 0) {
|
||||
printf("passed\n");
|
||||
return 0;
|
||||
} else {
|
||||
printf("failed, err = %d, checker = %d\n", err, checker);
|
||||
return 1;
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user