mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-06 08:02:18 +00:00
Fix implementation of OMP_THREAD_LIMIT
This change fixes the implementation of OMP_THREAD_LIMIT. The implementation of this previously was not restricted to a contention group (but it should be, according to the spec), and this is fixed here. A field is added to root thread to store a counter of the threads in the contention group. An extra check is added when reserving threads for a parallel region that checks this variable and compares to threadlimit-var, which is implemented as a new global variable, kmp_cg_max_nth. Associated settings changes were also made, and clean up of comments that referred to OMP_THREAD_LIMIT, but should refer to the new KMP_DEVICE_THREAD_LIMIT (added in an earlier patch). Patch by Terry Wilmarth Differential Revision: https://reviews.llvm.org/D35912 llvm-svn: 309319
This commit is contained in:
parent
1bf535daae
commit
f439246328
@ -433,7 +433,7 @@ SubmitBugReport "Please submit a bug report with this message, comp
|
||||
OBSOLETE "Check NLSPATH environment variable, its value is \"%1$s\"."
|
||||
ChangeStackLimit "Please try changing the shell stack limit or adjusting the "
|
||||
"OMP_STACKSIZE environment variable."
|
||||
Unset_ALL_THREADS "Consider unsetting KMP_ALL_THREADS and OMP_THREAD_LIMIT (if either is set)."
|
||||
Unset_ALL_THREADS "Consider unsetting KMP_DEVICE_THREAD_LIMIT (KMP_ALL_THREADS) and OMP_THREAD_LIMIT (if either is set)."
|
||||
Set_ALL_THREADPRIVATE "Consider setting KMP_ALL_THREADPRIVATE to a value larger than %1$d."
|
||||
PossibleSystemLimitOnThreads "This could also be due to a system-related limit on the number of threads."
|
||||
DuplicateLibrary "This means that multiple copies of the OpenMP runtime have been "
|
||||
|
@ -2689,6 +2689,7 @@ typedef struct kmp_base_root {
|
||||
kmp_lock_t r_begin_lock;
|
||||
volatile int r_begin;
|
||||
int r_blocktime; /* blocktime for this root and descendants */
|
||||
int r_cg_nthreads; // count of active threads in a contention group
|
||||
} kmp_base_root_t;
|
||||
|
||||
typedef union KMP_ALIGN_CACHE kmp_root {
|
||||
@ -2863,8 +2864,10 @@ extern int __kmp_xproc; /* number of processors in the system */
|
||||
extern int __kmp_avail_proc; /* number of processors available to the process */
|
||||
extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
|
||||
extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
|
||||
extern int
|
||||
__kmp_max_nth; /* maximum total number of concurrently-existing threads */
|
||||
// maximum total number of concurrently-existing threads on device
|
||||
extern int __kmp_max_nth;
|
||||
// maximum total number of concurrently-existing threads in a contention group
|
||||
extern int __kmp_cg_max_nth;
|
||||
extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and
|
||||
__kmp_root */
|
||||
extern int __kmp_dflt_team_nth; /* default number of threads in a parallel
|
||||
|
@ -550,7 +550,7 @@ int FTN_STDCALL xexpand(FTN_GET_THREAD_LIMIT)(void) {
|
||||
__kmp_serial_initialize();
|
||||
};
|
||||
/* global ICV */
|
||||
return __kmp_max_nth;
|
||||
return __kmp_cg_max_nth;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -135,6 +135,7 @@ int __kmp_avail_proc = 0;
|
||||
size_t __kmp_sys_min_stksize = KMP_MIN_STKSIZE;
|
||||
int __kmp_sys_max_nth = KMP_MAX_NTH;
|
||||
int __kmp_max_nth = 0;
|
||||
int __kmp_cg_max_nth = 0;
|
||||
int __kmp_threads_capacity = 0;
|
||||
int __kmp_dflt_team_nth = 0;
|
||||
int __kmp_dflt_team_nth_ub = 0;
|
||||
|
@ -881,7 +881,7 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
|
||||
KMP_ASSERT(0);
|
||||
}
|
||||
|
||||
// Respect KMP_ALL_THREADS, KMP_DEVICE_THREAD_LIMIT, OMP_THREAD_LIMIT.
|
||||
// Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
|
||||
if (__kmp_nth + new_nthreads -
|
||||
(root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
|
||||
__kmp_max_nth) {
|
||||
@ -899,12 +899,41 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
|
||||
KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
|
||||
}
|
||||
if (tl_nthreads == 1) {
|
||||
KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced "
|
||||
"reservation to 1 thread\n",
|
||||
KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
|
||||
"reduced reservation to 1 thread\n",
|
||||
master_tid));
|
||||
return 1;
|
||||
}
|
||||
KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced "
|
||||
KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
|
||||
"reservation to %d threads\n",
|
||||
master_tid, tl_nthreads));
|
||||
new_nthreads = tl_nthreads;
|
||||
}
|
||||
|
||||
// Respect OMP_THREAD_LIMIT
|
||||
if (root->r.r_cg_nthreads + new_nthreads -
|
||||
(root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
|
||||
__kmp_cg_max_nth) {
|
||||
int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads +
|
||||
(root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
|
||||
if (tl_nthreads <= 0) {
|
||||
tl_nthreads = 1;
|
||||
}
|
||||
|
||||
// If dyn-var is false, emit a 1-time warning.
|
||||
if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
|
||||
__kmp_reserve_warn = 1;
|
||||
__kmp_msg(kmp_ms_warning,
|
||||
KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
|
||||
KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
|
||||
}
|
||||
if (tl_nthreads == 1) {
|
||||
KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
|
||||
"reduced reservation to 1 thread\n",
|
||||
master_tid));
|
||||
return 1;
|
||||
}
|
||||
KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
|
||||
"reservation to %d threads\n",
|
||||
master_tid, tl_nthreads));
|
||||
new_nthreads = tl_nthreads;
|
||||
@ -3116,6 +3145,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
|
||||
root->r.r_in_parallel = 0;
|
||||
root->r.r_blocktime = __kmp_dflt_blocktime;
|
||||
root->r.r_nested = __kmp_dflt_nested;
|
||||
root->r.r_cg_nthreads = 1;
|
||||
|
||||
/* setup the root team for this task */
|
||||
/* allocate the root team structure */
|
||||
@ -3508,7 +3538,7 @@ static int __kmp_expand_threads(int nWish, int nNeed) {
|
||||
|
||||
// Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
|
||||
// __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
|
||||
// user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may become
|
||||
// user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
|
||||
// > __kmp_max_nth in one of two ways:
|
||||
//
|
||||
// 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
|
||||
@ -3889,6 +3919,8 @@ static int __kmp_reset_root(int gtid, kmp_root_t *root) {
|
||||
|
||||
TCW_4(__kmp_nth,
|
||||
__kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
|
||||
root->r.r_cg_nthreads--;
|
||||
|
||||
__kmp_reap_thread(root->r.r_uber_thread, 1);
|
||||
|
||||
// We canot put root thread to __kmp_thread_pool, so we have to reap it istead
|
||||
@ -4169,6 +4201,7 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
|
||||
KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
|
||||
|
||||
TCW_4(__kmp_nth, __kmp_nth + 1);
|
||||
root->r.r_cg_nthreads++;
|
||||
|
||||
new_thr->th.th_task_state = 0;
|
||||
new_thr->th.th_task_state_top = 0;
|
||||
@ -4316,6 +4349,8 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
|
||||
__kmp_all_nth++;
|
||||
__kmp_nth++;
|
||||
|
||||
root->r.r_cg_nthreads++;
|
||||
|
||||
// if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
|
||||
// numbers of procs, and method #2 (keyed API call) for higher numbers.
|
||||
if (__kmp_adjust_gtid_mode) {
|
||||
@ -5378,6 +5413,7 @@ kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
|
||||
void __kmp_free_thread(kmp_info_t *this_th) {
|
||||
int gtid;
|
||||
kmp_info_t **scan;
|
||||
kmp_root_t *root = this_th->th.th_root;
|
||||
|
||||
KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
|
||||
__kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
|
||||
@ -5436,6 +5472,7 @@ void __kmp_free_thread(kmp_info_t *this_th) {
|
||||
__kmp_thread_pool_nth++;
|
||||
|
||||
TCW_4(__kmp_nth, __kmp_nth - 1);
|
||||
root->r.r_cg_nthreads--;
|
||||
|
||||
#ifdef KMP_ADJUST_BLOCKTIME
|
||||
/* Adjust blocktime back to user setting or default if necessary */
|
||||
@ -6375,6 +6412,7 @@ static void __kmp_do_serial_initialize(void) {
|
||||
__kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
|
||||
}
|
||||
__kmp_max_nth = __kmp_sys_max_nth;
|
||||
__kmp_cg_max_nth = __kmp_sys_max_nth;
|
||||
|
||||
// Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
|
||||
// part
|
||||
@ -6977,7 +7015,7 @@ void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
|
||||
if (num_teams * num_threads > __kmp_max_nth) {
|
||||
int new_threads = __kmp_max_nth / num_teams;
|
||||
if (!__kmp_reserve_warn) { // user asked for too many threads
|
||||
__kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
|
||||
__kmp_reserve_warn = 1; // that conflicts with KMP_DEVICE_THREAD_LIMIT
|
||||
__kmp_msg(kmp_ms_warning,
|
||||
KMP_MSG(CantFormThrTeam, num_threads, new_threads),
|
||||
KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
|
||||
|
@ -569,7 +569,7 @@ static void __kmp_stg_print_size(kmp_str_buf_t *buffer, char const *name,
|
||||
// Parse and print functions.
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// KMP_ALL_THREADS, KMP_DEVICE_THREAD_LIMIT, OMP_THREAD_LIMIT
|
||||
// KMP_DEVICE_THREAD_LIMIT, KMP_ALL_THREADS
|
||||
|
||||
static void __kmp_stg_parse_device_thread_limit(char const *name,
|
||||
char const *value, void *data) {
|
||||
@ -598,6 +598,20 @@ static void __kmp_stg_print_device_thread_limit(kmp_str_buf_t *buffer,
|
||||
__kmp_stg_print_int(buffer, name, __kmp_max_nth);
|
||||
} // __kmp_stg_print_device_thread_limit
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// OMP_THREAD_LIMIT
|
||||
static void __kmp_stg_parse_thread_limit(char const *name, char const *value,
|
||||
void *data) {
|
||||
__kmp_stg_parse_int(name, value, 1, __kmp_sys_max_nth, &__kmp_cg_max_nth);
|
||||
K_DIAG(1, ("__kmp_cg_max_nth == %d\n", __kmp_cg_max_nth));
|
||||
|
||||
} // __kmp_stg_parse_thread_limit
|
||||
|
||||
static void __kmp_stg_print_thread_limit(kmp_str_buf_t *buffer,
|
||||
char const *name, void *data) {
|
||||
__kmp_stg_print_int(buffer, name, __kmp_cg_max_nth);
|
||||
} // __kmp_stg_print_thread_limit
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// KMP_BLOCKTIME
|
||||
|
||||
@ -4386,8 +4400,8 @@ static kmp_setting_t __kmp_stg_table[] = {
|
||||
{"KMP_TASKLOOP_MIN_TASKS", __kmp_stg_parse_taskloop_min_tasks,
|
||||
__kmp_stg_print_taskloop_min_tasks, NULL, 0, 0},
|
||||
#endif
|
||||
{"OMP_THREAD_LIMIT", __kmp_stg_parse_device_thread_limit,
|
||||
__kmp_stg_print_device_thread_limit, NULL, 0, 0},
|
||||
{"OMP_THREAD_LIMIT", __kmp_stg_parse_thread_limit,
|
||||
__kmp_stg_print_thread_limit, NULL, 0, 0},
|
||||
{"OMP_WAIT_POLICY", __kmp_stg_parse_wait_policy,
|
||||
__kmp_stg_print_wait_policy, NULL, 0, 0},
|
||||
{"KMP_DISP_NUM_BUFFERS", __kmp_stg_parse_disp_buffers,
|
||||
@ -4687,27 +4701,22 @@ static void __kmp_stg_init(void) {
|
||||
}; // if
|
||||
}
|
||||
|
||||
{ // Initialize KMP_DEVICE_THREAD_LIMIT, KMP_ALL_THREADS, and
|
||||
// OMP_THREAD_LIMIT data.
|
||||
{ // Initialize KMP_DEVICE_THREAD_LIMIT and KMP_ALL_THREADS
|
||||
kmp_setting_t *kmp_device_thread_limit =
|
||||
__kmp_stg_find("KMP_DEVICE_THREAD_LIMIT"); // 1st priority.
|
||||
kmp_setting_t *kmp_all_threads =
|
||||
__kmp_stg_find("KMP_ALL_THREADS"); // 2nd priority.
|
||||
kmp_setting_t *omp_thread_limit =
|
||||
__kmp_stg_find("OMP_THREAD_LIMIT"); // 3rd priority.
|
||||
|
||||
// !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround.
|
||||
static kmp_setting_t *volatile rivals[4];
|
||||
static kmp_setting_t *volatile rivals[3];
|
||||
int i = 0;
|
||||
|
||||
rivals[i++] = kmp_device_thread_limit;
|
||||
rivals[i++] = kmp_all_threads;
|
||||
rivals[i++] = omp_thread_limit;
|
||||
rivals[i++] = NULL;
|
||||
|
||||
kmp_device_thread_limit->data = CCAST(kmp_setting_t **, rivals);
|
||||
kmp_all_threads->data = CCAST(kmp_setting_t **, rivals);
|
||||
omp_thread_limit->data = CCAST(kmp_setting_t **, rivals);
|
||||
}
|
||||
|
||||
#if KMP_AFFINITY_SUPPORTED
|
||||
|
82
openmp/runtime/test/env/omp_thread_limit.c
vendored
Normal file
82
openmp/runtime/test/env/omp_thread_limit.c
vendored
Normal file
@ -0,0 +1,82 @@
|
||||
// RUN: %libomp-compile && env OMP_THREAD_LIMIT=4 %libomp-run 4
|
||||
// RUN: %libomp-compile && env OMP_THREAD_LIMIT=7 %libomp-run 7
|
||||
//
|
||||
// OMP_THREAD_LIMIT=N should imply that no more than N threads are active in
|
||||
// a contention group
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include "omp_testsuite.h"
|
||||
|
||||
int failed = 0;
|
||||
|
||||
void usage() {
|
||||
fprintf(stderr, "usage: omp_thread_limit <n>\n");
|
||||
}
|
||||
|
||||
void verify(const char* file_name, int line_number, int team_size) {
|
||||
int num_threads = omp_get_num_threads();
|
||||
if (team_size != num_threads) {
|
||||
#pragma omp critical(A)
|
||||
{
|
||||
char label[256];
|
||||
snprintf(label, sizeof(label), "%s:%d", file_name, line_number);
|
||||
failed = 1;
|
||||
printf("failed: %s: team_size(%d) != omp_get_num_threads(%d)\n",
|
||||
label, team_size, num_threads);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
int cl_thread_limit;
|
||||
|
||||
if (argc != 2) {
|
||||
usage();
|
||||
return 1;
|
||||
}
|
||||
cl_thread_limit = atoi(argv[1]);
|
||||
|
||||
omp_set_dynamic(0);
|
||||
if (omp_get_thread_limit() != cl_thread_limit) {
|
||||
fprintf(stderr, "omp_get_thread_limit failed with %d, should be%d\n",
|
||||
omp_get_thread_limit(), cl_thread_limit);
|
||||
return 1;
|
||||
}
|
||||
else if (omp_get_max_threads() > cl_thread_limit) {
|
||||
#if _OPENMP
|
||||
int team_size = cl_thread_limit;
|
||||
#else
|
||||
int team_size = 1;
|
||||
#endif
|
||||
omp_set_num_threads(19);
|
||||
verify(__FILE__, __LINE__, 1);
|
||||
#pragma omp parallel
|
||||
{
|
||||
verify(__FILE__, __LINE__, team_size);
|
||||
verify(__FILE__, __LINE__, team_size);
|
||||
}
|
||||
verify(__FILE__, __LINE__, 1);
|
||||
|
||||
omp_set_nested(1);
|
||||
#pragma omp parallel num_threads(3)
|
||||
{
|
||||
verify(__FILE__, __LINE__, 3);
|
||||
#pragma omp master
|
||||
#pragma omp parallel num_threads(21)
|
||||
{
|
||||
verify(__FILE__, __LINE__, team_size-2);
|
||||
verify(__FILE__, __LINE__, team_size-2);
|
||||
}
|
||||
}
|
||||
verify(__FILE__, __LINE__, 1);
|
||||
|
||||
return failed;
|
||||
} else {
|
||||
fprintf(stderr, "This test is not applicable for max num_threads='%d'\n",
|
||||
omp_get_max_threads());
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user