[OpenMP] Add use of TPAUSE
Add use of TPAUSE (from WAITPKG) to the runtime for Intel hardware, with an environment variable (KMP_TPAUSE) to select a particular C-state. TPAUSE is always used when it is enabled and the hardware reports WAITPKG support; otherwise the runtime falls back to the old path that checks __kmp_use_yield, etc.

Differential Revision: https://reviews.llvm.org/D115758
parent 1ad48d6de2
commit 2e02579a76
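For context on the instruction itself: TPAUSE waits until the TSC reaches an absolute deadline passed in EDX:EAX, in either the C0.1 (light) or C0.2 (deeper) power state, and returns early on an interrupt or when the OS-configured IA32_UMWAIT_CONTROL limit expires. A minimal sketch, assuming a WAITPKG-capable compiler (-mwaitpkg); timed_pause is an illustrative name, not part of the runtime:

    // Minimal sketch, assuming WAITPKG support; not the runtime's exact code.
    #include <immintrin.h>
    #include <stdint.h>

    static inline void timed_pause(uint32_t hint, uint64_t cycles) {
      // hint 0 = C0.2 (deeper, slower wake), hint 1 = C0.1 (light, fast wake)
      uint64_t deadline = __rdtsc() + cycles; // TPAUSE takes an absolute TSC deadline
      _tpause(hint, deadline); // may return early on interrupt or OS time limit
    }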
@@ -1315,86 +1315,6 @@ static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }

 #define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */

-#if KMP_ARCH_X86
-extern void __kmp_x86_pause(void);
-#elif KMP_MIC
-// Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
-// regression after removal of extra PAUSE from spin loops. Changing
-// the delay from 100 to 300 showed even better performance than double PAUSE
-// on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
-static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
-#else
-static inline void __kmp_x86_pause(void) { _mm_pause(); }
-#endif
-#define KMP_CPU_PAUSE() __kmp_x86_pause()
-#elif KMP_ARCH_PPC64
-#define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1")
-#define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2")
-#define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory")
-#define KMP_CPU_PAUSE() \
-  do { \
-    KMP_PPC64_PRI_LOW(); \
-    KMP_PPC64_PRI_MED(); \
-    KMP_PPC64_PRI_LOC_MB(); \
-  } while (0)
-#else
-#define KMP_CPU_PAUSE() /* nothing to do */
-#endif
-
-#define KMP_INIT_YIELD(count) \
-  { (count) = __kmp_yield_init; }
-
-#define KMP_OVERSUBSCRIBED \
-  (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
-
-#define KMP_TRY_YIELD \
-  ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))
-
-#define KMP_TRY_YIELD_OVERSUB \
-  ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))
-
-#define KMP_YIELD(cond) \
-  { \
-    KMP_CPU_PAUSE(); \
-    if ((cond) && (KMP_TRY_YIELD)) \
-      __kmp_yield(); \
-  }
-
-#define KMP_YIELD_OVERSUB() \
-  { \
-    KMP_CPU_PAUSE(); \
-    if ((KMP_TRY_YIELD_OVERSUB)) \
-      __kmp_yield(); \
-  }
-
-// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround,
-// there should be no yielding since initial value from KMP_INIT_YIELD() is odd.
-#define KMP_YIELD_SPIN(count) \
-  { \
-    KMP_CPU_PAUSE(); \
-    if (KMP_TRY_YIELD) { \
-      (count) -= 2; \
-      if (!(count)) { \
-        __kmp_yield(); \
-        (count) = __kmp_yield_next; \
-      } \
-    } \
-  }
-
-#define KMP_YIELD_OVERSUB_ELSE_SPIN(count) \
-  { \
-    KMP_CPU_PAUSE(); \
-    if ((KMP_TRY_YIELD_OVERSUB)) \
-      __kmp_yield(); \
-    else if (__kmp_use_yield == 1) { \
-      (count) -= 2; \
-      if (!(count)) { \
-        __kmp_yield(); \
-        (count) = __kmp_yield_next; \
-      } \
-    } \
-  }
-
 // User-level Monitor/Mwait
 #if KMP_HAVE_UMWAIT
 // We always try for UMWAIT first

@@ -1405,6 +1325,7 @@ static inline void __kmp_x86_pause(void) { _mm_pause(); }
 #include <intrin.h>
 #endif
 #endif // KMP_HAVE_WAITPKG_INTRINSICS
+
 KMP_ATTRIBUTE_TARGET_WAITPKG
 static inline int __kmp_tpause(uint32_t hint, uint64_t counter) {
 #if !KMP_HAVE_WAITPKG_INTRINSICS

@@ -1470,6 +1391,119 @@ __kmp_mm_mwait(unsigned extensions, unsigned hints) {
 }
 #endif // KMP_HAVE_UMWAIT

+#if KMP_ARCH_X86
+extern void __kmp_x86_pause(void);
+#elif KMP_MIC
+// Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
+// regression after removal of extra PAUSE from spin loops. Changing
+// the delay from 100 to 300 showed even better performance than double PAUSE
+// on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
+static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
+#else
+static inline void __kmp_x86_pause(void) { _mm_pause(); }
+#endif
+#define KMP_CPU_PAUSE() __kmp_x86_pause()
+#elif KMP_ARCH_PPC64
+#define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1")
+#define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2")
+#define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory")
+#define KMP_CPU_PAUSE() \
+  do { \
+    KMP_PPC64_PRI_LOW(); \
+    KMP_PPC64_PRI_MED(); \
+    KMP_PPC64_PRI_LOC_MB(); \
+  } while (0)
+#else
+#define KMP_CPU_PAUSE() /* nothing to do */
+#endif
+
+#define KMP_INIT_YIELD(count) \
+  { (count) = __kmp_yield_init; }
+
+#define KMP_INIT_BACKOFF(time) \
+  { (time) = __kmp_pause_init; }
+
+#define KMP_OVERSUBSCRIBED \
+  (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
+
+#define KMP_TRY_YIELD \
+  ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))
+
+#define KMP_TRY_YIELD_OVERSUB \
+  ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))
+
+#define KMP_YIELD(cond) \
+  { \
+    KMP_CPU_PAUSE(); \
+    if ((cond) && (KMP_TRY_YIELD)) \
+      __kmp_yield(); \
+  }
+
+#define KMP_YIELD_OVERSUB() \
+  { \
+    KMP_CPU_PAUSE(); \
+    if ((KMP_TRY_YIELD_OVERSUB)) \
+      __kmp_yield(); \
+  }
+
+// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround,
+// there should be no yielding since initial value from KMP_INIT_YIELD() is odd.
+#define KMP_YIELD_SPIN(count) \
+  { \
+    KMP_CPU_PAUSE(); \
+    if (KMP_TRY_YIELD) { \
+      (count) -= 2; \
+      if (!(count)) { \
+        __kmp_yield(); \
+        (count) = __kmp_yield_next; \
+      } \
+    } \
+  }
+
+// If TPAUSE is available & enabled, use it. If oversubscribed, use the slower
+// (C0.2) state, which improves performance of other SMT threads on the same
+// core, otherwise, use the fast (C0.1) default state, or whatever the user has
+// requested. Uses a timed TPAUSE, and exponential backoff. If TPAUSE isn't
+// available, fall back to the regular CPU pause and yield combination.
+#if KMP_HAVE_UMWAIT
+#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \
+  { \
+    if (__kmp_tpause_enabled) { \
+      if (KMP_OVERSUBSCRIBED) { \
+        __kmp_tpause(0, (time)); \
+      } else { \
+        __kmp_tpause(__kmp_tpause_hint, (time)); \
+      } \
+      (time) *= 2; \
+    } else { \
+      KMP_CPU_PAUSE(); \
+      if ((KMP_TRY_YIELD_OVERSUB)) { \
+        __kmp_yield(); \
+      } else if (__kmp_use_yield == 1) { \
+        (count) -= 2; \
+        if (!(count)) { \
+          __kmp_yield(); \
+          (count) = __kmp_yield_next; \
+        } \
+      } \
+    } \
+  }
+#else
+#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \
+  { \
+    KMP_CPU_PAUSE(); \
+    if ((KMP_TRY_YIELD_OVERSUB)) \
+      __kmp_yield(); \
+    else if (__kmp_use_yield == 1) { \
+      (count) -= 2; \
+      if (!(count)) { \
+        __kmp_yield(); \
+        (count) = __kmp_yield_next; \
+      } \
+    } \
+  }
+#endif // KMP_HAVE_UMWAIT
+
 /* ------------------------------------------------------------------------ */
 /* Support datatypes for the orphaned construct nesting checks. */
 /* ------------------------------------------------------------------------ */
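The call pattern for the new two-argument KMP_YIELD_OVERSUB_ELSE_SPIN is visible in the later hunks of this commit: callers add a kmp_uint64 time next to the existing spin counter, seed both, and invoke the macro in the wait loop; on the TPAUSE path, time doubles after every pause (exponential backoff), starting from __kmp_pause_init. Condensed, with done() standing in for the caller's predicate:

    // Condensed from the callers below; not compilable outside the runtime.
    static void wait_until_done(void) {
      kmp_uint32 spins;
      kmp_uint64 time;
      KMP_INIT_YIELD(spins);   // spins = __kmp_yield_init
      KMP_INIT_BACKOFF(time);  // time = __kmp_pause_init (1 by default)
      while (!done()) { // done() is a placeholder for the caller's predicate
        // TPAUSE path: __kmp_tpause(hint, time), then time *= 2;
        // otherwise: KMP_CPU_PAUSE() plus an optional __kmp_yield()
        KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
      }
    }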
@@ -3088,6 +3122,7 @@ extern kmp_int32 __kmp_use_yield;
 extern kmp_int32 __kmp_use_yield_exp_set;
 extern kmp_uint32 __kmp_yield_init;
 extern kmp_uint32 __kmp_yield_next;
+extern kmp_uint64 __kmp_pause_init;

 /* ------------------------------------------------------------------------- */
 extern int __kmp_allThreadsSpecified;

@@ -3290,6 +3325,13 @@ extern int __kmp_mwait_enabled; // Runtime check if ring3 mwait is enabled
 extern int __kmp_mwait_hints; // Hints to pass in to mwait
 #endif

+#if KMP_HAVE_UMWAIT
+extern int __kmp_waitpkg_enabled; // Runtime check if waitpkg exists
+extern int __kmp_tpause_state; // 0 (default), 1=C0.1, 2=C0.2; from KMP_TPAUSE
+extern int __kmp_tpause_hint; // 1=C0.1 (default), 0=C0.2; from KMP_TPAUSE
+extern int __kmp_tpause_enabled; // 0 (default), 1 (KMP_TPAUSE is non-zero)
+#endif
+
 /* ------------------------------------------------------------------------- */

 extern kmp_global_t __kmp_global; /* global status */

@@ -2655,9 +2655,11 @@ __kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
   kmp_uint32 spins;
   kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred;
   kmp_uint32 r;
+  kmp_uint64 time;

   KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin));
   KMP_INIT_YIELD(spins);
+  KMP_INIT_BACKOFF(time);
   // main wait spin loop
   while (!f(r = TCR_4(*spin), check)) {
     KMP_FSYNC_SPIN_PREPARE(obj);

@@ -2665,7 +2667,7 @@ __kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
        split. It causes problems with infinite recursion because of exit lock */
     /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
         __kmp_abort_thread(); */
-    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
   }
   KMP_FSYNC_SPIN_ACQUIRED(obj);
   return r;

@@ -2680,15 +2682,17 @@ void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
   kmp_uint32 check = checker;
   kmp_uint32 spins;
   kmp_uint32 (*f)(void *, kmp_uint32) = pred;
+  kmp_uint64 time;

   KMP_FSYNC_SPIN_INIT(obj, spin);
   KMP_INIT_YIELD(spins);
+  KMP_INIT_BACKOFF(time);
   // main wait spin loop
   while (!f(spin, check)) {
     KMP_FSYNC_SPIN_PREPARE(obj);
     /* if we have waited a bit, or are noversubscribed, yield */
     /* pause is in the following code */
-    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
   }
   KMP_FSYNC_SPIN_ACQUIRED(obj);
 }

@@ -292,10 +292,12 @@ static UT __kmp_wait(volatile UT *spinner, UT checker,
   UT check = checker;
   kmp_uint32 spins;
   kmp_uint32 (*f)(UT, UT) = pred;
+  kmp_uint64 time;
   UT r;

   KMP_FSYNC_SPIN_INIT(obj, CCAST(UT *, spin));
   KMP_INIT_YIELD(spins);
+  KMP_INIT_BACKOFF(time);
   // main wait spin loop
   while (!f(r = *spin, check)) {
     KMP_FSYNC_SPIN_PREPARE(obj);

@@ -305,7 +307,7 @@ static UT __kmp_wait(volatile UT *spinner, UT checker,
     /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
         __kmp_abort_thread(); */
     // If oversubscribed, or have waited a bit then yield.
-    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
   }
   KMP_FSYNC_SPIN_ACQUIRED(obj);
   return r;

@@ -219,6 +219,13 @@ int __kmp_mwait_enabled = FALSE;
 int __kmp_mwait_hints = 0;
 #endif

+#if KMP_HAVE_UMWAIT
+int __kmp_waitpkg_enabled = 0;
+int __kmp_tpause_state = 0;
+int __kmp_tpause_hint = 1;
+int __kmp_tpause_enabled = 0;
+#endif
+
 /* map OMP 3.0 schedule types with our internal schedule types */
 enum sched_type __kmp_sch_map[kmp_sched_upper - kmp_sched_lower_ext +
                               kmp_sched_upper_std - kmp_sched_lower - 2] = {

@@ -425,6 +432,7 @@ kmp_int32 __kmp_use_yield_exp_set = 0;

 kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT;
 kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT;
+kmp_uint64 __kmp_pause_init = 1; // for tpause

 /* ------------------------------------------------------ */
 /* STATE mostly syncronized with global lock */

@@ -96,12 +96,19 @@ __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
   }

   kmp_uint32 spins;
+  kmp_uint64 time;
   KMP_FSYNC_PREPARE(lck);
   KMP_INIT_YIELD(spins);
+  KMP_INIT_BACKOFF(time);
   kmp_backoff_t backoff = __kmp_spin_backoff_params;
   do {
+#if !KMP_HAVE_UMWAIT
     __kmp_spin_backoff(&backoff);
-    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+#else
+    if (!__kmp_tpause_enabled)
+      __kmp_spin_backoff(&backoff);
+#endif
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
   } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
            !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
   KMP_FSYNC_ACQUIRED(lck);

@@ -2227,10 +2234,12 @@ __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
   // The current implementation of KMP_WAIT doesn't allow for mask
   // and poll to be re-read every spin iteration.
   kmp_uint32 spins;
+  kmp_uint64 time;
   KMP_FSYNC_PREPARE(lck);
   KMP_INIT_YIELD(spins);
+  KMP_INIT_BACKOFF(time);
   while (polls[ticket & mask] < ticket) { // atomic load
-    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
     // Re-read the mask and the poll pointer from the lock structure.
     //
     // Make certain that "mask" is read before "polls" !!!

@@ -2659,9 +2668,17 @@ void __kmp_spin_backoff(kmp_backoff_t *boff) {
   kmp_uint32 i;
   for (i = boff->step; i > 0; i--) {
     kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
-    do {
-      KMP_CPU_PAUSE();
-    } while (before(__kmp_tsc(), goal));
+#if KMP_HAVE_UMWAIT
+    if (__kmp_umwait_enabled) {
+      __kmp_tpause(0, boff->min_tick);
+    } else {
+#endif
+      do {
+        KMP_CPU_PAUSE();
+      } while (before(__kmp_tsc(), goal));
+#if KMP_HAVE_UMWAIT
+    }
+#endif
   }
   boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
 }
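An aside on the truncated exponential backoff above: the update step = (step << 1 | 1) & (max_backoff - 1) walks the step up to max_backoff - 1 and then holds it there, which requires max_backoff to be a power of two for the mask to work. A small standalone demo, with an assumed max_backoff of 16:

    #include <stdio.h>

    // Demo of the step update from __kmp_spin_backoff above.
    // max_backoff = 16 is an assumed value; the mask requires a power of two.
    int main(void) {
      unsigned step = 1, max_backoff = 16;
      for (int i = 0; i < 6; i++) {
        printf("%u ", step); // prints: 1 3 7 15 15 15
        step = (step << 1 | 1) & (max_backoff - 1);
      }
      printf("\n");
      return 0;
    }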
@@ -651,12 +651,15 @@ extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
     if (lck->tas.lk.poll != 0 || \
         !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
       kmp_uint32 spins; \
+      kmp_uint64 time; \
       KMP_FSYNC_PREPARE(lck); \
       KMP_INIT_YIELD(spins); \
+      KMP_INIT_BACKOFF(time); \
       do { \
-        KMP_YIELD_OVERSUB_ELSE_SPIN(spins); \
-      } while (lck->tas.lk.poll != 0 || !__kmp_atomic_compare_store_acq( \
-                   &lck->tas.lk.poll, 0, gtid + 1)); \
+        KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); \
+      } while ( \
+          lck->tas.lk.poll != 0 || \
+          !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
     } \
     KMP_FSYNC_ACQUIRED(lck); \
   } else { \

@@ -758,10 +761,12 @@ extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
     if ((lck->tas.lk.poll != 0) || \
         !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
       kmp_uint32 spins; \
+      kmp_uint64 time; \
       KMP_FSYNC_PREPARE(lck); \
       KMP_INIT_YIELD(spins); \
+      KMP_INIT_BACKOFF(time); \
       do { \
-        KMP_YIELD_OVERSUB_ELSE_SPIN(spins); \
+        KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); \
       } while ( \
           (lck->tas.lk.poll != 0) || \
           !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \

@@ -6895,7 +6895,9 @@ static void __kmp_check_mic_type() {
 static void __kmp_user_level_mwait_init() {
   struct kmp_cpuid buf;
   __kmp_x86_cpuid(7, 0, &buf);
-  __kmp_umwait_enabled = ((buf.ecx >> 5) & 1) && __kmp_user_level_mwait;
+  __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
+  __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
+  __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
   KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                 __kmp_umwait_enabled));
 }
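The detection above reads CPUID leaf 7, sub-leaf 0, and tests ECX bit 5, the architectural WAITPKG feature flag covering UMONITOR/UMWAIT/TPAUSE. A standalone equivalent using the GCC/Clang cpuid.h helper (has_waitpkg is an illustrative name, not the runtime's):

    #include <cpuid.h>

    // Returns 1 if the CPU advertises WAITPKG (CPUID.07H.0:ECX[5]).
    static int has_waitpkg(void) {
      unsigned eax, ebx, ecx, edx;
      if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
        return 0; // leaf 7 not supported
      return (ecx >> 5) & 1;
    }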
@@ -5171,6 +5171,27 @@ static void __kmp_stg_print_mwait_hints(kmp_str_buf_t *buffer, char const *name,

 #endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT

+#if KMP_HAVE_UMWAIT
+// -----------------------------------------------------------------------------
+// KMP_TPAUSE
+// 0 = don't use TPAUSE, 1 = use C0.1 state, 2 = use C0.2 state
+
+static void __kmp_stg_parse_tpause(char const *name, char const *value,
+                                   void *data) {
+  __kmp_stg_parse_int(name, value, 0, INT_MAX, &__kmp_tpause_state);
+  if (__kmp_tpause_state != 0) {
+    // The actual hint passed to tpause is: 0 for C0.2 and 1 for C0.1
+    if (__kmp_tpause_state == 2) // use C0.2
+      __kmp_tpause_hint = 0; // default was set to 1 for C0.1
+  }
+} // __kmp_stg_parse_tpause
+
+static void __kmp_stg_print_tpause(kmp_str_buf_t *buffer, char const *name,
+                                   void *data) {
+  __kmp_stg_print_int(buffer, name, __kmp_tpause_state);
+} // __kmp_stg_print_tpause
+#endif // KMP_HAVE_UMWAIT
+
 // -----------------------------------------------------------------------------
 // OMP_DISPLAY_ENV

@@ -5536,6 +5557,10 @@ static kmp_setting_t __kmp_stg_table[] = {
     {"KMP_MWAIT_HINTS", __kmp_stg_parse_mwait_hints,
      __kmp_stg_print_mwait_hints, NULL, 0, 0},
 #endif
+
+#if KMP_HAVE_UMWAIT
+    {"KMP_TPAUSE", __kmp_stg_parse_tpause, __kmp_stg_print_tpause, NULL, 0, 0},
+#endif
     {"", NULL, NULL, NULL, 0, 0}}; // settings

 static int const __kmp_stg_count =
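With the table entry above, KMP_TPAUSE is read like any other KMP_* setting: 0 disables TPAUSE (the default), 1 selects C0.1, and 2 selects C0.2 and flips __kmp_tpause_hint to 0. A sketch of setting it programmatically, assuming the call happens before the first OpenMP construct so the runtime sees it at initialization:

    #include <stdlib.h>

    int main(void) {
      // Assumed usage: equivalent to launching with KMP_TPAUSE=2 in the shell.
      setenv("KMP_TPAUSE", "2", 1); // 0 = off (default), 1 = C0.1, 2 = C0.2
      // ... the first OpenMP construct after this point picks up the setting ...
      return 0;
    }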
@@ -3552,9 +3552,11 @@ void __kmp_reap_task_teams(void) {
 void __kmp_wait_to_unref_task_teams(void) {
   kmp_info_t *thread;
   kmp_uint32 spins;
+  kmp_uint64 time;
   int done;

   KMP_INIT_YIELD(spins);
+  KMP_INIT_BACKOFF(time);

   for (;;) {
     done = TRUE;

@@ -3604,7 +3606,7 @@ void __kmp_wait_to_unref_task_teams(void) {
     }

     // If oversubscribed or have waited a bit, yield.
-    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
   }
 }

@@ -377,6 +377,7 @@ __kmp_wait_template(kmp_info_t *this_thr,
 #else
   kmp_uint32 hibernate;
 #endif
+  kmp_uint64 time;

   KMP_FSYNC_SPIN_INIT(spin, NULL);
   if (flag->done_check()) {

@@ -476,6 +477,7 @@ final_spin=FALSE)
 #endif

   KMP_INIT_YIELD(spins); // Setup for waiting
+  KMP_INIT_BACKOFF(time);

   if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
       __kmp_pause_status == kmp_soft_paused) {

@@ -563,7 +565,7 @@ final_spin=FALSE)

     // If we are oversubscribed, or have waited a bit (and
     // KMP_LIBRARY=throughput), then yield
-    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);

 #if KMP_STATS_ENABLED
     // Check if thread has been signalled to idle state

@@ -1327,16 +1327,18 @@ static void __kmp_reap_common(kmp_info_t *th) {
     // KMP_WAIT to cover this usage also.
     void *obj = NULL;
     kmp_uint32 spins;
+    kmp_uint64 time;
 #if USE_ITT_BUILD
     KMP_FSYNC_SPIN_INIT(obj, (void *)&th->th.th_info.ds.ds_alive);
 #endif /* USE_ITT_BUILD */
     KMP_INIT_YIELD(spins);
+    KMP_INIT_BACKOFF(time);
     do {
 #if USE_ITT_BUILD
       KMP_FSYNC_SPIN_PREPARE(obj);
 #endif /* USE_ITT_BUILD */
       __kmp_is_thread_alive(th, &exit_val);
-      KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+      KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
     } while (exit_val == STILL_ACTIVE && TCR_4(th->th.th_info.ds.ds_alive));
 #if USE_ITT_BUILD
     if (exit_val == STILL_ACTIVE) {