Added new user-guided lock API, currently disabled. Use KMP_USE_DYNAMIC_LOCK=1 to enable it.

llvm-svn: 230030
Andrey Churbanov 2015-02-20 18:05:17 +00:00
parent a5547bce79
commit 5c56fb55b0
20 changed files with 1578 additions and 27 deletions
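
For illustration (not part of this commit): with the runtime built with KMP_USE_DYNAMIC_LOCK=1, the hint argument selects the lock implementation. Per __kmp_init_lock_hinted in the kmp_lock.cpp hunks below, uncontended maps to a test-and-set lock, speculative to an HLE lock where available, adaptive to the adaptive lock, and everything else falls back to a queuing lock. The lock is then used through the ordinary OpenMP routines:

#include <omp.h>

int main(void) {
    omp_lock_t lck;
    /* Request a speculative (HLE) lock; the runtime silently falls back
       to test-and-set when HLE is not supported. */
    kmp_init_lock_hinted(&lck, kmp_lock_hint_speculative);
    omp_set_lock(&lck);
    /* ... critical work ... */
    omp_unset_lock(&lck);
    omp_destroy_lock(&lck);
    return 0;
}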


@@ -84,6 +84,20 @@
extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *);
extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *);
/* lock hint type for dynamic user lock */
typedef enum kmp_lock_hint_t {
kmp_lock_hint_none = 0,
kmp_lock_hint_contended,
kmp_lock_hint_uncontended,
kmp_lock_hint_nonspeculative,
kmp_lock_hint_speculative,
kmp_lock_hint_adaptive,
} kmp_lock_hint_t;
/* hinted lock initializers */
extern void __KAI_KMPC_CONVENTION kmp_init_lock_hinted(omp_lock_t *, kmp_lock_hint_t);
extern void __KAI_KMPC_CONVENTION kmp_init_nest_lock_hinted(omp_nest_lock_t *, kmp_lock_hint_t);
/* time API functions */
extern double __KAI_KMPC_CONVENTION omp_get_wtime (void);
extern double __KAI_KMPC_CONVENTION omp_get_wtick (void);


@@ -31,6 +31,7 @@
integer, parameter :: kmp_size_t_kind = int_ptr_kind()
integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
integer, parameter :: kmp_cancel_kind = omp_integer_kind
integer, parameter :: kmp_lock_hint_kind = omp_integer_kind
end module omp_lib_kinds
@@ -60,6 +61,13 @@
integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3
integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_none = 0
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_contended = 1
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_uncontended = 2
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_nonspeculative = 3
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_speculative = 4
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 5
interface
! ***
@@ -436,6 +444,19 @@
integer (kind=kmp_cancel_kind) cancelkind
logical (kind=omp_logical_kind) kmp_get_cancellation_status
end function kmp_get_cancellation_status
subroutine kmp_init_lock_hinted(lockvar, lockhint)
use omp_lib_kinds
integer (kind=omp_lock_kind) lockvar
integer (kind=kmp_lock_hint_kind) lockhint
end subroutine kmp_init_lock_hinted
subroutine kmp_init_nest_lock_hinted(lockvar, lockhint)
use omp_lib_kinds
integer (kind=omp_nest_lock_kind) lockvar
integer (kind=kmp_lock_hint_kind) lockhint
end subroutine kmp_init_nest_lock_hinted
end interface
!dec$ if defined(_WIN32)
@@ -521,6 +542,9 @@
!dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
!dec$ attributes alias:'KMP_INIT_LOCK_HINTED'::kmp_init_lock_hinted
!dec$ attributes alias:'KMP_INIT_NEST_LOCK_HINTED'::kmp_init_nest_lock_hinted
!dec$ else
!***
@@ -597,6 +621,9 @@
!dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status
!dec$ attributes alias:'_KMP_INIT_LOCK_HINTED'::kmp_init_lock_hinted
!dec$ attributes alias:'_KMP_INIT_NEST_LOCK_HINTED'::kmp_init_nest_lock_hinted
!dec$ endif
!dec$ endif
@@ -675,6 +702,9 @@
!dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off
!dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status
!dec$ attributes alias:'kmp_init_lock_hinted_'::kmp_init_lock_hinted
!dec$ attributes alias:'kmp_init_nest_lock_hinted_'::kmp_init_nest_lock_hinted
!dec$ endif
!dec$ if defined(__APPLE__)
@@ -751,6 +781,9 @@
!dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status
!dec$ attributes alias:'_kmp_init_lock_hinted_'::kmp_init_lock_hinted
!dec$ attributes alias:'_kmp_init_nest_lock_hinted_'::kmp_init_nest_lock_hinted
!dec$ endif
end module omp_lib


@@ -27,6 +27,7 @@
integer, parameter :: kmp_size_t_kind = c_size_t
integer, parameter :: kmp_affinity_mask_kind = c_intptr_t
integer, parameter :: kmp_cancel_kind = omp_integer_kind
integer, parameter :: kmp_lock_hint_kind = omp_integer_kind
end module omp_lib_kinds
@@ -58,6 +59,13 @@
integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3
integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_none = 0
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_contended = 1
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_uncontended = 2
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_nonspeculative = 3
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_speculative = 4
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 5
interface
! ***
@@ -438,6 +446,18 @@
logical (kind=omp_logical_kind) kmp_get_cancellation_status
end function kmp_get_cancellation_status
subroutine kmp_init_lock_hinted(lockvar, lockhint) bind(c)
use omp_lib_kinds
integer (kind=omp_lock_kind) lockvar
integer (kind=kmp_lock_hint_kind), value :: lockhint
end subroutine kmp_init_lock_hinted
subroutine kmp_init_nest_lock_hinted(lockvar, lockhint) bind(c)
use omp_lib_kinds
integer (kind=omp_nest_lock_kind) lockvar
integer (kind=kmp_lock_hint_kind), value :: lockhint
end subroutine kmp_init_nest_lock_hinted
end interface
end module omp_lib


@@ -28,6 +28,7 @@
integer, parameter :: kmp_pointer_kind = int_ptr_kind()
integer, parameter :: kmp_size_t_kind = int_ptr_kind()
integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind()
integer, parameter :: kmp_lock_hint_kind = omp_integer_kind
integer (kind=omp_integer_kind), parameter :: openmp_version = $OMP_VERSION
integer (kind=omp_integer_kind), parameter :: kmp_version_major = $KMP_VERSION_MAJOR
@@ -47,6 +48,13 @@
integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3
integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_none = 0
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_contended = 1
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_uncontended = 2
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_nonspeculative = 3
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_speculative = 4
integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 5
interface
! ***
@@ -413,6 +421,18 @@
subroutine kmp_set_warnings_off() bind(c)
end subroutine kmp_set_warnings_off
subroutine kmp_init_lock_hinted(lockvar, lockhint) bind(c)
import
integer (kind=omp_lock_kind) lockvar
integer (kind=kmp_lock_hint_kind), value :: lockhint
end subroutine kmp_init_lock_hinted
subroutine kmp_init_nest_lock_hinted(lockvar, lockhint) bind(c)
import
integer (kind=omp_nest_lock_kind) lockvar
integer (kind=kmp_lock_hint_kind), value :: lockhint
end subroutine kmp_init_nest_lock_hinted
end interface
!DIR$ IF DEFINED (__INTEL_OFFLOAD)
@@ -480,6 +500,8 @@
!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_free
!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_on
!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_off
!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_init_lock_hinted
!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_init_nest_lock_hinted
!DIR$ IF(__INTEL_COMPILER.GE.1400)
!$omp declare target(omp_set_num_threads )
@@ -546,6 +568,8 @@
!$omp declare target(kmp_free )
!$omp declare target(kmp_set_warnings_on )
!$omp declare target(kmp_set_warnings_off )
!$omp declare target(kmp_init_lock_hinted )
!$omp declare target(kmp_init_nest_lock_hinted )
!DIR$ ENDIF
!DIR$ ENDIF


@@ -667,10 +667,17 @@ __kmpc_master(ident_t *loc, kmp_int32 global_tid)
status = 1;
if ( __kmp_env_consistency_check ) {
#if KMP_USE_DYNAMIC_LOCK
if (status)
__kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
else
__kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
#else
if (status)
__kmp_push_sync( global_tid, ct_master, loc, NULL );
else
__kmp_check_sync( global_tid, ct_master, loc, NULL );
#endif
}
return status;
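The trailing 0 added here is the new lock-sequence argument: under KMP_USE_DYNAMIC_LOCK, __kmp_push_sync and __kmp_check_sync take an extra kmp_uint32 seq (see the kmp_error.h hunk below) so the consistency checker knows which lock flavor it is inspecting; callers with no user lock pass 0. The new signature:

void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq );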
@@ -764,6 +771,144 @@ __kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
__kmp_parallel_dxo( & gtid, & cid, loc );
}
#if KMP_USE_DYNAMIC_LOCK
static __forceinline kmp_indirect_lock_t *
__kmp_get_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_dyna_lockseq_t seq)
{
// Code from __kmp_get_critical_section_ptr
// This function returns an indirect lock object instead of a user lock.
kmp_indirect_lock_t **lck, *ret;
lck = (kmp_indirect_lock_t **)crit;
ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
if (ret == NULL) {
void *idx;
kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
ret = ilk;
DYNA_I_LOCK_FUNC(ilk, init)(ilk->lock);
DYNA_SET_I_LOCK_LOCATION(ilk, loc);
DYNA_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
KA_TRACE(20, ("__kmp_get_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
__kmp_itt_critical_creating(ilk->lock, loc);
#endif
int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
if (status == 0) {
#if USE_ITT_BUILD
__kmp_itt_critical_destroyed(ilk->lock);
#endif
// Postponing destroy, to avoid costly dispatch here.
//DYNA_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
ret = (kmp_indirect_lock_t *)TCR_PTR(*lck);
KMP_DEBUG_ASSERT(ret != NULL);
}
}
return ret;
}
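This follows the usual lazy-publication idiom: the first thread allocates and initializes the lock, then tries to install it with a compare-and-store; a loser of the race keeps the winner's pointer (destruction of its own copy is postponed, per the comment). A generic sketch of the idiom with hypothetical names, independent of the runtime's types:

#include <stdlib.h>

typedef struct { int state; } lock_t;        /* placeholder lock type */

static lock_t *make_lock(void) {             /* stand-in allocator */
    lock_t *l = malloc(sizeof(*l));
    l->state = 0;
    return l;
}

static lock_t *get_or_create(lock_t **slot) {
    lock_t *l = *slot;                       /* TCR_PTR(*lck) in the runtime */
    if (l == NULL) {
        lock_t *mine = make_lock();
        if (__sync_bool_compare_and_swap(slot, NULL, mine))
            l = mine;                        /* we published our lock */
        else
            l = *slot;                       /* lost the race; use the winner's */
    }
    return l;
}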
// Fast-path acquire tas lock
#define DYNA_ACQUIRE_TAS_LOCK(lock, gtid) { \
kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
if (l->lk.poll != DYNA_LOCK_FREE(tas) || \
! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \
kmp_uint32 spins; \
KMP_FSYNC_PREPARE(l); \
KMP_INIT_YIELD(spins); \
if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
KMP_YIELD(TRUE); \
} else { \
KMP_YIELD_SPIN(spins); \
} \
while (l->lk.poll != DYNA_LOCK_FREE(tas) || \
! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \
if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
KMP_YIELD(TRUE); \
} else { \
KMP_YIELD_SPIN(spins); \
} \
} \
} \
KMP_FSYNC_ACQUIRED(l); \
}
// Fast-path test tas lock
#define DYNA_TEST_TAS_LOCK(lock, gtid, rc) { \
kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
rc = l->lk.poll == DYNA_LOCK_FREE(tas) && \
KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas)); \
}
// Fast-path release tas lock
#define DYNA_RELEASE_TAS_LOCK(lock, gtid) { \
TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, DYNA_LOCK_FREE(tas)); \
KMP_MB(); \
}
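The DYNA_* helpers used in these macros are defined in kmp_lock.h, which is not shown on this page. Going by the "low 8 bits" comment further down in kmp_lock.cpp, DYNA_LOCK_FREE(tas) is just the tas type tag, DYNA_LOCK_BUSY(v, tas) packs v above the tag, and DYNA_LOCK_STRIP shifts the tag back out. A rough model (the 8-bit tag width is an assumption):

#include <stdint.h>

#define TAG_BITS 8  /* assumed tag width, per the "low 8 bits" comment below */

static inline uint32_t lock_free(uint32_t tag)             { return tag; }
static inline uint32_t lock_busy(uint32_t v, uint32_t tag) { return (v << TAG_BITS) | tag; }
static inline uint32_t lock_strip(uint32_t w)              { return w >> TAG_BITS; }

/* A free lock holds just its tag; owner gtid g is stored as lock_busy(g + 1, tag),
   matching DYNA_LOCK_STRIP(poll) - 1 in __kmp_get_tas_lock_owner below. */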
#if DYNA_HAS_FUTEX
# include <unistd.h>
# include <sys/syscall.h>
# ifndef FUTEX_WAIT
# define FUTEX_WAIT 0
# endif
# ifndef FUTEX_WAKE
# define FUTEX_WAKE 1
# endif
// Fast-path acquire futex lock
#define DYNA_ACQUIRE_FUTEX_LOCK(lock, gtid) { \
kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
kmp_int32 gtid_code = (gtid+1) << 1; \
KMP_MB(); \
KMP_FSYNC_PREPARE(ftx); \
kmp_int32 poll_val; \
while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), \
DYNA_LOCK_BUSY(gtid_code, futex))) != DYNA_LOCK_FREE(futex)) { \
kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1; \
if (!cond) { \
if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex))) { \
continue; \
} \
poll_val |= DYNA_LOCK_BUSY(1, futex); \
} \
kmp_int32 rc; \
if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \
continue; \
} \
gtid_code |= 1; \
} \
KMP_FSYNC_ACQUIRED(ftx); \
}
// Fast-path test futex lock
#define DYNA_TEST_FUTEX_LOCK(lock, gtid, rc) { \
kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1)) { \
KMP_FSYNC_ACQUIRED(ftx); \
rc = TRUE; \
} else { \
rc = FALSE; \
} \
}
// Fast-path release futex lock
#define DYNA_RELEASE_FUTEX_LOCK(lock, gtid) { \
kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
KMP_MB(); \
KMP_FSYNC_RELEASING(ftx); \
kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex)); \
if (DYNA_LOCK_STRIP(poll_val) & 1) { \
syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0); \
} \
KMP_MB(); \
KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
}
#endif // DYNA_HAS_FUTEX
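Tag bits aside, the futex fast path encodes two facts in the poll word: the owner is stored as (gtid+1) << 1, and bit 0 is a waiters flag. A sleeper sets bit 0 before calling FUTEX_WAIT so its wakeup cannot be missed, and the releaser only pays for the FUTEX_WAKE syscall when the flag is set. A small check of the arithmetic:

#include <assert.h>

int main(void) {
    int gtid = 5;
    int gtid_code = (gtid + 1) << 1;       /* owner encoding, bit 0 clear */
    int poll_val  = gtid_code | 1;         /* a waiter has set bit 0 */
    assert(poll_val & 1);                  /* releaser must issue FUTEX_WAKE */
    assert((poll_val >> 1) - 1 == gtid);   /* owner recoverable, cf. __kmp_get_futex_lock_owner */
    return 0;
}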
#else // KMP_USE_DYNAMIC_LOCK
static kmp_user_lock_p
__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
{
@@ -815,6 +960,8 @@ __kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, k
return lck;
}
#endif // KMP_USE_DYNAMIC_LOCK
/*!
@ingroup WORK_SHARING
@param loc source location information.
@@ -833,6 +980,47 @@ __kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit )
KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
#if KMP_USE_DYNAMIC_LOCK
// Assumption: all direct locks fit in OMP_CRITICAL_SIZE.
// The global sequence __kmp_user_lock_seq is used unless the compiler pushes a value.
if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
lck = (kmp_user_lock_p)crit;
// The thread that reaches here first needs to tag the lock word.
if (*((kmp_dyna_lock_t *)lck) == 0) {
KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq));
}
if (__kmp_env_consistency_check) {
__kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
}
# if USE_ITT_BUILD
__kmp_itt_critical_acquiring(lck);
# endif
# if DYNA_USE_FAST_TAS
if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
DYNA_ACQUIRE_TAS_LOCK(lck, global_tid);
} else
# elif DYNA_USE_FAST_FUTEX
if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
DYNA_ACQUIRE_FUTEX_LOCK(lck, global_tid);
} else
# endif
{
DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);
}
} else {
kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
lck = ilk->lock;
if (__kmp_env_consistency_check) {
__kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
}
# if USE_ITT_BUILD
__kmp_itt_critical_acquiring(lck);
# endif
DYNA_I_LOCK_FUNC(ilk, set)(lck, global_tid);
}
#else // KMP_USE_DYNAMIC_LOCK
//TODO: add THR_OVHD_STATE
KMP_CHECK_USER_LOCK_INIT();
@@ -864,9 +1052,10 @@ __kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit )
__kmp_itt_critical_acquiring( lck );
#endif /* USE_ITT_BUILD */
// Value of 'crit' should be good for using as a critical_id of the critical section directive.
__kmp_acquire_user_lock_with_checks( lck, global_tid );
#endif // KMP_USE_DYNAMIC_LOCK
#if USE_ITT_BUILD
__kmp_itt_critical_acquired( lck );
#endif /* USE_ITT_BUILD */
@@ -890,6 +1079,43 @@ __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));
#if KMP_USE_DYNAMIC_LOCK
if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
lck = (kmp_user_lock_p)crit;
KMP_ASSERT(lck != NULL);
if (__kmp_env_consistency_check) {
__kmp_pop_sync(global_tid, ct_critical, loc);
}
# if USE_ITT_BUILD
__kmp_itt_critical_releasing( lck );
# endif
# if DYNA_USE_FAST_TAS
if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
DYNA_RELEASE_TAS_LOCK(lck, global_tid);
} else
# elif DYNA_USE_FAST_FUTEX
if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
DYNA_RELEASE_FUTEX_LOCK(lck, global_tid);
} else
# endif
{
DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
}
} else {
kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
KMP_ASSERT(ilk != NULL);
lck = ilk->lock;
if (__kmp_env_consistency_check) {
__kmp_pop_sync(global_tid, ct_critical, loc);
}
# if USE_ITT_BUILD
__kmp_itt_critical_releasing( lck );
# endif
DYNA_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
}
#else // KMP_USE_DYNAMIC_LOCK
if ( ( __kmp_user_lock_kind == lk_tas )
&& ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
lck = (kmp_user_lock_p)crit;
@@ -913,9 +1139,10 @@ __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
__kmp_itt_critical_releasing( lck );
#endif /* USE_ITT_BUILD */
// Value of 'crit' should be good for using as a critical_id of the critical section directive.
__kmp_release_user_lock_with_checks( lck, global_tid );
#endif // KMP_USE_DYNAMIC_LOCK
KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
}
@@ -1319,6 +1546,27 @@ __kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_dat
/* initialize the lock */
void
__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (__kmp_env_consistency_check && user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, "omp_init_lock");
}
if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
DYNA_INIT_D_LOCK(user_lock, __kmp_user_lock_seq);
# if USE_ITT_BUILD
__kmp_itt_lock_creating((kmp_user_lock_p)user_lock, NULL);
# endif
} else {
DYNA_INIT_I_LOCK(user_lock, __kmp_user_lock_seq);
kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
DYNA_SET_I_LOCK_LOCATION(ilk, loc);
# if USE_ITT_BUILD
__kmp_itt_lock_creating(ilk->lock, loc);
# endif
}
#else // KMP_USE_DYNAMIC_LOCK
static char const * const func = "omp_init_lock";
kmp_user_lock_p lck;
KMP_DEBUG_ASSERT( __kmp_init_serial );
@@ -1350,11 +1598,42 @@ __kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if USE_ITT_BUILD
__kmp_itt_lock_creating( lck );
#endif /* USE_ITT_BUILD */
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock
/* initialize the lock */
void
__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (__kmp_env_consistency_check && user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
}
// Invoke init function after converting to nested version.
kmp_dyna_lockseq_t nested_seq;
switch (__kmp_user_lock_seq) {
case lockseq_tas: nested_seq = lockseq_nested_tas; break;
#if DYNA_HAS_FUTEX
case lockseq_futex: nested_seq = lockseq_nested_futex; break;
#endif
case lockseq_ticket: nested_seq = lockseq_nested_ticket; break;
case lockseq_queuing: nested_seq = lockseq_nested_queuing; break;
case lockseq_drdpa: nested_seq = lockseq_nested_drdpa; break;
// Use nested queuing lock for lock kinds without "nested" implementation.
default: nested_seq = lockseq_nested_queuing; break;
}
DYNA_INIT_I_LOCK(user_lock, nested_seq);
// All nested locks are indirect locks.
kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
DYNA_SET_I_LOCK_LOCATION(ilk, loc);
# if USE_ITT_BUILD
__kmp_itt_lock_creating(ilk->lock, loc);
# endif
#else // KMP_USE_DYNAMIC_LOCK
static char const * const func = "omp_init_nest_lock";
kmp_user_lock_p lck;
KMP_DEBUG_ASSERT( __kmp_init_serial );
@@ -1388,11 +1667,25 @@ __kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if USE_ITT_BUILD
__kmp_itt_lock_creating( lck );
#endif /* USE_ITT_BUILD */
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock
void
__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK
# if USE_ITT_BUILD
kmp_user_lock_p lck;
if (DYNA_EXTRACT_D_TAG(user_lock) == 0) {
lck = ((kmp_indirect_lock_t *)DYNA_LOOKUP_I_LOCK(user_lock))->lock;
} else {
lck = (kmp_user_lock_p)user_lock;
}
__kmp_itt_lock_destroyed(lck);
# endif
DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
kmp_user_lock_p lck;
if ( ( __kmp_user_lock_kind == lk_tas )
@@ -1427,11 +1720,21 @@ __kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
else {
__kmp_user_lock_free( user_lock, gtid, lck );
}
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock
/* destroy the lock */
void
__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK
# if USE_ITT_BUILD
kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock);
__kmp_itt_lock_destroyed(ilk->lock);
# endif
DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else // KMP_USE_DYNAMIC_LOCK
kmp_user_lock_p lck;
@@ -1470,11 +1773,35 @@ __kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
else {
__kmp_user_lock_free( user_lock, gtid, lck );
}
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock
void
__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
int tag = DYNA_EXTRACT_D_TAG(user_lock);
# if USE_ITT_BUILD
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object.
# endif
# if DYNA_USE_FAST_TAS
if (tag == locktag_tas && !__kmp_env_consistency_check) {
DYNA_ACQUIRE_TAS_LOCK(user_lock, gtid);
} else
# elif DYNA_USE_FAST_FUTEX
if (tag == locktag_futex && !__kmp_env_consistency_check) {
DYNA_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
} else
# endif
{
__kmp_direct_set_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
}
# if USE_ITT_BUILD
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
# endif
#else // KMP_USE_DYNAMIC_LOCK
kmp_user_lock_p lck;
if ( ( __kmp_user_lock_kind == lk_tas )
@@ -1500,11 +1827,23 @@ __kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if USE_ITT_BUILD
__kmp_itt_lock_acquired( lck );
#endif /* USE_ITT_BUILD */
}
#endif // KMP_USE_DYNAMIC_LOCK
}
void
__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK
# if USE_ITT_BUILD
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
# endif
DYNA_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
# if USE_ITT_BUILD
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#else // KMP_USE_DYNAMIC_LOCK
kmp_user_lock_p lck;
if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
@@ -1531,11 +1870,33 @@ __kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if USE_ITT_BUILD
__kmp_itt_lock_acquired( lck );
#endif /* USE_ITT_BUILD */
#endif // KMP_USE_DYNAMIC_LOCK
}
void
__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
#if KMP_USE_DYNAMIC_LOCK
int tag = DYNA_EXTRACT_D_TAG(user_lock);
# if USE_ITT_BUILD
__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
# endif
# if DYNA_USE_FAST_TAS
if (tag == locktag_tas && !__kmp_env_consistency_check) {
DYNA_RELEASE_TAS_LOCK(user_lock, gtid);
} else
# elif DYNA_USE_FAST_FUTEX
if (tag == locktag_futex && !__kmp_env_consistency_check) {
DYNA_RELEASE_FUTEX_LOCK(user_lock, gtid);
} else
# endif
{
__kmp_direct_unset_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
}
#else // KMP_USE_DYNAMIC_LOCK
kmp_user_lock_p lck;
/* Can't use serial interval since not block structured */
@@ -1570,12 +1931,23 @@ __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
#endif /* USE_ITT_BUILD */
RELEASE_LOCK( lck, gtid );
#endif // KMP_USE_DYNAMIC_LOCK
}
/* release the lock */
void
__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
#if KMP_USE_DYNAMIC_LOCK
# if USE_ITT_BUILD
__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
# endif
DYNA_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
#else // KMP_USE_DYNAMIC_LOCK
kmp_user_lock_p lck;
/* Can't use serial interval since not block structured */
@@ -1613,6 +1985,8 @@ __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
#endif /* USE_ITT_BUILD */
RELEASE_NESTED_LOCK( lck, gtid );
#endif // KMP_USE_DYNAMIC_LOCK
}
/* try to acquire the lock */
@@ -1621,6 +1995,39 @@ __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
KMP_COUNT_BLOCK(OMP_test_lock);
KMP_TIME_BLOCK(OMP_test_lock);
#if KMP_USE_DYNAMIC_LOCK
int rc;
int tag = DYNA_EXTRACT_D_TAG(user_lock);
# if USE_ITT_BUILD
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
# endif
# if DYNA_USE_FAST_TAS
if (tag == locktag_tas && !__kmp_env_consistency_check) {
DYNA_TEST_TAS_LOCK(user_lock, gtid, rc);
} else
# elif DYNA_USE_FAST_FUTEX
if (tag == locktag_futex && !__kmp_env_consistency_check) {
DYNA_TEST_FUTEX_LOCK(user_lock, gtid, rc);
} else
# endif
{
rc = __kmp_direct_test_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid);
}
if (rc) {
# if USE_ITT_BUILD
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
# endif
return FTN_TRUE;
} else {
# if USE_ITT_BUILD
__kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
# endif
return FTN_FALSE;
}
#else // KMP_USE_DYNAMIC_LOCK
kmp_user_lock_p lck;
int rc;
@@ -1653,12 +2060,31 @@ __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
return ( rc ? FTN_TRUE : FTN_FALSE );
/* Can't use serial interval since not block structured */
#endif // KMP_USE_DYNAMIC_LOCK
}
/* try to acquire the lock */
int
__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
#if KMP_USE_DYNAMIC_LOCK
int rc;
# if USE_ITT_BUILD
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
# endif
rc = DYNA_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
# if USE_ITT_BUILD
if (rc) {
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
} else {
__kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
}
# endif
return rc;
#else // KMP_USE_DYNAMIC_LOCK
kmp_user_lock_p lck;
int rc;
@@ -1692,6 +2118,8 @@ __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
return rc;
/* Can't use serial interval since not block structured */
#endif // KMP_USE_DYNAMIC_LOCK
}
@@ -1723,6 +2151,29 @@ __kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid,
// should we keep it visible in new reduce block?
kmp_user_lock_p lck;
#if KMP_USE_DYNAMIC_LOCK
if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
lck = (kmp_user_lock_p)crit;
if (*((kmp_dyna_lock_t *)lck) == 0) {
KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq));
}
KMP_DEBUG_ASSERT(lck != NULL);
if (__kmp_env_consistency_check) {
__kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
}
DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid);
} else {
kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq);
KMP_DEBUG_ASSERT(ilk != NULL);
if (__kmp_env_consistency_check) {
__kmp_push_sync(global_tid, ct_critical, loc, ilk->lock, __kmp_user_lock_seq);
}
DYNA_I_LOCK_FUNC(ilk, set)(ilk->lock, global_tid);
}
#else // KMP_USE_DYNAMIC_LOCK
// We know that the fast reduction code is only emitted by Intel compilers
// with 32 byte critical sections. If there isn't enough space, then we
// have to use a pointer.
@@ -1738,6 +2189,8 @@ __kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid,
__kmp_push_sync( global_tid, ct_critical, loc, lck );
__kmp_acquire_user_lock_with_checks( lck, global_tid );
#endif // KMP_USE_DYNAMIC_LOCK
}
// used in a critical section reduce block
@@ -1746,6 +2199,22 @@ __kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, km
kmp_user_lock_p lck;
#if KMP_USE_DYNAMIC_LOCK
if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) {
lck = (kmp_user_lock_p)crit;
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_critical, loc);
DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
} else {
kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
if (__kmp_env_consistency_check)
__kmp_pop_sync(global_tid, ct_critical, loc);
DYNA_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
}
#else // KMP_USE_DYNAMIC_LOCK
// We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
// sections. If there isn't enough space, then we have to use a pointer.
if ( __kmp_base_user_lock_size > 32 ) {
@@ -1760,6 +2229,7 @@ __kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, km
__kmp_release_user_lock_with_checks( lck, global_tid );
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block
@@ -1802,8 +2272,13 @@ __kmpc_reduce_nowait(
__kmp_parallel_initialize();
// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
if ( __kmp_env_consistency_check )
__kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
#else
if ( __kmp_env_consistency_check )
__kmp_push_sync( global_tid, ct_reduce, loc, NULL );
#endif
#if OMP_40_ENABLED
th = __kmp_thread_from_gtid(global_tid);
@@ -1991,8 +2466,13 @@ __kmpc_reduce(
__kmp_parallel_initialize();
// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
if ( __kmp_env_consistency_check )
__kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
#else
if ( __kmp_env_consistency_check )
__kmp_push_sync( global_tid, ct_reduce, loc, NULL );
#endif
packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
__KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );


@@ -355,7 +355,11 @@ __kmp_dispatch_deo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
th = __kmp_threads[*gtid_ref];
if ( th -> th.th_root -> r.r_active
&& ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) {
#if KMP_USE_DYNAMIC_LOCK
__kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0 );
#else
__kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL );
#endif
}
}
}
@@ -377,7 +381,11 @@ __kmp_dispatch_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
pr = reinterpret_cast< dispatch_private_info_template< UT >* >
( th -> th.th_dispatch -> th_dispatch_pr_current );
if ( pr -> pushed_ws != ct_none ) {
#if KMP_USE_DYNAMIC_LOCK
__kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL, 0 );
#else
__kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL );
#endif
}
}


@@ -287,7 +287,11 @@ __kmp_push_workshare( int gtid, enum cons_type ct, ident_t const * ident )
}
void
#if KMP_USE_DYNAMIC_LOCK
__kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq )
#else
__kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck )
#endif
{
struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
@@ -345,7 +349,11 @@ __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_l
}
}
} else if ( ct == ct_critical ) {
#if KMP_USE_DYNAMIC_LOCK
if ( lck != NULL && __kmp_get_user_lock_owner( lck, seq ) == gtid ) { /* this same thread already has lock for this critical section */
#else
if ( lck != NULL && __kmp_get_user_lock_owner( lck ) == gtid ) { /* this same thread already has lock for this critical section */
#endif
int index = p->s_top;
struct cons_data cons = { NULL, ct_critical, 0, NULL };
/* walk up construct stack and try to find critical with matching name */
@@ -380,14 +388,22 @@ __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_l
}
void
#if KMP_USE_DYNAMIC_LOCK
__kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq )
#else
__kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck )
#endif
{
int tos;
struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
KMP_ASSERT( gtid == __kmp_get_gtid() );
KE_TRACE( 10, ("__kmp_push_sync (gtid=%d)\n", gtid ) );
#if KMP_USE_DYNAMIC_LOCK
__kmp_check_sync( gtid, ct, ident, lck, seq );
#else
__kmp_check_sync( gtid, ct, ident, lck );
#endif
KE_TRACE( 100, ( PUSH_MSG( ct, ident ) ) );
tos = ++ p->stack_top;
p->stack_data[ tos ].type = ct;


@@ -31,10 +31,18 @@ void __kmp_free_cons_stack( void * ptr );
void __kmp_push_parallel( int gtid, ident_t const * ident );
void __kmp_push_workshare( int gtid, enum cons_type ct, ident_t const * ident );
#if KMP_USE_DYNAMIC_LOCK
void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name, kmp_uint32 );
#else
void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name );
#endif
void __kmp_check_workshare( int gtid, enum cons_type ct, ident_t const * ident );
#if KMP_USE_DYNAMIC_LOCK
void __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name, kmp_uint32 );
#else
void __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name );
#endif
void __kmp_pop_parallel( int gtid, ident_t const * ident );
enum cons_type __kmp_pop_workshare( int gtid, enum cons_type ct, ident_t const * ident );


@@ -802,6 +802,28 @@ xexpand(FTN_IS_INITIAL_DEVICE)( void )
typedef enum { UNINIT = -1, UNLOCKED, LOCKED } kmp_stub_lock_t;
#endif /* KMP_STUB */
#if KMP_USE_DYNAMIC_LOCK
void FTN_STDCALL
FTN_INIT_LOCK_HINTED( void **user_lock, int KMP_DEREF hint )
{
#ifdef KMP_STUB
*((kmp_stub_lock_t *)user_lock) = UNLOCKED;
#else
__kmp_init_lock_hinted( user_lock, KMP_DEREF hint );
#endif
}
void FTN_STDCALL
FTN_INIT_NEST_LOCK_HINTED( void **user_lock, int KMP_DEREF hint )
{
#ifdef KMP_STUB
*((kmp_stub_lock_t *)user_lock) = UNLOCKED;
#else
__kmp_init_nest_lock_hinted( user_lock, KMP_DEREF hint );
#endif
}
#endif
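These wrappers expose the hinted initializers to Fortran; KMP_DEREF, roughly speaking, expands to a dereference for the pass-by-reference entry points and to nothing otherwise. Under the lower-case, appended-underscore mangling from the kmp_ftn_os.h hunk below, a C view of the Fortran-visible symbols would look like this (illustration only, not part of the diff):

void kmp_init_lock_hinted_(void **lock, int *hint);       /* hint passed by reference */
void kmp_init_nest_lock_hinted_(void **lock, int *hint);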
/* initialize the lock */
void FTN_STDCALL
xexpand(FTN_INIT_LOCK)( void **user_lock )


@@ -79,6 +79,10 @@
#define FTN_GET_TEAM_NUM omp_get_team_num
#endif
#define FTN_INIT_LOCK omp_init_lock
#if KMP_USE_DYNAMIC_LOCK
#define FTN_INIT_LOCK_HINTED kmp_init_lock_hinted
#define FTN_INIT_NEST_LOCK_HINTED kmp_init_nest_lock_hinted
#endif
#define FTN_DESTROY_LOCK omp_destroy_lock
#define FTN_SET_LOCK omp_set_lock
#define FTN_UNSET_LOCK omp_unset_lock
@@ -171,6 +175,10 @@
#define FTN_GET_TEAM_NUM omp_get_team_num_
#endif
#define FTN_INIT_LOCK omp_init_lock_
#if KMP_USE_DYNAMIC_LOCK
#define FTN_INIT_LOCK_HINTED kmp_init_lock_hinted_
#define FTN_INIT_NEST_LOCK_HINTED kmp_init_nest_lock_hinted_
#endif
#define FTN_DESTROY_LOCK omp_destroy_lock_
#define FTN_SET_LOCK omp_set_lock_
#define FTN_UNSET_LOCK omp_unset_lock_
@@ -264,6 +272,10 @@
#define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM
#endif
#define FTN_INIT_LOCK OMP_INIT_LOCK
#if KMP_USE_DYNAMIC_LOCK
#define FTN_INIT_LOCK_HINTED KMP_INIT_LOCK_HINTED
#define FTN_INIT_NEST_LOCK_HINTED KMP_INIT_NEST_LOCK_HINTED
#endif
#define FTN_DESTROY_LOCK OMP_DESTROY_LOCK
#define FTN_SET_LOCK OMP_SET_LOCK
#define FTN_UNSET_LOCK OMP_UNSET_LOCK
@@ -357,6 +369,10 @@
#define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM_
#endif
#define FTN_INIT_LOCK OMP_INIT_LOCK_
#if KMP_USE_DYNAMIC_LOCK
#define FTN_INIT_LOCK_HINTED KMP_INIT_LOCK_HINTED_
#define FTN_INIT_NEST_LOCK_HINTED KMP_INIT_NEST_LOCK_HINTED_
#endif
#define FTN_DESTROY_LOCK OMP_DESTROY_LOCK_
#define FTN_SET_LOCK OMP_SET_LOCK_
#define FTN_UNSET_LOCK OMP_UNSET_LOCK_


@@ -84,7 +84,11 @@ __kmp_inline void __kmp_itt_task_starting( void * object );
__kmp_inline void __kmp_itt_task_finished( void * object );
// --- Lock reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t * );
#else
__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock );
#endif
__kmp_inline void __kmp_itt_lock_acquiring( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_acquired( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_releasing( kmp_user_lock_p lock );
@@ -92,7 +96,11 @@ __kmp_inline void __kmp_itt_lock_cancelled( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_lock_destroyed( kmp_user_lock_p lock );
// --- Critical reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t * );
#else
__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock );
#endif
__kmp_inline void __kmp_itt_critical_acquiring( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_acquired( kmp_user_lock_p lock );
__kmp_inline void __kmp_itt_critical_releasing( kmp_user_lock_p lock );


@@ -734,6 +734,21 @@ __kmp_itt_task_finished(
// -------------------------------------------------------------------------------------------------
#if KMP_USE_DYNAMIC_LOCK
// Takes location information directly
__kmp_inline
void
___kmp_itt_lock_init( kmp_user_lock_p lock, char const *type, const ident_t *loc ) {
#if USE_ITT_NOTIFY
if ( __itt_sync_create_ptr ) {
char const * src = ( loc == NULL ? NULL : loc->psource );
KMP_ITT_DEBUG_LOCK();
__itt_sync_create( lock, type, src, 0 );
KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src );
}
#endif
}
#else // KMP_USE_DYNAMIC_LOCK
// Internal guts -- common code for locks and critical sections, do not call directly.
__kmp_inline
void
@@ -750,6 +765,7 @@ ___kmp_itt_lock_init( kmp_user_lock_p lock, char const * type ) {
}; // if
#endif
} // ___kmp_itt_lock_init
#endif // KMP_USE_DYNAMIC_LOCK
// Internal guts -- common code for locks and critical sections, do not call directly.
__kmp_inline
@@ -765,29 +781,82 @@ ___kmp_itt_lock_fini( kmp_user_lock_p lock, char const * type ) {
// -------------------------------------------------------------------------------------------------
#if KMP_USE_DYNAMIC_LOCK
void
__kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t *loc ) {
___kmp_itt_lock_init( lock, "OMP Lock", loc );
}
#else
void
__kmp_itt_lock_creating( kmp_user_lock_p lock ) {
___kmp_itt_lock_init( lock, "OMP Lock" );
} // __kmp_itt_lock_creating
#endif
void
__kmp_itt_lock_acquiring( kmp_user_lock_p lock ) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
// postpone lock object access
if ( __itt_sync_prepare_ptr ) {
if ( DYNA_EXTRACT_D_TAG(lock) == 0 ) {
kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
__itt_sync_prepare( ilk->lock );
} else {
__itt_sync_prepare( lock );
}
}
#else
__itt_sync_prepare( lock );
#endif
} // __kmp_itt_lock_acquiring
void
__kmp_itt_lock_acquired( kmp_user_lock_p lock ) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
// postpone lock object access
if ( __itt_sync_acquired_ptr ) {
if ( DYNA_EXTRACT_D_TAG(lock) == 0 ) {
kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
__itt_sync_acquired( ilk->lock );
} else {
__itt_sync_acquired( lock );
}
}
#else
__itt_sync_acquired( lock );
#endif
} // __kmp_itt_lock_acquired
void
__kmp_itt_lock_releasing( kmp_user_lock_p lock ) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
if ( __itt_sync_releasing_ptr ) {
if ( DYNA_EXTRACT_D_TAG(lock) == 0 ) {
kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
__itt_sync_releasing( ilk->lock );
} else {
__itt_sync_releasing( lock );
}
}
#else
__itt_sync_releasing( lock );
#endif
} // __kmp_itt_lock_releasing
void
__kmp_itt_lock_cancelled( kmp_user_lock_p lock ) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
if ( __itt_sync_cancel_ptr ) {
if ( DYNA_EXTRACT_D_TAG(lock) == 0 ) {
kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
__itt_sync_cancel( ilk->lock );
} else {
__itt_sync_cancel( lock );
}
}
#else
__itt_sync_cancel( lock );
#endif
} // __kmp_itt_lock_cancelled
void
@@ -802,11 +871,17 @@ __kmp_itt_lock_destroyed( kmp_user_lock_p lock ) {
Critical sections are treated exactly as locks (but have different object type).
------------------------------------------------------------------------------------------------
*/
#if KMP_USE_DYNAMIC_LOCK
void
__kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t *loc ) {
___kmp_itt_lock_init( lock, "OMP Critical", loc);
}
#else
void
__kmp_itt_critical_creating( kmp_user_lock_p lock ) {
___kmp_itt_lock_init( lock, "OMP Critical" );
} // __kmp_itt_critical_creating
#endif
void
__kmp_itt_critical_acquiring( kmp_user_lock_p lock ) {


@@ -75,7 +75,7 @@ __kmp_validate_locks( void )
static kmp_int32
__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
{
return TCR_4( lck->lk.poll ) - 1;
return DYNA_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
}
static inline bool
@@ -96,8 +96,8 @@ __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */
if ( ( lck->lk.poll == 0 )
&& KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
&& KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
KMP_FSYNC_ACQUIRED(lck);
return;
}
@@ -113,8 +113,8 @@ __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
KMP_YIELD_SPIN( spins );
}
while ( ( lck->lk.poll != 0 ) ||
( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) ) {
while ( ( lck->lk.poll != DYNA_LOCK_FREE(tas) ) ||
( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) ) {
//
// FIXME - use exponential backoff here
//
@@ -152,8 +152,8 @@ __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
int
__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
if ( ( lck->lk.poll == 0 )
&& KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
&& KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
KMP_FSYNC_ACQUIRED( lck );
return TRUE;
}
@@ -177,8 +177,7 @@ __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
KMP_MB(); /* Flush all pending memory write invalidates. */
KMP_FSYNC_RELEASING(lck);
KMP_ST_REL32( &(lck->lk.poll), 0 );
KMP_ST_REL32( &(lck->lk.poll), DYNA_LOCK_FREE(tas) );
KMP_MB(); /* Flush all pending memory write invalidates. */
KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
@@ -207,7 +206,7 @@ __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
void
__kmp_init_tas_lock( kmp_tas_lock_t * lck )
{
TCW_4( lck->lk.poll, 0 );
TCW_4( lck->lk.poll, DYNA_LOCK_FREE(tas) );
}
static void
@@ -370,7 +369,7 @@ __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
static kmp_int32
__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
{
return ( TCR_4( lck->lk.poll ) >> 1 ) - 1;
return DYNA_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
}
static inline bool
@@ -398,9 +397,11 @@ __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
lck, lck->lk.poll, gtid ) );
kmp_int32 poll_val;
while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), 0,
gtid_code ) ) != 0 ) {
kmp_int32 cond = poll_val & 1;
while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex),
DYNA_LOCK_BUSY(gtid_code, futex) ) ) != DYNA_LOCK_FREE(futex) ) {
kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1;
KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
lck, gtid, poll_val, cond ) );
@@ -417,13 +418,12 @@ __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
// Try to set the lsb in the poll to indicate to the owner
// thread that they need to wake this thread up.
//
if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ),
poll_val, poll_val | 1 ) ) {
if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex) ) ) {
KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
lck, lck->lk.poll, gtid ) );
continue;
}
poll_val |= 1;
poll_val |= DYNA_LOCK_BUSY(1, futex);
KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
lck, lck->lk.poll, gtid ) );
@@ -479,7 +479,7 @@ __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
int
__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, ( gtid + 1 ) << 1 ) ) {
if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
KMP_FSYNC_ACQUIRED( lck );
return TRUE;
}
@@ -507,15 +507,15 @@ __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
KMP_FSYNC_RELEASING(lck);
kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), 0 );
kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex) );
KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
lck, gtid, poll_val ) );
if ( poll_val & 1 ) {
if ( DYNA_LOCK_STRIP(poll_val) & 1 ) {
KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
lck, gtid ) );
syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, 1, NULL, NULL, 0 );
syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0 );
}
KMP_MB(); /* Flush all pending memory write invalidates. */
@@ -549,7 +549,7 @@ __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
void
__kmp_init_futex_lock( kmp_futex_lock_t * lck )
{
TCW_4( lck->lk.poll, 0 );
TCW_4( lck->lk.poll, DYNA_LOCK_FREE(futex) );
}
static void
@@ -2933,6 +2933,576 @@ __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
lck->lk.flags = flags;
}
#if KMP_USE_DYNAMIC_LOCK
// Definitions of lock hints.
# ifndef __OMP_H
typedef enum kmp_lock_hint_t {
kmp_lock_hint_none = 0,
kmp_lock_hint_contended,
kmp_lock_hint_uncontended,
kmp_lock_hint_nonspeculative,
kmp_lock_hint_speculative,
kmp_lock_hint_adaptive,
} kmp_lock_hint_t;
# endif
// Direct lock initializers. Each one simply writes a tag to the low 8 bits of the lock word.
#define expand_init_lock(l, a) \
static void init_##l##_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { \
*lck = DYNA_LOCK_FREE(l); \
KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck)); \
}
FOREACH_D_LOCK(expand_init_lock, 0)
#undef expand_init_lock
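To make the macro machinery concrete: FOREACH_D_LOCK (defined in kmp_lock.h, not shown here) applies expand_init_lock to every direct lock kind, so the tas instantiation expands to roughly:

static void init_tas_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) {
    *lck = DYNA_LOCK_FREE(tas);  /* write the tas tag into the lock word */
    KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck));
}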
#if DYNA_HAS_HLE
// HLE lock functions - imported from the testbed runtime.
#if KMP_MIC
# define machine_pause() _mm_delay_32(10) // TODO: find the right argument
#else
# define machine_pause() _mm_pause()
#endif
#define HLE_ACQUIRE ".byte 0xf2;"
#define HLE_RELEASE ".byte 0xf3;"
static inline kmp_uint32
swap4(kmp_uint32 volatile *p, kmp_uint32 v)
{
__asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
: "+r"(v), "+m"(*p)
:
: "memory");
return v;
}
static void
__kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
{
*lck = 0;
}
static void
__kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
{
// Use gtid for DYNA_LOCK_BUSY if necessary
if (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)) {
int delay = 1;
do {
while (*(kmp_uint32 volatile *)lck != DYNA_LOCK_FREE(hle)) {
for (int i = delay; i != 0; --i)
machine_pause();
delay = ((delay << 1) | 1) & 7;
}
} while (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle));
}
}
static void
__kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
{
__kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
}
static void
__kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
{
__asm__ volatile(HLE_RELEASE "movl %1,%0"
: "=m"(*lck)
: "r"(DYNA_LOCK_FREE(hle))
: "memory");
}
static void
__kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
{
__kmp_release_hle_lock(lck, gtid); // TODO: add checks
}
static int
__kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
{
return swap4(lck, DYNA_LOCK_BUSY(1, hle)) == DYNA_LOCK_FREE(hle);
}
static int
__kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
{
return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
}
#endif // DYNA_HAS_HLE
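The .byte 0xf2 and .byte 0xf3 sequences are the XACQUIRE and XRELEASE prefixes, emitted raw so the code assembles on toolchains that lack the HLE mnemonics; on hardware without HLE the prefixes are ignored and this degrades to a plain exchange-based spin lock. For comparison, a GCC that knows the HLE memory-model bits could express the elided exchange with a builtin (a sketch of an alternative, not what the runtime does; x86 with -mhle):

static inline unsigned hle_swap(volatile unsigned *p, unsigned v)
{
    return __atomic_exchange_n(p, v, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);
}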
// Entry functions for indirect locks (first element of direct_*_ops[]).
static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
static void __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
static void __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
//
// Jump tables for the direct lock functions.
// Only fill in the odd entries; that avoids the need to shift out the low bit.
//
#define expand_func0(l, op) 0,op##_##l##_##lock,
void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
= { __kmp_init_indirect_lock, 0, FOREACH_D_LOCK(expand_func0, init) };
#define expand_func1(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_##lock,
void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *)
= { __kmp_destroy_indirect_lock, 0, FOREACH_D_LOCK(expand_func1, destroy) };
// Differentiates *lock and *lock_with_checks.
#define expand_func2(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
#define expand_func2c(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
static void (*direct_set_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
= { { __kmp_set_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, acquire) },
{ __kmp_set_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, acquire) } };
static void (*direct_unset_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
= { { __kmp_unset_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, release) },
{ __kmp_unset_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, release) } };
#define expand_func3(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
#define expand_func3c(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
static int (*direct_test_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
= { { __kmp_test_indirect_lock, 0, FOREACH_D_LOCK(expand_func3, test) },
{ __kmp_test_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func3c, test) } };
// Exposes only one set of jump tables (*lock or *lock_with_checks).
void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
//
// Jump tables for the indirect lock functions.
//
#define expand_func4(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
void (*__kmp_indirect_init_ops[])(kmp_user_lock_p)
= { FOREACH_I_LOCK(expand_func4, init) };
void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p)
= { FOREACH_I_LOCK(expand_func4, destroy) };
// Differentiates *lock and *lock_with_checks.
#define expand_func5(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
#define expand_func5c(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
static void (*indirect_set_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
= { { FOREACH_I_LOCK(expand_func5, acquire) },
{ FOREACH_I_LOCK(expand_func5c, acquire) } };
static void (*indirect_unset_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
= { { FOREACH_I_LOCK(expand_func5, release) },
{ FOREACH_I_LOCK(expand_func5c, release) } };
#define expand_func6(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
#define expand_func6c(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
static int (*indirect_test_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
= { { FOREACH_I_LOCK(expand_func6, test) },
{ FOREACH_I_LOCK(expand_func6c, test) } };
// Exposes only one set of jump tables (*lock or *lock_with_checks).
void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32) = 0;
void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32) = 0;
int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32) = 0;
// Lock index table.
kmp_indirect_lock_t **__kmp_indirect_lock_table;
kmp_lock_index_t __kmp_indirect_lock_table_size;
kmp_lock_index_t __kmp_indirect_lock_table_next;
// Size of indirect locks.
static kmp_uint32 __kmp_indirect_lock_size[DYNA_NUM_I_LOCKS] = {
sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
#if KMP_USE_ADAPTIVE_LOCKS
sizeof(kmp_adaptive_lock_t),
#endif
sizeof(kmp_drdpa_lock_t),
sizeof(kmp_tas_lock_t),
#if DYNA_HAS_FUTEX
sizeof(kmp_futex_lock_t),
#endif
sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t),
sizeof(kmp_drdpa_lock_t)
};
// Jump tables for lock accessor/modifier.
void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
// Use different lock pools for different lock types.
static kmp_indirect_lock_t * __kmp_indirect_lock_pool[DYNA_NUM_I_LOCKS] = { 0 };
// Inserts the given lock ptr into the lock table.
kmp_lock_index_t
__kmp_insert_indirect_lock(kmp_indirect_lock_t *lck)
{
kmp_lock_index_t next = __kmp_indirect_lock_table_next;
// Check capacity and double the size if required
if (next >= __kmp_indirect_lock_table_size) {
kmp_lock_index_t i;
kmp_lock_index_t size = __kmp_indirect_lock_table_size;
kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table;
__kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *));
memcpy(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *));
__kmp_free(old_table);
__kmp_indirect_lock_table_size = 2*next;
}
// Insert lck into the table and return its index.
__kmp_indirect_lock_table[next] = lck;
__kmp_indirect_lock_table_next++;
return next;
}
// User lock allocator for dynamically dispatched locks.
kmp_indirect_lock_t *
__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
{
kmp_indirect_lock_t *lck;
kmp_lock_index_t idx;
__kmp_acquire_lock(&__kmp_global_lock, gtid);
if (__kmp_indirect_lock_pool[tag] != NULL) {
lck = __kmp_indirect_lock_pool[tag];
if (OMP_LOCK_T_SIZE < sizeof(void *))
idx = lck->lock->pool.index;
__kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
} else {
lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t));
lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
if (OMP_LOCK_T_SIZE < sizeof(void *))
idx = __kmp_insert_indirect_lock(lck);
}
__kmp_release_lock(&__kmp_global_lock, gtid);
lck->type = tag;
if (OMP_LOCK_T_SIZE < sizeof(void *)) {
*((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
} else {
*((kmp_indirect_lock_t **)user_lock) = lck;
}
return lck;
}
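This allocator establishes the direct/indirect split in the lock word: a direct lock carries an odd type tag in its low bits, while an indirect handle stores either a pointer or, when the lock word is too small for one, an even table index (idx << 1). That is why the direct jump tables above fill only odd entries, and why code such as __kmpc_destroy_lock can treat DYNA_EXTRACT_D_TAG(user_lock) == 0 as meaning "indirect". A rough decoding under the small-lock-word configuration:

#include <stdint.h>

/* A direct lock word carries an odd tag; an indirect one is even. */
static int is_direct(uint32_t word)           { return (word & 1) != 0; }
/* For indirect locks the table index was stored as idx << 1. */
static uint32_t indirect_index(uint32_t word) { return word >> 1; }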
// User lock lookup for dynamically dispatched locks.
static __forceinline
kmp_indirect_lock_t *
__kmp_lookup_indirect_lock(void **user_lock, const char *func)
{
if (__kmp_env_consistency_check) {
kmp_indirect_lock_t *lck = NULL;
if (user_lock == NULL) {
KMP_FATAL(LockIsUninitialized, func);
}
if (OMP_LOCK_T_SIZE < sizeof(void *)) {
kmp_lock_index_t idx = DYNA_EXTRACT_I_INDEX(user_lock);
if (idx < 0 || idx >= __kmp_indirect_lock_table_size) {
KMP_FATAL(LockIsUninitialized, func);
}
lck = __kmp_indirect_lock_table[idx];
} else {
lck = *((kmp_indirect_lock_t **)user_lock);
}
if (lck == NULL) {
KMP_FATAL(LockIsUninitialized, func);
}
return lck;
} else {
if (OMP_LOCK_T_SIZE < sizeof(void *)) {
return __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(user_lock)];
} else {
return *((kmp_indirect_lock_t **)user_lock);
}
}
}
static void
__kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
{
#if KMP_USE_ADAPTIVE_LOCKS
if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
seq = lockseq_queuing;
}
#endif
kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
DYNA_I_LOCK_FUNC(l, init)(l->lock);
KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type));
}
static void
__kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
{
kmp_uint32 gtid = __kmp_entry_gtid();
kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
kmp_indirect_locktag_t tag = l->type;
__kmp_acquire_lock(&__kmp_global_lock, gtid);
// Use the base lock's space to keep the pool chain.
l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
if (OMP_LOCK_T_SIZE < sizeof(void *)) {
l->lock->pool.index = DYNA_EXTRACT_I_INDEX(lock);
}
__kmp_indirect_lock_pool[tag] = l;
__kmp_release_lock(&__kmp_global_lock, gtid);
}
static void
__kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
{
kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
}
static void
__kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
{
kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
}
static int
__kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
{
kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
}
static void
__kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
{
kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
}
static void
__kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
{
kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
}
static int
__kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
{
kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
}
kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
// Initialize a hinted lock.
void
__kmp_init_lock_hinted(void **lock, int hint)
{
kmp_dyna_lockseq_t seq;
switch (hint) {
case kmp_lock_hint_uncontended:
seq = lockseq_tas;
break;
case kmp_lock_hint_speculative:
#if DYNA_HAS_HLE
seq = lockseq_hle;
#else
seq = lockseq_tas;
#endif
break;
case kmp_lock_hint_adaptive:
#if KMP_USE_ADAPTIVE_LOCKS
seq = lockseq_adaptive;
#else
seq = lockseq_queuing;
#endif
break;
// Defaults to queuing locks.
case kmp_lock_hint_contended:
case kmp_lock_hint_nonspeculative:
default:
seq = lockseq_queuing;
break;
}
if (DYNA_IS_D_LOCK(seq)) {
DYNA_INIT_D_LOCK(lock, seq);
#if USE_ITT_BUILD
__kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
#endif
} else {
DYNA_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
__kmp_itt_lock_creating(ilk->lock, NULL);
#endif
}
}
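// For reference, a minimal user-code sketch of this entry point as exported
// through omp.h (illustration only, not library code). It assumes a runtime
// built with KMP_USE_DYNAMIC_LOCK=1; per the switch above, the speculative
// hint silently falls back to a TAS lock when HLE is unavailable.
//
//   #include <omp.h>
//
//   omp_lock_t lck;
//
//   void hinted_lock_demo(void) {
//       kmp_init_lock_hinted(&lck, kmp_lock_hint_speculative);
//       omp_set_lock(&lck);
//       /* ... critical section ... */
//       omp_unset_lock(&lck);
//       omp_destroy_lock(&lck);
//   }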
// This is used only in kmp_error.c when consistency checking is on.
kmp_int32
__kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
{
switch (seq) {
case lockseq_tas:
case lockseq_nested_tas:
return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
#if DYNA_HAS_FUTEX
case lockseq_futex:
case lockseq_nested_futex:
return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
#endif
case lockseq_ticket:
case lockseq_nested_ticket:
return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
case lockseq_queuing:
case lockseq_nested_queuing:
#if KMP_USE_ADAPTIVE_LOCKS
case lockseq_adaptive:
return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
#endif
case lockseq_drdpa:
case lockseq_nested_drdpa:
return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
default:
return 0;
}
}
// Initialize a nested hinted lock. All nested locks are indirect, so the lock
// sequence chosen here (e.g. the value initialized from KMP_LOCK_KIND) needs
// to be translated to its nested variant.
void
__kmp_init_nest_lock_hinted(void **lock, int hint)
{
kmp_dyna_lockseq_t seq;
switch (hint) {
case kmp_lock_hint_uncontended:
seq = lockseq_nested_tas;
break;
// Defaults to queuing locks.
case kmp_lock_hint_contended:
case kmp_lock_hint_nonspeculative:
default:
seq = lockseq_nested_queuing;
break;
}
DYNA_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
__kmp_itt_lock_creating(ilk->lock, NULL);
#endif
}
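// A corresponding user-code sketch for the nested variant (illustration only;
// nested locks always take the indirect path, so every hint maps to a nested
// lock sequence):
//
//   #include <omp.h>
//
//   omp_nest_lock_t nlck;
//
//   void hinted_nest_lock_demo(void) {
//       kmp_init_nest_lock_hinted(&nlck, kmp_lock_hint_contended);
//       omp_set_nest_lock(&nlck);
//       omp_set_nest_lock(&nlck);     /* owner re-acquires; depth is now 2 */
//       omp_unset_nest_lock(&nlck);
//       omp_unset_nest_lock(&nlck);
//       omp_destroy_nest_lock(&nlck);
//   }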
// Initializes the lock table for indirect locks.
static void
__kmp_init_indirect_lock_table()
{
__kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024);
__kmp_indirect_lock_table_size = 1024;
__kmp_indirect_lock_table_next = 0;
}
#if KMP_USE_ADAPTIVE_LOCKS
# define init_lock_func(table, expand) { \
table[locktag_ticket] = expand(ticket); \
table[locktag_queuing] = expand(queuing); \
table[locktag_adaptive] = expand(queuing); \
table[locktag_drdpa] = expand(drdpa); \
table[locktag_nested_ticket] = expand(ticket); \
table[locktag_nested_queuing] = expand(queuing); \
table[locktag_nested_drdpa] = expand(drdpa); \
}
#else
# define init_lock_func(table, expand) { \
table[locktag_ticket] = expand(ticket); \
table[locktag_queuing] = expand(queuing); \
table[locktag_drdpa] = expand(drdpa); \
table[locktag_nested_ticket] = expand(ticket); \
table[locktag_nested_queuing] = expand(queuing); \
table[locktag_nested_drdpa] = expand(drdpa); \
}
#endif // KMP_USE_ADAPTIVE_LOCKS
// Initializes data for dynamic user locks.
void
__kmp_init_dynamic_user_locks()
{
// Select the jump tables: offset 1 picks the consistency-checking variants.
int offset = (__kmp_env_consistency_check)? 1: 0;
__kmp_direct_set_ops = direct_set_tab[offset];
__kmp_direct_unset_ops = direct_unset_tab[offset];
__kmp_direct_test_ops = direct_test_tab[offset];
__kmp_indirect_set_ops = indirect_set_tab[offset];
__kmp_indirect_unset_ops = indirect_unset_tab[offset];
__kmp_indirect_test_ops = indirect_test_tab[offset];
__kmp_init_indirect_lock_table();
// Initialize lock accessor/modifier
// Could have used designated initializer, but -TP /Qstd=c99 did not work with icl.exe.
#define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
init_lock_func(__kmp_indirect_set_location, expand_func);
#undef expand_func
#define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
init_lock_func(__kmp_indirect_set_flags, expand_func);
#undef expand_func
#define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
init_lock_func(__kmp_indirect_get_location, expand_func);
#undef expand_func
#define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
init_lock_func(__kmp_indirect_get_flags, expand_func);
#undef expand_func
__kmp_init_user_locks = TRUE;
}
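// For clarity, one entry of the macro expansion used above: with expand_func
// defined as the cast of __kmp_set_##l##_lock_location,
// init_lock_func(__kmp_indirect_set_location, expand_func) assigns, e.g.,
//
//   __kmp_indirect_set_location[locktag_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;
//
// and likewise for the queuing, drdpa, and nested tags listed in init_lock_func.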
// Clean up the lock table.
void
__kmp_cleanup_indirect_user_locks()
{
kmp_lock_index_t i;
int k;
// Clean up locks in the pools first (they were already destroyed before going into the pools).
for (k = 0; k < DYNA_NUM_I_LOCKS; ++k) {
kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
while (l != NULL) {
kmp_indirect_lock_t *ll = l;
l = (kmp_indirect_lock_t *)l->lock->pool.next;
if (OMP_LOCK_T_SIZE < sizeof(void *)) {
__kmp_indirect_lock_table[ll->lock->pool.index] = NULL;
}
__kmp_free(ll->lock);
__kmp_free(ll);
}
}
// Clean up the remaining undestroyed locks.
for (i = 0; i < __kmp_indirect_lock_table_next; i++) {
kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i];
if (l != NULL) {
// Locks not destroyed explicitly need to be destroyed here.
DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
__kmp_free(l->lock);
__kmp_free(l);
}
}
// Free the table
__kmp_free(__kmp_indirect_lock_table);
__kmp_init_user_locks = FALSE;
}
enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value
#else // KMP_USE_DYNAMIC_LOCK
/* ------------------------------------------------------------------------ */
/* user locks
*
@ -3539,3 +4109,4 @@ __kmp_cleanup_user_locks( void )
TCW_4(__kmp_init_user_locks, FALSE);
}
#endif // KMP_USE_DYNAMIC_LOCK

View File

@ -619,6 +619,8 @@ union kmp_user_lock {
typedef union kmp_user_lock *kmp_user_lock_p;
#if ! KMP_USE_DYNAMIC_LOCK
extern size_t __kmp_base_user_lock_size;
extern size_t __kmp_user_lock_size;
@ -1015,9 +1017,220 @@ extern void __kmp_cleanup_user_locks();
} \
}
#endif // KMP_USE_DYNAMIC_LOCK
#undef KMP_PAD
#undef KMP_GTID_DNE
#if KMP_USE_DYNAMIC_LOCK
#define DYNA_HAS_FUTEX (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM))
#define DYNA_HAS_HLE (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC)
#define DYNA_USE_FAST_FUTEX 0 && DYNA_HAS_FUTEX
#define DYNA_USE_FAST_TAS 1 && DYNA_HAS_FUTEX
// List of lock definitions; all nested locks are indirect lock types.
// The hle lock is the xchg lock prefixed with XACQUIRE/XRELEASE.
#if DYNA_HAS_FUTEX
# if DYNA_HAS_HLE
# define FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a)
# define DYNA_LAST_D_LOCK_SEQ lockseq_hle
# else
# define FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a)
# define DYNA_LAST_D_LOCK_SEQ lockseq_futex
# endif // DYNA_HAS_HLE
# if KMP_USE_ADAPTIVE_LOCKS
# define FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) \
m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
m(nested_queuing, a) m(nested_drdpa, a)
# else
# define FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \
m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
m(nested_queuing, a) m(nested_drdpa, a)
# endif // KMP_USE_ADAPTIVE_LOCKS
#else
# if DYNA_HAS_HLE
# define FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a)
# define DYNA_LAST_D_LOCK_SEQ lockseq_hle
# else
# define FOREACH_D_LOCK(m, a) m(tas, a)
# define DYNA_LAST_D_LOCK_SEQ lockseq_tas
# endif // DYNA_HAS_HLE
# if KMP_USE_ADAPTIVE_LOCKS
# define FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) \
m(nested_tas, a) m(nested_ticket, a) \
m(nested_queuing, a) m(nested_drdpa, a)
# else
# define FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \
m(nested_tas, a) m(nested_ticket, a) \
m(nested_queuing, a) m(nested_drdpa, a)
# endif // KMP_USE_ADAPTIVE_LOCKS
#endif // DYNA_HAS_FUTEX
// Information used in dynamic dispatch
#define DYNA_LOCK_VALUE_SHIFT 8
#define DYNA_LOCK_TYPE_MASK ((1<<DYNA_LOCK_VALUE_SHIFT)-1)
#define DYNA_NUM_D_LOCKS DYNA_LAST_D_LOCK_SEQ
#define DYNA_NUM_I_LOCKS (locktag_nested_drdpa+1)
// Base type for dynamic locks.
typedef kmp_uint32 kmp_dyna_lock_t;
// Lock sequence that enumerates all lock kinds.
// Always make this enumeration consistent with kmp_lockseq_t in the include directory.
typedef enum {
lockseq_indirect = 0,
#define expand_seq(l,a) lockseq_##l,
FOREACH_D_LOCK(expand_seq, 0)
FOREACH_I_LOCK(expand_seq, 0)
#undef expand_seq
} kmp_dyna_lockseq_t;
// Enumerates indirect lock tags.
typedef enum {
#define expand_tag(l,a) locktag_##l,
FOREACH_I_LOCK(expand_tag, 0)
#undef expand_tag
} kmp_indirect_locktag_t;
// Utility macros that extract information from lock sequences.
#define DYNA_IS_D_LOCK(seq) (seq >= lockseq_tas && seq <= DYNA_LAST_D_LOCK_SEQ)
#define DYNA_IS_I_LOCK(seq) (seq >= lockseq_ticket && seq <= lockseq_nested_drdpa)
#define DYNA_GET_I_TAG(seq) (kmp_indirect_locktag_t)(seq - lockseq_ticket)
#define DYNA_GET_D_TAG(seq) (seq<<1 | 1)
// Enumerates direct lock tags. Each tag is derived from its lock sequence as (seq << 1 | 1), so direct tags are always odd.
typedef enum {
#define expand_tag(l,a) locktag_##l = DYNA_GET_D_TAG(lockseq_##l),
FOREACH_D_LOCK(expand_tag, 0)
#undef expand_tag
} kmp_direct_locktag_t;
// Indirect lock type
typedef struct {
kmp_user_lock_p lock;
kmp_indirect_locktag_t type;
} kmp_indirect_lock_t;
// Function tables for direct locks. Set/unset/test differentiate functions with/without consistency checking.
extern void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
extern void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *);
extern void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32);
extern void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32);
extern int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32);
// Function tables for indirect locks. Set/unset/test differentiate functions with/without consistency checking.
extern void (*__kmp_indirect_init_ops[])(kmp_user_lock_p);
extern void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p);
extern void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32);
extern void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32);
extern int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32);
// Extracts direct lock tag from a user lock pointer
#define DYNA_EXTRACT_D_TAG(l) (*((kmp_dyna_lock_t *)(l)) & DYNA_LOCK_TYPE_MASK & -(*((kmp_dyna_lock_t *)(l)) & 1))
// Extracts indirect lock index from a user lock pointer
#define DYNA_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1)
// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t *) and op (operation type).
#define DYNA_D_LOCK_FUNC(l, op) __kmp_direct_##op##_ops[DYNA_EXTRACT_D_TAG(l)]
// Returns function pointer to the indirect lock function with l (kmp_indirect_lock_t *) and op (operation type).
#define DYNA_I_LOCK_FUNC(l, op) __kmp_indirect_##op##_ops[((kmp_indirect_lock_t *)(l))->type]
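// A worked sketch of the even/odd encoding these macros rely on, assuming the
// enumeration order above (lockseq_indirect == 0, lockseq_tas == 1, hence
// locktag_tas == 3) and a 32-bit kmp_lock_index_t; illustration only:
//
//   #include <assert.h>
//
//   void tag_encoding_sketch(void) {
//       // Direct lock: the tag (seq << 1 | 1) is always odd.
//       kmp_dyna_lock_t d = DYNA_GET_D_TAG(lockseq_tas);    // 1<<1 | 1 == 3
//       assert(DYNA_EXTRACT_D_TAG(&d) == locktag_tas);
//       // Indirect lock: the user lock word stores (index << 1), always even,
//       // so DYNA_EXTRACT_D_TAG yields 0 and dispatch falls through to the
//       // indirect entry (entry 0) of the function tables.
//       kmp_dyna_lock_t i = 5 << 1;
//       assert(DYNA_EXTRACT_D_TAG(&i) == 0);
//       assert(DYNA_EXTRACT_I_INDEX(&i) == 5);
//   }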
// Initializes a direct lock with the given lock pointer and lock sequence.
#define DYNA_INIT_D_LOCK(l, seq) __kmp_direct_init_ops[DYNA_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)
// Initializes an indirect lock with the given lock pointer and lock sequence. Entry 0 of the direct init table dispatches to the indirect-lock initializer.
#define DYNA_INIT_I_LOCK(l, seq) __kmp_direct_init_ops[0]((kmp_dyna_lock_t *)(l), seq)
// Returns "free" lock value for the given lock type.
#define DYNA_LOCK_FREE(type) (locktag_##type)
// Returns "busy" lock value for the given lock teyp.
#define DYNA_LOCK_BUSY(v, type) ((v)<<DYNA_LOCK_VALUE_SHIFT | locktag_##type)
// Returns lock value after removing (shifting) lock tag.
#define DYNA_LOCK_STRIP(v) ((v)>>DYNA_LOCK_VALUE_SHIFT)
// Updates __kmp_user_lock_seq with the given lock type.
#define DYNA_STORE_LOCK_SEQ(type) (__kmp_user_lock_seq = lockseq_##type)
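// Continuing the sketch above (same includes), the direct lock-word layout
// implied by the free/busy/strip macros: the tag lives in the low byte and the
// payload sits above it; illustration only:
//
//   void lock_word_sketch(void) {
//       kmp_dyna_lock_t w = DYNA_LOCK_FREE(tas);   // == locktag_tas: lock is free
//       w = DYNA_LOCK_BUSY(42, tas);               // == 42<<8 | locktag_tas: held
//       assert(DYNA_LOCK_STRIP(w) == 42);          // recover the payload
//   }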
// Internal entries for hinted lock initializers.
extern void __kmp_init_lock_hinted(void **, int);
extern void __kmp_init_nest_lock_hinted(void **, int);
// Initializes global states and data structures for managing dynamic user locks.
extern void __kmp_init_dynamic_user_locks();
// Allocates and returns an indirect lock with the given indirect lock tag.
extern kmp_indirect_lock_t * __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);
// Cleans up global states and data structures for managing dynamic user locks.
extern void __kmp_cleanup_indirect_user_locks();
// Default user lock sequence when not using hinted locks.
extern kmp_dyna_lockseq_t __kmp_user_lock_seq;
// Jump table for "set lock location", available only for indirect locks.
extern void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *);
#define DYNA_SET_I_LOCK_LOCATION(lck, loc) { \
if (__kmp_indirect_set_location[(lck)->type] != NULL) \
__kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \
}
// Jump table for "set lock flags", available only for indirect locks.
extern void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t);
#define DYNA_SET_I_LOCK_FLAGS(lck, flag) { \
if (__kmp_indirect_set_flags[(lck)->type] != NULL) \
__kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \
}
// Jump table for "get lock location", available only for indirect locks.
extern const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p);
#define DYNA_GET_I_LOCK_LOCATION(lck) ( __kmp_indirect_get_location[(lck)->type] != NULL \
? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \
: NULL )
// Jump table for "get lock flags", available only for indirect locks.
extern kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p);
#define DYNA_GET_I_LOCK_FLAGS(lck) ( __kmp_indirect_get_flags[(lck)->type] != NULL \
? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \
: NULL )
//
// Lock table for indirect locks.
//
// A simple linear structure is used to keep pointers to the allocated indirect locks.
extern kmp_indirect_lock_t **__kmp_indirect_lock_table;
// Current size of the lock table; it may increase but never shrink.
extern kmp_lock_index_t __kmp_indirect_lock_table_size;
// Next index to be used for a new indirect lock (= number of indirect locks allocated).
extern kmp_lock_index_t __kmp_indirect_lock_table_next;
// Number of locks in a lock block; currently fixed at 1.
// TODO: lock blocks are not implemented for dynamic locks yet. If we add support,
// we will need to manage a lock block data structure for each indirect lock type.
extern int __kmp_num_locks_in_block;
// Fast lock table lookup without consistency checking
#define DYNA_LOOKUP_I_LOCK(l) ( (OMP_LOCK_T_SIZE < sizeof(void *)) \
? __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(l)] \
: *((kmp_indirect_lock_t **)l) )
// Used once in kmp_error.c
extern kmp_int32
__kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);
#else // KMP_USE_DYNAMIC_LOCK
# define DYNA_LOCK_BUSY(v, type) (v)
# define DYNA_LOCK_FREE(type) 0
# define DYNA_LOCK_STRIP(v) (v)
# define DYNA_STORE_LOCK_SEQ(seq)
#endif // KMP_USE_DYNAMIC_LOCK
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

View File

@ -79,7 +79,9 @@ typedef struct {
addr_and_size_t roots; // Pointer to __kmp_root.
addr_and_size_t capacity; // Pointer to __kmp_threads_capacity.
addr_and_size_t monitor; // Pointer to __kmp_monitor.
#if ! KMP_USE_DYNAMIC_LOCK
addr_and_size_t lock_table; // Pointer to __kmp_lock_table.
#endif
addr_and_size_t func_microtask;
addr_and_size_t func_fork;
addr_and_size_t func_fork_teams;
@ -159,11 +161,13 @@ typedef struct {
offset_and_size_t lk_depth_locked;
offset_and_size_t lk_lock_flags;
#if ! KMP_USE_DYNAMIC_LOCK
/* lock_table_t */
kmp_int32 lt_size_of_struct; /* Size and layout of kmp_lock_table_t. */
offset_and_size_t lt_used;
offset_and_size_t lt_allocated;
offset_and_size_t lt_table;
#endif
/* task_team_t */
kmp_int32 tt_sizeof_struct;

View File

@ -815,6 +815,11 @@ typedef void (*microtask_t)( int *gtid, int *npr, ... );
# define USE_CMPXCHG_FIX 1
#endif
// Enable the dynamic user lock feature (disabled by default)
#ifndef KMP_USE_DYNAMIC_LOCK
# define KMP_USE_DYNAMIC_LOCK 0
#endif
// Warning levels
enum kmp_warnings_level {
kmp_warnings_off = 0, /* No warnings */

View File

@ -716,7 +716,11 @@ __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
if( __kmp_env_consistency_check ) {
if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
__kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
#else
__kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#endif
}
#ifdef BUILD_PARALLEL_ORDERED
if( !team->t.t_serialized ) {
@ -6735,7 +6739,11 @@ __kmp_cleanup( void )
__kmp_root = NULL;
__kmp_threads_capacity = 0;
#if KMP_USE_DYNAMIC_LOCK
__kmp_cleanup_indirect_user_locks();
#else
__kmp_cleanup_user_locks();
#endif
#if KMP_AFFINITY_SUPPORTED
KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );

View File

@ -3996,11 +3996,13 @@ __kmp_stg_parse_lock_kind( char const * name, char const * value, void * data )
|| __kmp_str_match( "testand-set", 2, value )
|| __kmp_str_match( "testandset", 2, value ) ) {
__kmp_user_lock_kind = lk_tas;
DYNA_STORE_LOCK_SEQ(tas);
}
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
else if ( __kmp_str_match( "futex", 1, value ) ) {
if ( __kmp_futex_determine_capable() ) {
__kmp_user_lock_kind = lk_futex;
DYNA_STORE_LOCK_SEQ(futex);
}
else {
KMP_WARNING( FutexNotSupported, name, value );
@ -4009,10 +4011,12 @@ __kmp_stg_parse_lock_kind( char const * name, char const * value, void * data )
#endif
else if ( __kmp_str_match( "ticket", 2, value ) ) {
__kmp_user_lock_kind = lk_ticket;
DYNA_STORE_LOCK_SEQ(ticket);
}
else if ( __kmp_str_match( "queuing", 1, value )
|| __kmp_str_match( "queue", 1, value ) ) {
__kmp_user_lock_kind = lk_queuing;
DYNA_STORE_LOCK_SEQ(queuing);
}
else if ( __kmp_str_match( "drdpa ticket", 1, value )
|| __kmp_str_match( "drdpa_ticket", 1, value )
@ -4020,17 +4024,25 @@ __kmp_stg_parse_lock_kind( char const * name, char const * value, void * data )
|| __kmp_str_match( "drdpaticket", 1, value )
|| __kmp_str_match( "drdpa", 1, value ) ) {
__kmp_user_lock_kind = lk_drdpa;
DYNA_STORE_LOCK_SEQ(drdpa);
}
#if KMP_USE_ADAPTIVE_LOCKS
else if ( __kmp_str_match( "adaptive", 1, value ) ) {
if( __kmp_cpuinfo.rtm ) { // ??? Is cpuinfo available here?
__kmp_user_lock_kind = lk_adaptive;
DYNA_STORE_LOCK_SEQ(adaptive);
} else {
KMP_WARNING( AdaptiveNotSupported, name, value );
__kmp_user_lock_kind = lk_queuing;
DYNA_STORE_LOCK_SEQ(queuing);
}
}
#endif // KMP_USE_ADAPTIVE_LOCKS
#if KMP_USE_DYNAMIC_LOCK
else if ( __kmp_str_match("hle", 1, value) ) {
DYNA_STORE_LOCK_SEQ(hle);
}
#endif
else {
KMP_WARNING( StgInvalidValue, name, value );
}
@ -5057,16 +5069,24 @@ __kmp_env_initialize( char const * string ) {
if ( __kmp_user_lock_kind == lk_default ) {
__kmp_user_lock_kind = lk_queuing;
}
#if KMP_USE_DYNAMIC_LOCK
__kmp_init_dynamic_user_locks();
#else
__kmp_set_user_lock_vptrs( __kmp_user_lock_kind );
#endif
}
else {
KMP_DEBUG_ASSERT( string != NULL); // kmp_set_defaults() was called
KMP_DEBUG_ASSERT( __kmp_user_lock_kind != lk_default );
__kmp_set_user_lock_vptrs( __kmp_user_lock_kind );
// Binds lock functions again to follow the transition between different
// KMP_CONSISTENCY_CHECK values. Calling this again is harmless as long
// as we do not allow lock kind changes after making a call to any
// user lock functions (true).
#if KMP_USE_DYNAMIC_LOCK
__kmp_init_dynamic_user_locks();
#else
__kmp_set_user_lock_vptrs( __kmp_user_lock_kind );
#endif
}
#if KMP_AFFINITY_SUPPORTED

View File

@ -48,7 +48,11 @@ __kmp_taskq_eo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;
if ( __kmp_env_consistency_check )
#if KMP_USE_DYNAMIC_LOCK
__kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL, 0 );
#else
__kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL );
#endif
if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
KMP_MB(); /* Flush all pending memory write invalidates. */

View File

@ -1579,10 +1579,12 @@ __kmp_atfork_child (void)
__kmp_init_common = FALSE;
TCW_4(__kmp_init_user_locks, FALSE);
#if ! KMP_USE_DYNAMIC_LOCK
__kmp_user_lock_table.used = 1;
__kmp_user_lock_table.allocated = 0;
__kmp_user_lock_table.table = NULL;
__kmp_lock_blocks = NULL;
#endif
__kmp_all_nth = 0;
TCW_4(__kmp_nth, 0);