These are the actual changes in the runtime that invoke OMPT-related callbacks. All of them are guarded by #if OMPT_SUPPORT and can be compiled out, which is the default.
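
For reference, a condensed sketch of the guard pattern used throughout (assembled from the hunks below, with the names from the barrier hunk; OMPT_TRACE and OMPT_BLAME further subdivide the trace and blame event classes, and a callback fires only when a tool has registered one):

#if OMPT_SUPPORT && OMPT_TRACE
    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
            my_parallel_id, my_task_id);
    }
#endif

With OMPT_SUPPORT off, the preprocessor removes every such block, so the default build pays no cost.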

llvm-svn: 236122
Andrey Churbanov 2015-04-29 16:42:24 +00:00
parent 9dd4e4e63a
commit d7d088f815
13 changed files with 1277 additions and 27 deletions

View File

@ -3023,11 +3023,17 @@ extern kmp_info_t * __kmp_allocate_thread( kmp_root_t *root,
kmp_team_t *team, int tid);
#if OMP_40_ENABLED
extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id,
#endif
kmp_proc_bind_t proc_bind,
kmp_internal_control_t *new_icvs,
int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
#else
extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id,
#endif
kmp_internal_control_t *new_icvs,
int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
#endif // OMP_40_ENABLED
@ -3062,7 +3068,11 @@ enum fork_context_e
fork_context_last
};
extern int __kmp_fork_call( ident_t *loc, int gtid, enum fork_context_e fork_context,
kmp_int32 argc, microtask_t microtask, launch_t invoker,
kmp_int32 argc,
#if OMPT_SUPPORT
void *unwrapped_task,
#endif
microtask_t microtask, launch_t invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX
va_list *ap
@ -3172,7 +3182,11 @@ extern void __kmp_clear_x87_fpu_status_word();
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
extern int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int npr, int argc, void *argv[] );
extern int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int npr, int argc, void *argv[]
#if OMPT_SUPPORT
, void **exit_frame_ptr
#endif
);
/* ------------------------------------------------------------------------ */

View File

@ -19,6 +19,10 @@
#include "kmp_os.h"
#include "kmp_lock.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
// C++ build port.
// Intel compiler does not support _Complex datatype on win.
// Intel compiler supports _Complex datatype on lin and mac.
@ -366,7 +370,23 @@ typedef kmp_queuing_lock_t kmp_atomic_lock_t;
static inline void
__kmp_acquire_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
{
#if OMPT_SUPPORT && OMPT_TRACE
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_wait_atomic)) {
ompt_callbacks.ompt_callback(ompt_event_wait_atomic)(
(ompt_wait_id_t) lck);
}
#endif
__kmp_acquire_queuing_lock( lck, gtid );
#if OMPT_SUPPORT && OMPT_TRACE
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)) {
ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)(
(ompt_wait_id_t) lck);
}
#endif
}
static inline int
@ -379,6 +399,13 @@ static inline void
__kmp_release_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid )
{
__kmp_release_queuing_lock( lck, gtid );
#if OMPT_SUPPORT && OMPT_BLAME
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_release_atomic)) {
ompt_callbacks.ompt_callback(ompt_event_release_atomic)(
(ompt_wait_id_t) lck);
}
#endif
}
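
This pair of wrappers sets the convention used for lock-like constructs throughout the patch: a wait event before a potentially blocking acquire, an acquired event once the lock is held, and a release event (under OMPT_BLAME) after the unlock. Condensed, with OMPT_EVENT(...) as a hypothetical shorthand for the status-and-callback test shown above:

/* acquire path */
OMPT_EVENT(ompt_event_wait_atomic)((ompt_wait_id_t) lck);     /* may block below */
__kmp_acquire_queuing_lock( lck, gtid );
OMPT_EVENT(ompt_event_acquired_atomic)((ompt_wait_id_t) lck);

/* release path (blame shifting) */
__kmp_release_queuing_lock( lck, gtid );
OMPT_EVENT(ompt_event_release_atomic)((ompt_wait_id_t) lck);

The same convention recurs below: the full wait/acquired/release triple for ordered sections, and the release half for critical sections and user locks.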
static inline void

View File

@ -1034,10 +1034,37 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
register kmp_team_t *team = this_thr->th.th_team;
register int status = 0;
ident_t *loc = __kmp_threads[gtid]->th.th_ident;
#if OMPT_SUPPORT
ompt_task_id_t my_task_id;
ompt_parallel_id_t my_parallel_id;
#endif
KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n",
gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_status & ompt_status_track) {
if (ompt_status == ompt_status_track_callback) {
my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
my_parallel_id = team->t.ompt_team_info.parallel_id;
if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
ompt_callbacks.ompt_callback(ompt_event_single_others_end)(
my_parallel_id, my_task_id);
}
}
this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
my_parallel_id, my_task_id);
}
} else {
this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
}
}
#endif
if (! team->t.t_serialized) {
#if USE_ITT_BUILD
// This value will be used in itt notify events below.
@ -1195,6 +1222,20 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
}
KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid), status));
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
#if OMPT_TRACE
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
my_parallel_id, my_task_id);
}
#endif
this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
}
#endif
return status;
}
@ -1286,6 +1327,16 @@ __kmp_join_barrier(int gtid)
KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", gtid, team_id, tid));
#if OMPT_SUPPORT && OMPT_TRACE
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
team->t.ompt_team_info.parallel_id,
team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
}
this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
#endif
if (__kmp_tasking_mode == tskm_extra_barrier) {
__kmp_tasking_barrier(team, this_thr, gtid);
KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n", gtid, team_id, tid));
@ -1401,6 +1452,22 @@ __kmp_join_barrier(int gtid)
// TODO now, mark worker threads as done so they may be disbanded
KMP_MB(); // Flush all pending memory write invalidates.
KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
#if OMPT_TRACE
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
team->t.ompt_team_info.parallel_id,
team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
}
#endif
// return to default state
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
}

View File

@ -20,6 +20,11 @@
#include "kmp_error.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif
#define MAX_MESSAGE 512
/* ------------------------------------------------------------------------ */
@ -283,12 +288,23 @@ __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
va_list ap;
va_start( ap, microtask );
#if OMPT_SUPPORT
kmp_info_t *master_th = __kmp_threads[ gtid ];
kmp_team_t *parent_team = master_th->th.th_team;
int tid = __kmp_tid_from_gtid( gtid );
parent_team->t.t_implicit_task_taskdata[tid].
ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
#endif
#if INCLUDE_SSC_MARKS
SSC_MARK_FORKING();
#endif
__kmp_fork_call( loc, gtid, fork_context_intel,
argc,
VOLATILE_CAST(microtask_t) microtask,
#if OMPT_SUPPORT
VOLATILE_CAST(void *) microtask, // "unwrapped" task
#endif
VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
@ -303,6 +319,13 @@ __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
__kmp_join_call( loc, gtid );
va_end( ap );
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
parent_team->t.t_implicit_task_taskdata[tid].
ompt_task_info.frame.reenter_runtime_frame = 0;
}
#endif
}
KMP_START_EXPLICIT_TIMER(OMP_serial);
}
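
The frame bookkeeping above is the pattern the rest of the patch follows: reenter_runtime_frame is set to __builtin_frame_address(0) where user code enters the runtime (fork, task spawn) and cleared on the way back out, while exit_runtime_frame is set inside the wrapper that calls back into user code. A condensed lifecycle sketch (field and function names as in the hunks; the call structure is illustrative):

/* user code enters the runtime */
task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
__kmp_fork_call(...);
    /* ... inside the wrapper that invokes the user microtask: */
    ompt_frame->exit_runtime_frame = __builtin_frame_address(0);
    task(data);
    ompt_frame->exit_runtime_frame = NULL;
/* back at the entry point */
task_info.frame.reenter_runtime_frame = 0;

A tool that unwinds the stack from inside a callback can use the two markers to separate user frames from runtime-internal frames.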
@ -358,7 +381,10 @@ __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
__kmp_fork_call( loc, gtid, fork_context_intel,
argc,
VOLATILE_CAST(microtask_t) __kmp_teams_master,
#if OMPT_SUPPORT
VOLATILE_CAST(void *) microtask, // "unwrapped" task
#endif
VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
&ap
@ -662,6 +688,20 @@ __kmpc_master(ident_t *loc, kmp_int32 global_tid)
if( KMP_MASTER_GTID( global_tid ))
status = 1;
#if OMPT_SUPPORT && OMPT_TRACE
if (status) {
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
kmp_team_t *team = this_thr -> th.th_team;
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
int tid = __kmp_tid_from_gtid( global_tid );
ompt_callbacks.ompt_callback(ompt_event_master_begin)(
team->t.ompt_team_info.parallel_id,
team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
}
}
#endif
if ( __kmp_env_consistency_check ) {
#if KMP_USE_DYNAMIC_LOCK
if (status)
@ -694,6 +734,18 @@ __kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
kmp_team_t *team = this_thr -> th.th_team;
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_master_end)) {
int tid = __kmp_tid_from_gtid( global_tid );
ompt_callbacks.ompt_callback(ompt_event_master_end)(
team->t.ompt_team_info.parallel_id,
team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
}
#endif
if ( __kmp_env_consistency_check ) {
if( global_tid < 0 )
KMP_WARNING( ThreadIdentInvalid );
@ -729,11 +781,41 @@ __kmpc_ordered( ident_t * loc, kmp_int32 gtid )
th = __kmp_threads[ gtid ];
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_status & ompt_status_track) {
/* OMPT state update */
th->th.ompt_thread_info.wait_id = (uint64_t) loc;
th->th.ompt_thread_info.state = ompt_state_wait_ordered;
/* OMPT event callback */
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
th->th.ompt_thread_info.wait_id);
}
}
#endif
if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
(*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
else
__kmp_parallel_deo( & gtid, & cid, loc );
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_status & ompt_status_track) {
/* OMPT state update */
th->th.ompt_thread_info.state = ompt_state_work_parallel;
th->th.ompt_thread_info.wait_id = 0;
/* OMPT event callback */
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
th->th.ompt_thread_info.wait_id);
}
}
#endif
#if USE_ITT_BUILD
__kmp_itt_ordered_start( gtid );
#endif /* USE_ITT_BUILD */
@ -765,6 +847,14 @@ __kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
(*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
else
__kmp_parallel_dxo( & gtid, & cid, loc );
#if OMPT_SUPPORT && OMPT_BLAME
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
th->th.ompt_thread_info.wait_id);
}
#endif
}
#if KMP_USE_DYNAMIC_LOCK
@ -1137,6 +1227,14 @@ __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
// Value of 'crit' should be good for using as a critical_id of the critical section directive.
__kmp_release_user_lock_with_checks( lck, global_tid );
#if OMPT_SUPPORT && OMPT_BLAME
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
ompt_callbacks.ompt_callback(ompt_event_release_critical)(
(uint64_t) lck);
}
#endif
#endif // KMP_USE_DYNAMIC_LOCK
KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
@ -1257,6 +1355,31 @@ __kmpc_single(ident_t *loc, kmp_int32 global_tid)
{
KMP_COUNT_BLOCK(OMP_SINGLE);
kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
kmp_team_t *team = this_thr -> th.th_team;
int tid = __kmp_tid_from_gtid( global_tid );
if (ompt_status == ompt_status_track_callback) {
if (rc) {
if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
team->t.ompt_team_info.parallel_id,
team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
team->t.ompt_team_info.microtask);
}
} else {
if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
team->t.ompt_team_info.parallel_id,
team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
}
this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
}
}
#endif
return rc;
}
@ -1273,6 +1396,19 @@ void
__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
{
__kmp_exit_single( global_tid );
#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
kmp_team_t *team = this_thr -> th.th_team;
int tid = __kmp_tid_from_gtid( global_tid );
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
team->t.ompt_team_info.parallel_id,
team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
}
#endif
}
/*!
@ -1287,6 +1423,19 @@ __kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
{
KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
kmp_team_t *team = this_thr -> th.th_team;
int tid = __kmp_tid_from_gtid( global_tid );
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
ompt_callbacks.ompt_callback(ompt_event_loop_end)(
team->t.ompt_team_info.parallel_id,
team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
}
#endif
if ( __kmp_env_consistency_check )
__kmp_pop_workshare( global_tid, ct_pdo, loc );
}
@ -1928,6 +2077,13 @@ __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
RELEASE_LOCK( lck, gtid );
#if OMPT_SUPPORT && OMPT_BLAME
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
}
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}
@ -1980,7 +2136,20 @@ __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
__kmp_itt_lock_releasing( lck );
#endif /* USE_ITT_BUILD */
RELEASE_NESTED_LOCK( lck, gtid );
int release_status = RELEASE_NESTED_LOCK( lck, gtid );
#if OMPT_SUPPORT && OMPT_BLAME
if (ompt_status == ompt_status_track_callback) {
if (release_status == KMP_LOCK_RELEASED) {
if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
(uint64_t) lck);
}
} else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
(uint64_t) lck);
}
}
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}

View File

@ -35,6 +35,11 @@
#include <float.h>
#endif
#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
@ -1189,6 +1194,16 @@ __kmp_dispatch_init(
}
}
#endif // ( KMP_STATIC_STEAL_ENABLED && USE_STEALING )
#if OMPT_SUPPORT && OMPT_TRACE
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
team_info->parallel_id, task_info->task_id, team_info->microtask);
}
#endif
}
/*
@ -1339,6 +1354,24 @@ __kmp_dispatch_finish_chunk( int gtid, ident_t *loc )
#endif /* KMP_GOMP_COMPAT */
/* Define a macro for exiting __kmp_dispatch_next(). If status is 0
* (no more work), then tell OMPT the loop is over. In some cases
* kmp_dispatch_fini() is not called. */
#if OMPT_SUPPORT && OMPT_TRACE
#define OMPT_LOOP_END \
if (status == 0) { \
if ((ompt_status == ompt_status_track_callback) && \
ompt_callbacks.ompt_callback(ompt_event_loop_end)) { \
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \
ompt_task_info_t *task_info = __ompt_get_taskinfo(0); \
ompt_callbacks.ompt_callback(ompt_event_loop_end)( \
team_info->parallel_id, task_info->task_id); \
} \
}
#else
#define OMPT_LOOP_END // no-op
#endif
template< typename T >
static int
__kmp_dispatch_next(
@ -1476,6 +1509,7 @@ __kmp_dispatch_next(
#if INCLUDE_SSC_MARKS
SSC_MARK_DISPATCH_NEXT();
#endif
OMPT_LOOP_END;
return status;
} else {
kmp_int32 last = 0;
@ -2115,6 +2149,7 @@ __kmp_dispatch_next(
#if INCLUDE_SSC_MARKS
SSC_MARK_DISPATCH_NEXT();
#endif
OMPT_LOOP_END;
return status;
}

View File

@ -19,6 +19,10 @@
#include "kmp.h"
#include "kmp_atomic.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
@ -106,6 +110,11 @@ xexpand(KMP_API_NAME_GOMP_ATOMIC_START)(void)
{
int gtid = __kmp_entry_gtid();
KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid));
#if OMPT_SUPPORT
__ompt_thread_assign_wait_id(0);
#endif
__kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}
@ -246,7 +255,36 @@ void
__kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *),
void *data)
{
#if OMPT_SUPPORT
kmp_info_t *thr;
ompt_frame_t *ompt_frame;
ompt_state_t enclosing_state;
if (ompt_status & ompt_status_track) {
// get pointer to thread data structure
thr = __kmp_threads[*gtid];
// save enclosing task state; set current state for task
enclosing_state = thr->th.ompt_thread_info.state;
thr->th.ompt_thread_info.state = ompt_state_work_parallel;
// set task frame
ompt_frame = __ompt_get_task_frame_internal(0);
ompt_frame->exit_runtime_frame = __builtin_frame_address(0);
}
#endif
task(data);
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
// clear task frame
ompt_frame->exit_runtime_frame = NULL;
// restore enclosing state
thr->th.ompt_thread_info.state = enclosing_state;
}
#endif
}
@ -264,10 +302,37 @@ __kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr,
KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size,
schedule != kmp_sch_static);
#if OMPT_SUPPORT
kmp_info_t *thr;
ompt_frame_t *ompt_frame;
ompt_state_t enclosing_state;
if (ompt_status & ompt_status_track) {
thr = __kmp_threads[*gtid];
// save enclosing task state; set current state for task
enclosing_state = thr->th.ompt_thread_info.state;
thr->th.ompt_thread_info.state = ompt_state_work_parallel;
// set task frame
ompt_frame = __ompt_get_task_frame_internal(0);
ompt_frame->exit_runtime_frame = __builtin_frame_address(0);
}
#endif
//
// Now invoke the microtask.
//
task(data);
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
// clear task frame
ompt_frame->exit_runtime_frame = NULL;
// reset enclosing state
thr->th.ompt_thread_info.state = enclosing_state;
}
#endif
}
@ -275,14 +340,26 @@ __kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr,
static
#endif /* KMP_DEBUG */
void
__kmp_GOMP_fork_call(ident_t *loc, int gtid, microtask_t wrapper, int argc,...)
__kmp_GOMP_fork_call(ident_t *loc, int gtid, void (*unwrapped_task)(void *), microtask_t wrapper, int argc,...)
{
int rc;
kmp_info_t *thr = __kmp_threads[gtid];
kmp_team_t *team = thr->th.th_team;
int tid = __kmp_tid_from_gtid(gtid);
va_list ap;
va_start(ap, argc);
rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper, __kmp_invoke_task_func,
#if OMPT_SUPPORT
team->t.t_implicit_task_taskdata[tid].
ompt_task_info.frame.reenter_runtime_frame = NULL;
#endif
rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc,
#if OMPT_SUPPORT
VOLATILE_CAST(void *) unwrapped_task,
#endif
wrapper, __kmp_invoke_task_func,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
&ap
#else
@ -293,10 +370,69 @@ __kmp_GOMP_fork_call(ident_t *loc, int gtid, microtask_t wrapper, int argc,...)
va_end(ap);
if (rc) {
kmp_info_t *thr = __kmp_threads[gtid];
__kmp_run_before_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
thr->th.th_team);
__kmp_run_before_invoked_task(gtid, tid, thr, team);
}
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_status & ompt_status_track) {
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
// implicit task callback
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
team_info->parallel_id, task_info->task_id);
}
thr->th.ompt_thread_info.state = ompt_state_work_parallel;
}
#endif
}
static void
__kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid, void (*task)(void *))
{
__kmp_serialized_parallel(loc, gtid);
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
ompt_task_id_t ompt_task_id = __ompt_get_task_id_internal(0);
ompt_frame_t *ompt_frame = __ompt_get_task_frame_internal(0);
kmp_info_t *thr = __kmp_threads[gtid];
ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(gtid);
ompt_task_id_t my_ompt_task_id = __ompt_task_id_new(gtid);
ompt_frame->exit_runtime_frame = NULL;
// parallel region callback
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
int team_size = 1;
ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
ompt_task_id, ompt_frame, ompt_parallel_id,
team_size, (void *) task);
}
// set up lightweight task
ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
__kmp_allocate(sizeof(ompt_lw_taskteam_t));
__ompt_lw_taskteam_init(lwt, thr, gtid, (void *) task, ompt_parallel_id);
lwt->ompt_task_info.task_id = my_ompt_task_id;
lwt->ompt_task_info.frame.exit_runtime_frame = 0;
__ompt_lw_taskteam_link(lwt, thr);
#if OMPT_TRACE
// implicit task callback
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
ompt_parallel_id, my_ompt_task_id);
}
thr->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif
}
#endif
}
@ -304,6 +440,16 @@ void
xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, unsigned num_threads)
{
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT
ompt_frame_t *parent_frame;
if (ompt_status & ompt_status_track) {
parent_frame = __ompt_get_task_frame_internal(0);
parent_frame->reenter_runtime_frame = __builtin_frame_address(0);
}
#endif
MKLOC(loc, "GOMP_parallel_start");
KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid));
@ -311,12 +457,18 @@ xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, unsi
if (num_threads != 0) {
__kmp_push_num_threads(&loc, gtid, num_threads);
}
__kmp_GOMP_fork_call(&loc, gtid,
__kmp_GOMP_fork_call(&loc, gtid, task,
(microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data);
}
else {
__kmpc_serialized_parallel(&loc, gtid);
__kmp_GOMP_serialized_parallel(&loc, gtid, task);
}
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
parent_frame->reenter_runtime_frame = NULL;
}
#endif
}
@ -324,9 +476,39 @@ void
xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void)
{
int gtid = __kmp_get_gtid();
kmp_info_t *thr = __kmp_threads[gtid];
MKLOC(loc, "GOMP_parallel_end");
KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid));
#if OMPT_SUPPORT
ompt_parallel_id_t parallel_id;
ompt_frame_t *ompt_frame = NULL;
if (ompt_status & ompt_status_track) {
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
parallel_id = team_info->parallel_id;
ompt_frame = __ompt_get_task_frame_internal(0);
ompt_frame->exit_runtime_frame = __builtin_frame_address(0);
#if OMPT_TRACE
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
parallel_id, task_info->task_id);
}
#endif
// unlink if necessary. no-op if there is not a lightweight task.
ompt_lw_taskteam_t *lwt = __ompt_lw_taskteam_unlink(thr);
// GOMP allocates/frees lwt since it can't be kept on the stack
if (lwt) __kmp_free(lwt);
}
#endif
if (! __kmp_threads[gtid]->th.th_team->t.t_serialized) {
kmp_info_t *thr = __kmp_threads[gtid];
__kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
@ -335,6 +517,22 @@ xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void)
}
else {
__kmpc_end_serialized_parallel(&loc, gtid);
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
parallel_id, task_info->task_id);
}
thr->th.ompt_thread_info.state =
(((thr->th.th_team)->t.t_serialized) ?
ompt_state_work_serial : ompt_state_work_parallel);
}
#endif
}
}
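
A note on the lwt lifetime (see the comment above): in the Intel fork path later in this commit, the ompt_lw_taskteam_t lives on the stack of __kmp_fork_call, whose frame spans the entire microtask invocation. Here GOMP_parallel_start returns to user code before GOMP_parallel_end runs, so a stack-resident record would already be gone when it has to be unlinked; hence the __kmp_allocate in __kmp_GOMP_serialized_parallel and the matching __kmp_free above. The pairing, condensed:

/* GOMP_parallel_start: the record must outlive this call */
ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
    __kmp_allocate(sizeof(ompt_lw_taskteam_t));
__ompt_lw_taskteam_link(lwt, thr);

/* GOMP_parallel_end: unlink and free */
ompt_lw_taskteam_t *done = __ompt_lw_taskteam_unlink(thr);
if (done) __kmp_free(done);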
@ -635,13 +833,13 @@ LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), \
if (num_threads != 0) { \
__kmp_push_num_threads(&loc, gtid, num_threads); \
} \
__kmp_GOMP_fork_call(&loc, gtid, \
__kmp_GOMP_fork_call(&loc, gtid, task, \
(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, \
task, data, num_threads, &loc, (schedule), lb, \
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \
} \
else { \
__kmpc_serialized_parallel(&loc, gtid); \
__kmp_GOMP_serialized_parallel(&loc, gtid, task); \
} \
\
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
@ -707,9 +905,32 @@ xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_fu
__kmpc_omp_task(&loc, gtid, task);
}
else {
#if OMPT_SUPPORT
ompt_thread_info_t oldInfo;
kmp_info_t *thread;
kmp_taskdata_t *taskdata;
if (ompt_status & ompt_status_track) {
// Store the thread's state and restore it after the task
thread = __kmp_threads[ gtid ];
taskdata = KMP_TASK_TO_TASKDATA(task);
oldInfo = thread->th.ompt_thread_info;
thread->th.ompt_thread_info.wait_id = 0;
thread->th.ompt_thread_info.state = ompt_state_work_parallel;
taskdata->ompt_task_info.frame.exit_runtime_frame =
__builtin_frame_address(0);
}
#endif
__kmpc_omp_task_begin_if0(&loc, gtid, task);
func(data);
__kmpc_omp_task_complete_if0(&loc, gtid, task);
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
thread->th.ompt_thread_info = oldInfo;
taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
}
#endif
}
KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid));
@ -801,6 +1022,16 @@ xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task) (void *), void *
{
int gtid = __kmp_entry_gtid();
int last = FALSE;
#if OMPT_SUPPORT
ompt_frame_t *parent_frame;
if (ompt_status & ompt_status_track) {
parent_frame = __ompt_get_task_frame_internal(0);
parent_frame->reenter_runtime_frame = __builtin_frame_address(0);
}
#endif
MKLOC(loc, "GOMP_parallel_sections_start");
KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid));
@ -808,15 +1039,21 @@ xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task) (void *), void *
if (num_threads != 0) {
__kmp_push_num_threads(&loc, gtid, num_threads);
}
__kmp_GOMP_fork_call(&loc, gtid,
__kmp_GOMP_fork_call(&loc, gtid, task,
(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, task, data,
num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1,
(kmp_int)count, (kmp_int)1, (kmp_int)1);
}
else {
__kmpc_serialized_parallel(&loc, gtid);
__kmp_GOMP_serialized_parallel(&loc, gtid, task);
}
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
parent_frame->reenter_runtime_frame = NULL;
}
#endif
KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid));
@ -865,11 +1102,11 @@ xexpand(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), void *data, unsigned n
if(flags != 0) {
__kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags);
}
__kmp_GOMP_fork_call(&loc, gtid,
__kmp_GOMP_fork_call(&loc, gtid, task,
(microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data);
}
else {
__kmpc_serialized_parallel(&loc, gtid);
__kmp_GOMP_serialized_parallel(&loc, gtid, task);
}
task(data);
xexpand(KMP_API_NAME_GOMP_PARALLEL_END)();
@ -891,13 +1128,13 @@ xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task) (void *), void *data,
if(flags != 0) {
__kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags);
}
__kmp_GOMP_fork_call(&loc, gtid,
__kmp_GOMP_fork_call(&loc, gtid, task,
(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, task, data,
num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1,
(kmp_int)count, (kmp_int)1, (kmp_int)1);
}
else {
__kmpc_serialized_parallel(&loc, gtid);
__kmp_GOMP_serialized_parallel(&loc, gtid, task);
}
KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
@ -924,13 +1161,13 @@ xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task) (void *), void *data,
if (flags != 0) { \
__kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); \
} \
__kmp_GOMP_fork_call(&loc, gtid, \
__kmp_GOMP_fork_call(&loc, gtid, task, \
(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, \
task, data, num_threads, &loc, (schedule), lb, \
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \
} \
else { \
__kmpc_serialized_parallel(&loc, gtid); \
__kmp_GOMP_serialized_parallel(&loc, gtid, task); \
} \
\
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \

View File

@ -1206,6 +1206,10 @@ __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
volatile kmp_uint32 *spin_here_p;
kmp_int32 need_mf = 1;
#if OMPT_SUPPORT
ompt_state_t prev_state = ompt_state_undefined;
#endif
KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
KMP_FSYNC_PREPARE( lck );
@ -1309,6 +1313,16 @@ __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
#ifdef DEBUG_QUEUING_LOCKS
TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
#endif
#if OMPT_SUPPORT
if ((ompt_status & ompt_status_track) &&
prev_state != ompt_state_undefined) {
/* change the state before clearing wait_id */
this_thr->th.ompt_thread_info.state = prev_state;
this_thr->th.ompt_thread_info.wait_id = 0;
}
#endif
KMP_FSYNC_ACQUIRED( lck );
return; /* lock holder cannot be on queue */
}
@ -1317,6 +1331,16 @@ __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
break;
}
#if OMPT_SUPPORT
if ((ompt_status & ompt_status_track) &&
prev_state == ompt_state_undefined) {
/* this thread will spin; set wait_id before entering wait state */
prev_state = this_thr->th.ompt_thread_info.state;
this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
}
#endif
if ( enqueued ) {
if ( tail > 0 ) {
kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
@ -1346,6 +1370,13 @@ __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
#ifdef DEBUG_QUEUING_LOCKS
TRACE_LOCK( gtid+1, "acq exit 2" );
#endif
#if OMPT_SUPPORT
/* change the state before clearing wait_id */
this_thr->th.ompt_thread_info.state = prev_state;
this_thr->th.ompt_thread_info.wait_id = 0;
#endif
/* got lock, we were dequeued by the thread that released lock */
return;
}
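
The ordering inside these blocks is deliberate, as the comments note: entering the wait, wait_id is published before the state flips to ompt_state_wait_lock; leaving it, the state is restored before wait_id is cleared. This way a tool sampling the pair asynchronously never sees a wait state with an empty or stale wait_id. Condensed:

/* entering the wait: wait_id first, then state */
prev_state = this_thr->th.ompt_thread_info.state;
this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;

/* leaving the wait: state first, then wait_id */
this_thr->th.ompt_thread_info.state = prev_state;
this_thr->th.ompt_thread_info.wait_id = 0;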
@ -1491,6 +1522,11 @@ __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
#ifdef DEBUG_QUEUING_LOCKS
TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
#endif
#if OMPT_SUPPORT
/* nothing to do - no other thread is trying to shift blame */
#endif
return KMP_LOCK_RELEASED;
}
dequeued = FALSE;

View File

@ -26,6 +26,10 @@
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0
#define KMP_USE_POOLED_ALLOC 0
@ -759,6 +763,16 @@ __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
/* TODO replace with general release procedure */
team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
#if OMPT_SUPPORT && OMPT_BLAME
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
/* accept blame for "ordered" waiting */
kmp_info_t *this_thread = __kmp_threads[gtid];
ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
this_thread->th.ompt_thread_info.wait_id);
}
#endif
KMP_MB(); /* Flush all pending memory write invalidates. */
}
#endif /* BUILD_PARALLEL_ORDERED */
@ -1271,7 +1285,14 @@ __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
__kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
#endif
new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
ompt_parallel_id,
#endif
#if OMP_40_ENABLED
proc_bind,
#endif
@ -1355,6 +1376,11 @@ __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
}
this_thr->th.th_dispatch = serial_team->t.t_dispatch;
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
__ompt_team_assign_id(serial_team, ompt_parallel_id);
#endif
KMP_MB();
} else {
@ -1422,6 +1448,9 @@ __kmp_fork_call(
int gtid,
enum fork_context_e call_context, // Intel, GNU, ...
kmp_int32 argc,
#if OMPT_SUPPORT
void *unwrapped_task,
#endif
microtask_t microtask,
launch_t invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
@ -1477,6 +1506,21 @@ __kmp_fork_call(
root = master_th->th.th_root;
master_active = root->r.r_active;
master_set_numthreads = master_th->th.th_set_nproc;
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id;
ompt_task_id_t ompt_task_id;
ompt_frame_t *ompt_frame;
ompt_task_id_t my_task_id;
ompt_parallel_id_t my_parallel_id;
if (ompt_status & ompt_status_track) {
ompt_parallel_id = __ompt_parallel_id_new(gtid);
ompt_task_id = __ompt_get_task_id_internal(0);
ompt_frame = __ompt_get_task_frame_internal(0);
}
#endif
// Nested level will be an index in the nested nthreads array
level = parent_team->t.t_level;
#if OMP_40_ENABLED
@ -1493,6 +1537,16 @@ __kmp_fork_call(
}
#endif
#if OMPT_SUPPORT
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
int team_size = master_set_numthreads;
ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
ompt_task_id, ompt_frame, ompt_parallel_id,
team_size, unwrapped_task);
}
#endif
master_th->th.th_ident = loc;
@ -1519,11 +1573,77 @@ __kmp_fork_call(
KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
parent_team->t.t_serialized--; // AC: need this so that enquiry functions
// work correctly; will restore at join time
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
ompt_lw_taskteam_t lw_taskteam;
if (ompt_status & ompt_status_track) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
unwrapped_task, ompt_parallel_id);
lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
__ompt_lw_taskteam_link(&lw_taskteam, master_th);
#if OMPT_TRACE
/* OMPT implicit task begin */
my_task_id = lw_taskteam.ompt_task_info.task_id;
my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
my_parallel_id, my_task_id);
}
#endif
/* OMPT state */
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
#endif
KMP_TIME_BLOCK(OMP_work);
__kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv );
__kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
, exit_runtime_p
#endif
);
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
#if OMPT_TRACE
lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
ompt_parallel_id, ompt_task_id);
}
__ompt_lw_taskteam_unlink(master_th);
// clear the task id only after unlinking the task
lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
#endif
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
ompt_parallel_id, ompt_task_id);
}
master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
return TRUE;
}
parent_team->t.t_pkfn = microtask;
#if OMPT_SUPPORT
parent_team->t.ompt_team_info.microtask = unwrapped_task;
#endif
parent_team->t.t_invoke = invoker;
KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
parent_team->t.t_active_level ++;
@ -1620,10 +1740,70 @@ __kmp_fork_call(
// revert change made in __kmpc_serialized_parallel()
master_th->th.th_serial_team->t.t_level--;
// Get args from parent team for teams construct
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
ompt_lw_taskteam_t lw_taskteam;
if (ompt_status & ompt_status_track) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
unwrapped_task, ompt_parallel_id);
lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
__ompt_lw_taskteam_link(&lw_taskteam, master_th);
#if OMPT_TRACE
my_task_id = lw_taskteam.ompt_task_info.task_id;
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
ompt_parallel_id, my_task_id);
}
#endif
/* OMPT state */
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
#endif
{
KMP_TIME_BLOCK(OMP_work);
__kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv );
__kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
, exit_runtime_p
#endif
);
}
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
#if OMPT_TRACE
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
ompt_parallel_id, ompt_task_id);
}
#endif
__ompt_lw_taskteam_unlink(master_th);
// clear the task id only after unlinking the task
lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
ompt_parallel_id, ompt_task_id);
}
master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
} else if ( microtask == (microtask_t)__kmp_teams_master ) {
KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
team = master_th->th.th_team;
@ -1664,15 +1844,88 @@ __kmp_fork_call(
*argv++ = va_arg( ap, void * );
#endif
KMP_MB();
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
ompt_lw_taskteam_t lw_taskteam;
if (ompt_status & ompt_status_track) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
unwrapped_task, ompt_parallel_id);
lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
__ompt_lw_taskteam_link(&lw_taskteam, master_th);
#if OMPT_TRACE
/* OMPT implicit task begin */
my_task_id = lw_taskteam.ompt_task_info.task_id;
my_parallel_id = ompt_parallel_id;
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
my_parallel_id, my_task_id);
}
#endif
/* OMPT state */
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
#endif
{
KMP_TIME_BLOCK(OMP_work);
__kmp_invoke_microtask( microtask, gtid, 0, argc, args );
__kmp_invoke_microtask( microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
, exit_runtime_p
#endif
);
}
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
#if OMPT_TRACE
lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
my_parallel_id, my_task_id);
}
#endif
__ompt_lw_taskteam_unlink(master_th);
// clear the task id only after unlinking the task
lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
ompt_parallel_id, ompt_task_id);
}
master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
#if OMP_40_ENABLED
}
#endif /* OMP_40_ENABLED */
}
else if ( call_context == fork_context_gnu ) {
#if OMPT_SUPPORT
ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
__kmp_allocate(sizeof(ompt_lw_taskteam_t));
__ompt_lw_taskteam_init(lwt, master_th, gtid,
unwrapped_task, ompt_parallel_id);
lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
lwt->ompt_task_info.frame.exit_runtime_frame = 0;
__ompt_lw_taskteam_link(lwt, master_th);
#endif
// we were called from GNU native code
KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
return FALSE;
@ -1759,6 +2012,9 @@ __kmp_fork_call(
/* allocate a new parallel team */
KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
ompt_parallel_id,
#endif
#if OMP_40_ENABLED
proc_bind,
#endif
@ -1767,6 +2023,9 @@ __kmp_fork_call(
/* allocate a new parallel team */
KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
ompt_parallel_id,
#endif
#if OMP_40_ENABLED
proc_bind,
#endif
@ -1781,6 +2040,9 @@ __kmp_fork_call(
team->t.t_ident = loc;
team->t.t_parent = parent_team;
TCW_SYNC_PTR(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
TCW_SYNC_PTR(team->t.ompt_team_info.microtask, unwrapped_task);
#endif
team->t.t_invoke = invoker; /* TODO move this to root, maybe */
// TODO: parent_team->t.t_level == INT_MAX ???
#if OMP_40_ENABLED
@ -1867,6 +2129,9 @@ __kmp_fork_call(
__kmp_fork_team_threads( root, team, master_th, gtid );
__kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );
#if OMPT_SUPPORT
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif
__kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
@ -1948,9 +2213,42 @@ __kmp_fork_call(
KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
return TRUE;
}
#if OMPT_SUPPORT
static inline void
__kmp_join_restore_state(
kmp_info_t *thread,
kmp_team_t *team)
{
// restore state outside the region
thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
ompt_state_work_serial : ompt_state_work_parallel);
}
static inline void
__kmp_join_ompt(
kmp_info_t *thread,
kmp_team_t *team,
ompt_parallel_id_t parallel_id)
{
if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
parallel_id, task_info->task_id);
}
__kmp_join_restore_state(thread,team);
}
#endif
void
__kmp_join_call(ident_t *loc, int gtid
#if OMP_40_ENABLED
@ -1976,6 +2274,12 @@ __kmp_join_call(ident_t *loc, int gtid
master_th->th.th_ident = loc;
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
#if KMP_DEBUG
if ( __kmp_tasking_mode != tskm_immediate_exec ) {
KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
@ -2003,6 +2307,13 @@ __kmp_join_call(ident_t *loc, int gtid
}
#endif /* OMP_40_ENABLED */
__kmpc_end_serialized_parallel( loc, gtid );
#if OMPT_SUPPORT
if (ompt_status == ompt_status_track_callback) {
__kmp_join_restore_state(master_th, parent_team);
}
#endif
return;
}
@ -2022,6 +2333,10 @@ __kmp_join_call(ident_t *loc, int gtid
KMP_MB();
#if OMPT_SUPPORT
ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;
#endif
#if USE_ITT_BUILD
if ( __itt_stack_caller_create_ptr ) {
__kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier
@ -2097,6 +2412,13 @@ __kmp_join_call(ident_t *loc, int gtid
}
}
}
#if OMPT_SUPPORT
if (ompt_status == ompt_status_track_callback) {
__kmp_join_ompt(master_th, parent_team, parallel_id);
}
#endif
return;
}
#endif /* OMP_40_ENABLED */
@ -2182,6 +2504,12 @@ __kmp_join_call(ident_t *loc, int gtid
__kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
#if OMPT_SUPPORT
if (ompt_status == ompt_status_track_callback) {
__kmp_join_ompt(master_th, parent_team, parallel_id);
}
#endif
KMP_MB();
KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
}
@ -2814,11 +3142,15 @@ __kmp_initialize_root( kmp_root_t *root )
/* setup the root team for this task */
/* allocate the root team structure */
KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
root_team =
__kmp_allocate_team(
root,
1, // new_nproc
1, // max_nproc
#if OMPT_SUPPORT
0, // root parallel id
#endif
#if OMP_40_ENABLED
__kmp_nested_proc_bind.bind_types[0],
#endif
@ -2845,11 +3177,15 @@ __kmp_initialize_root( kmp_root_t *root )
/* setup the hot team for this task */
/* allocate the hot team structure */
KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
hot_team =
__kmp_allocate_team(
root,
1, // new_nproc
__kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
0, // root parallel id
#endif
#if OMP_40_ENABLED
__kmp_nested_proc_bind.bind_types[0],
#endif
@ -3425,7 +3761,11 @@ __kmp_register_root( int initial_thread )
if( ! root_thread->th.th_serial_team ) {
kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
#if OMPT_SUPPORT
0, // root parallel id
#endif
#if OMP_40_ENABLED
proc_bind_default,
#endif
@ -3563,6 +3903,14 @@ __kmp_reset_root(int gtid, kmp_root_t *root)
__kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
#endif /* KMP_OS_WINDOWS */
#if OMPT_SUPPORT
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
int gtid = __kmp_get_gtid();
__ompt_thread_end(ompt_thread_initial, gtid);
}
#endif
TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
__kmp_reap_thread( root->r.r_uber_thread, 1 );
@ -3894,8 +4242,12 @@ __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
{
kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
new_thr->th.th_serial_team = serial_team =
(kmp_team_t*) __kmp_allocate_team( root, 1, 1,
#if OMPT_SUPPORT
0, // root parallel id
#endif
#if OMP_40_ENABLED
proc_bind_default,
#endif
@ -4395,6 +4747,9 @@ __kmp_partition_places( kmp_team_t *team )
/* allocate a new team data structure to use. take one off of the free pool if available */
kmp_team_t *
__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
ompt_parallel_id_t ompt_parallel_id,
#endif
#if OMP_40_ENABLED
kmp_proc_bind_t new_proc_bind,
#endif
@ -4764,6 +5119,10 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
}
#endif
#if OMPT_SUPPORT
__ompt_team_assign_id(team, ompt_parallel_id);
#endif
KMP_MB();
return team;
@ -4804,6 +5163,11 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
#endif
KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
#if OMPT_SUPPORT
__ompt_team_assign_id(team, ompt_parallel_id);
#endif
KMP_MB();
return team;
@ -4856,6 +5220,11 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
team->t.t_proc_bind = new_proc_bind;
#endif
#if OMPT_SUPPORT
__ompt_team_assign_id(team, ompt_parallel_id);
team->t.ompt_serialized_team_info = NULL;
#endif
KMP_MB();
KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
@ -5101,6 +5470,18 @@ __kmp_launch_thread( kmp_info_t *this_thr )
this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak?
}
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
this_thr->th.ompt_thread_info.wait_id = 0;
this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
__ompt_thread_begin(ompt_thread_worker, gtid);
}
}
#endif
/* This is the place where threads wait for work */
while( ! TCR_4(__kmp_global.g.g_done) ) {
KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
@ -5109,9 +5490,21 @@ __kmp_launch_thread( kmp_info_t *this_thr )
/* wait for work to do */
KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
this_thr->th.ompt_thread_info.state = ompt_state_idle;
}
#endif
/* No tid yet since not part of a team */
__kmp_fork_barrier( gtid, KMP_GTID_DNE );
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
/* have we been allocated? */
@ -5124,6 +5517,12 @@ __kmp_launch_thread( kmp_info_t *this_thr )
updateHWFPControl (*pteam);
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
}
#endif
KMP_STOP_EXPLICIT_TIMER(USER_launch_thread_loop);
{
KMP_TIME_BLOCK(USER_worker_invoke);
@ -5132,6 +5531,15 @@ __kmp_launch_thread( kmp_info_t *this_thr )
KMP_START_EXPLICIT_TIMER(USER_launch_thread_loop);
KMP_ASSERT( rc );
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
/* no frame set while outside task */
int tid = __kmp_tid_from_gtid(gtid);
(*pteam)->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0;
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
KMP_MB();
KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
@ -5142,6 +5550,13 @@ __kmp_launch_thread( kmp_info_t *this_thr )
}
TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
#if OMPT_SUPPORT
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
__ompt_thread_end(ompt_thread_worker, gtid);
}
#endif
if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) {
__kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
}
@ -5480,6 +5895,9 @@ __kmp_internal_end(void)
__kmp_cleanup();
#if OMPT_SUPPORT
ompt_fini();
#endif
}
void
@ -6140,6 +6558,9 @@ __kmp_do_serial_initialize( void )
KMP_MB();
KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
#if OMPT_SUPPORT
ompt_init();
#endif
}
void
@ -6284,6 +6705,9 @@ __kmp_middle_initialize( void )
}
__kmp_do_middle_initialize();
__kmp_release_bootstrap_lock( &__kmp_initz_lock );
#if OMPT_SUPPORT
ompt_init();
#endif
}
void
@ -6353,6 +6777,9 @@ __kmp_parallel_initialize( void )
KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
__kmp_release_bootstrap_lock( &__kmp_initz_lock );
#if OMPT_SUPPORT
ompt_init();
#endif
}
@ -6409,8 +6836,49 @@ __kmp_invoke_task_func( int gtid )
#if INCLUDE_SSC_MARKS
SSC_MARK_INVOKING();
#endif
#if OMPT_SUPPORT
void *dummy;
void **exit_runtime_p;
ompt_task_id_t my_task_id;
ompt_parallel_id_t my_parallel_id;
if (ompt_status & ompt_status_track) {
exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
ompt_task_info.frame.exit_runtime_frame);
} else {
exit_runtime_p = &dummy;
}
#if OMPT_TRACE
my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
my_parallel_id = team->t.ompt_team_info.parallel_id;
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
my_parallel_id, my_task_id);
}
#endif
#endif
rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv );
gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
#if OMPT_SUPPORT
, exit_runtime_p
#endif
);
#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_status & ompt_status_track) {
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
my_parallel_id, my_task_id);
}
// the implicit task is not dead yet, so we can't clear its task id here
team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0;
}
#endif
#if USE_ITT_BUILD
if ( __itt_stack_caller_create_ptr ) {
@ -6442,7 +6910,10 @@ __kmp_teams_master( int gtid )
#endif
__kmp_fork_call( loc, gtid, fork_context_intel,
team->t.t_argc,
(microtask_t)thr->th.th_teams_microtask,
#if OMPT_SUPPORT
(void *)thr->th.th_teams_microtask, // "unwrapped" task
#endif
(microtask_t)thr->th.th_teams_microtask, // "wrapped" task
VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
NULL );
#if INCLUDE_SSC_MARKS

View File

@ -29,6 +29,10 @@
#include "kmp_stats.h"
#include "kmp_itt.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
// template for type limits
template< typename T >
struct i_maxmin {
@ -89,6 +93,11 @@ __kmp_for_static_init(
register UT trip_count;
register kmp_team_t *team;
#if OMPT_SUPPORT && OMPT_TRACE
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
#endif
KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride );
KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
@ -132,6 +141,15 @@ __kmp_for_static_init(
}
#endif
KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
#if OMPT_SUPPORT && OMPT_TRACE
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
team_info->parallel_id, task_info->task_id,
team_info->microtask);
}
#endif
return;
}
@ -168,6 +186,15 @@ __kmp_for_static_init(
}
#endif
KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
#if OMPT_SUPPORT && OMPT_TRACE
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
team_info->parallel_id, task_info->task_id,
team_info->microtask);
}
#endif
return;
}
nth = team->t.t_nproc;
@ -187,6 +214,15 @@ __kmp_for_static_init(
}
#endif
KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
#if OMPT_SUPPORT && OMPT_TRACE
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
team_info->parallel_id, task_info->task_id,
team_info->microtask);
}
#endif
return;
}
@ -304,6 +340,15 @@ __kmp_for_static_init(
}
#endif
KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) );
#if OMPT_SUPPORT && OMPT_TRACE
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
team_info->parallel_id, task_info->task_id, team_info->microtask);
}
#endif
return;
}

View File

@ -434,6 +434,18 @@ __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_ta
KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
gtid, taskdata ) );
#if OMPT_SUPPORT
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
kmp_taskdata_t *parent = taskdata->td_parent;
ompt_callbacks.ompt_callback(ompt_event_task_begin)(
parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
parent ? &(parent->ompt_task_info.frame) : NULL,
taskdata->ompt_task_info.task_id,
taskdata->ompt_task_info.function);
}
#endif
return;
}
@ -579,6 +591,15 @@ __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_tas
kmp_info_t * thread = __kmp_threads[ gtid ];
kmp_int32 children = 0;
#if OMPT_SUPPORT
if ((ompt_status == ompt_status_track_callback) &&
ompt_callbacks.ompt_callback(ompt_event_task_end)) {
kmp_taskdata_t *parent = taskdata->td_parent;
ompt_callbacks.ompt_callback(ompt_event_task_end)(
taskdata->ompt_task_info.task_id);
}
#endif
KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
gtid, taskdata, resumed_task) );
@ -654,6 +675,9 @@ __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_tas
// Free this task and then ancestor tasks if they have no children.
__kmp_free_task_and_ancestors(gtid, taskdata, thread);
// FIXME johnmc: I think this statement should be before the last one so if an
// asynchronous inquiry peers into the runtime system it doesn't see the freed
// task as the current task
__kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
// TODO: GEH - make sure root team implicit task is initialized properly.
@ -783,6 +807,10 @@ __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *te
KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
}
#if OMPT_SUPPORT
__kmp_task_init_ompt(task, tid);
#endif
KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
tid, team, task ) );
}
@ -937,6 +965,15 @@ __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
gtid, taskdata, taskdata->td_parent) );
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
taskdata->ompt_task_info.task_id = __ompt_task_id_new(gtid);
taskdata->ompt_task_info.function = (void*) task_entry;
taskdata->ompt_task_info.frame = (ompt_frame_t)
{ .exit_runtime_frame = NULL, .reenter_runtime_frame = NULL };
}
#endif
return task;
}
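
A freshly allocated task starts with both frame pointers cleared: exit_runtime_frame is filled in only once the task actually runs (see __kmp_invoke_task below), and reenter_runtime_frame only while the task is back inside the runtime. A sketch of how a sampling tool might read the pair (the classification is illustrative; the field names come from this patch):

    /* Classify a task's execution state from its frame pair. */
    static const char *classify_task(const ompt_frame_t *frame)
    {
        if (frame->exit_runtime_frame == NULL)
            return "not yet running user code";
        if (frame->reenter_runtime_frame != NULL)
            return "user code re-entered the runtime (e.g. to spawn a task)";
        return "executing user task code";
    }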
@ -984,6 +1021,19 @@ __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_ta
__kmp_task_start( gtid, task, current_task );
#if OMPT_SUPPORT
ompt_thread_info_t oldInfo;
kmp_info_t * thread;
if (ompt_status & ompt_status_track) {
// Store the thread's state and restore it after the task
thread = __kmp_threads[ gtid ];
oldInfo = thread->th.ompt_thread_info;
thread->th.ompt_thread_info.wait_id = 0;
thread->th.ompt_thread_info.state = ompt_state_work_parallel;
taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
}
#endif
#if OMP_40_ENABLED
// TODO: cancel tasks if the parallel region has also been cancelled
// TODO: check if this sequence can be hoisted above __kmp_task_start
@ -1017,6 +1067,14 @@ __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_ta
}
#endif // OMP_40_ENABLED
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
thread->th.ompt_thread_info = oldInfo;
taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
}
#endif
__kmp_task_finish( gtid, task, current_task );
KA_TRACE(30, ("__kmp_inovke_task(exit): T#%d completed task %p, resuming task %p\n",
@ -1073,6 +1131,13 @@ __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate
{
kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
__builtin_frame_address(0);
}
#endif
/* Should we execute the new task or queue it? For now, let's just always try to
queue it. If the queue fills up, then we'll execute it. */
@ -1084,6 +1149,11 @@ __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate
__kmp_invoke_task( gtid, new_task, current_task );
}
#if OMPT_SUPPORT
if (ompt_status & ompt_status_track) {
new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
}
#endif
return TASK_CURRENT_NOT_QUEUED;
}
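
Note the set/clear bracketing: reenter_runtime_frame is non-NULL exactly while __kmp_omp_task (and anything it calls, including an immediate __kmp_invoke_task) sits on the stack above the user code. Reduced to its essentials, the idiom is (sketch only, not a real runtime entry point):

    static void runtime_entry(kmp_taskdata_t *td)
    {
        td->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
        /* ... work that may run user code and fire callbacks ... */
        td->ompt_task_info.frame.reenter_runtime_frame = 0;  /* clear on the way out */
    }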

View File

@ -95,6 +95,32 @@ static inline void __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_
th_gtid = this_thr->th.th_info.ds.ds_gtid;
KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if OMPT_SUPPORT && OMPT_BLAME
if (ompt_status == ompt_status_track_callback) {
if (this_thr->th.ompt_thread_info.state == ompt_state_idle){
if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
}
} else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
KMP_DEBUG_ASSERT(this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier ||
this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit ||
this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_explicit);
ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
ompt_parallel_id_t pId;
ompt_task_id_t tId;
if (team){
pId = team->ompt_team_info.parallel_id;
tId = team->ompt_task_info.task_id;
} else {
pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
tId = this_thr->th.th_current_task->ompt_task_info.task_id;
}
ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
}
}
#endif
// Setup for waiting
KMP_INIT_YIELD(spins);
@ -207,6 +233,33 @@ static inline void __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_
}
// TODO: If thread is done with work and times out, disband/free
}
#if OMPT_SUPPORT && OMPT_BLAME
if (ompt_status == ompt_status_track_callback) {
if (this_thr->th.ompt_thread_info.state == ompt_state_idle){
if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
}
} else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
KMP_DEBUG_ASSERT(this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier ||
this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit ||
this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_explicit);
ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
ompt_parallel_id_t pId;
ompt_task_id_t tId;
if (team){
pId = team->ompt_team_info.parallel_id;
tId = team->ompt_task_info.task_id;
} else {
pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
tId = this_thr->th.th_current_task->ompt_task_info.task_id;
}
ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
}
}
#endif
KMP_FSYNC_SPIN_ACQUIRED(spin);
}
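
Because the two blocks mirror each other, every ompt_event_idle_begin on a thread is matched by an ompt_event_idle_end (and likewise for the wait_barrier pair), so idle time can be accounted by simple bracketing. A minimal tool-side sketch (the now() helper, the table size, the ompt_thread_id_t spelling, and using the th_gtid + 1 value as an index are all assumptions):

    #include <time.h>

    #define MY_MAX_THREADS 512  /* assumption: upper bound on thread ids */
    static double idle_start[MY_MAX_THREADS];
    static double idle_total[MY_MAX_THREADS];

    static double now(void)  /* monotonic wall-clock seconds */
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec + ts.tv_nsec * 1e-9;
    }

    static void my_idle_begin(ompt_thread_id_t tid) { idle_start[tid] = now(); }
    static void my_idle_end(ompt_thread_id_t tid)   { idle_total[tid] += now() - idle_start[tid]; }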

View File

@ -598,6 +598,12 @@ __kmp_unnamed_critical_addr:
// temp: -8(%ebp)
//
pushl %ebx // save %ebx to use during this routine
//
#if OMPT_SUPPORT
movl 28(%ebp),%ebx // get exit_frame address
movl %ebp,(%ebx) // save exit_frame
#endif
movl 20(%ebp),%ebx // Stack alignment - # args
addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid)
shll $2,%ebx // Number of bytes used on stack: (#args+2)*4
@ -1221,6 +1227,7 @@ KMP_LABEL(invoke_3):
// %edx: tid
// %ecx: argc
// %r8: p_argv
// %r9: &exit_frame
//
// locals:
// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
@ -1250,6 +1257,11 @@ __tid = -24
KMP_CFI_OFFSET rbp,-16
movq %rsp,%rbp // establish the base pointer for this routine.
KMP_CFI_REGISTER rbp
#if OMPT_SUPPORT
movq %rbp, (%r9) // save exit_frame
#endif
pushq %rbx // %rbx is callee-saved register
pushq %rsi // Put gtid on stack so can pass &tgid to pkfn
pushq %rdx // Put tid on stack so can pass &tid to pkfn
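
On x86-64 Linux the sixth integer argument arrives in %r9, so the two added instructions store the routine's new frame pointer through the caller-supplied &exit_frame before any user code runs; the 32-bit version above does the same through the 28(%ebp) stack slot. Expressed in C, the addition amounts to this (sketch only; parameter names follow the register comments above):

    int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid,
                               int argc, void *p_argv[], void **exit_frame)
    {
        *exit_frame = __builtin_frame_address(0);  /* publish runtime/user boundary */
        /* ... existing argument marshalling and the call to pkfn ... */
        return 1;  /* placeholder; the real routine returns after calling pkfn */
    }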

View File

@ -582,6 +582,9 @@ _gtid$ = 12
_tid$ = 16
_argc$ = 20
_argv$ = 24
if OMPT_SUPPORT
_exit_frame$ = 28
endif
_i$ = -8
_stk_adj$ = -16
_vptr$ = -12
@ -595,6 +598,10 @@ ___kmp_invoke_microtask PROC NEAR
push ebx
push esi
push edi
if OMPT_SUPPORT
mov eax, DWORD PTR _exit_frame$[ebp]
mov DWORD PTR [eax], ebp
endif
; Line 114
mov eax, DWORD PTR _argc$[ebp]
mov DWORD PTR _i$[ebp], eax
@ -1307,6 +1314,9 @@ $_gtid = 24
$_tid = 32
$_argc = 40
$_p_argv = 48
if OMPT_SUPPORT
$_exit_frame = 56
endif
PUBLIC __kmp_invoke_microtask
_TEXT SEGMENT
@ -1322,6 +1332,10 @@ __kmp_invoke_microtask PROC FRAME ;NEAR
lea rbp, QWORD PTR [rsp] ; establish the base pointer
.setframe rbp, 0
.ENDPROLOG
if OMPT_SUPPORT
mov rax, QWORD PTR $_exit_frame[rbp]
mov QWORD PTR [rax], rbp
endif
mov r10, rcx ; save pkfn pointer for later
;; ------------------------------------------------------------