LLVM OpenMP* Runtime Library
kmp_wait_release.h
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef KMP_WAIT_RELEASE_H
15 #define KMP_WAIT_RELEASE_H
16 
17 #include "kmp.h"
18 #include "kmp_itt.h"
19 #include "kmp_stats.h"
20 #if OMPT_SUPPORT
21 #include "ompt-specific.h"
22 #endif
23 
40 enum flag_type {
41  flag32, /**< 32 bit flags */
42  flag64, /**< 64 bit flags */
43  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
44 };
45 
49 template <typename P> class kmp_flag {
50  volatile P
51  *loc; /**< Pointer to the flag storage that is modified by another thread */
53  flag_type t; /**< "Type" of the flag stored at loc */
54 public:
55  typedef P flag_t;
56  kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
60  volatile P *get() { return loc; }
64  void set(volatile P *new_loc) { loc = new_loc; }
68  flag_type get_type() { return t; }
69  // Derived classes must provide the following:
70  /*
71  kmp_info_t * get_waiter(kmp_uint32 i);
72  kmp_uint32 get_num_waiters();
73  bool done_check();
74  bool done_check_val(P old_loc);
75  bool notdone_check();
76  void internal_release();
77  void suspend(int th_gtid);
78  void resume(int th_gtid);
79  P set_sleeping();
80  P unset_sleeping();
81  bool is_sleeping();
82  bool is_any_sleeping();
83  bool is_sleeping_val(P old_loc);
84  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
85  int *thread_finished
86  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
87  is_constrained);
88  */
89 };
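The methods listed in the comment above form a compile-time contract rather than a set of virtual functions: __kmp_wait_template and __kmp_release_template below are templates over the concrete flag class, so there is no virtual dispatch inside the spin loop. A minimal stand-alone sketch of that pattern follows; toy_flag32, toy_wait, and toy_release are illustrative names invented here, not runtime identifiers.

// Illustrative sketch only -- not part of the runtime.
#include <atomic>
#include <cstdint>

class toy_flag32 {
  std::atomic<std::uint32_t> *loc;
  std::uint32_t checker;

public:
  toy_flag32(std::atomic<std::uint32_t> *p, std::uint32_t c)
      : loc(p), checker(c) {}
  bool done_check() const {
    return loc->load(std::memory_order_acquire) == checker;
  }
  bool notdone_check() const { return !done_check(); }
  void internal_release() { loc->fetch_add(1, std::memory_order_acq_rel); }
};

// Generic wait/release code is instantiated per flag class, as with
// __kmp_wait_template / __kmp_release_template.
template <class C> void toy_wait(C *flag) {
  while (flag->notdone_check()) {
    // spin / yield / sleep would go here
  }
}
template <class C> void toy_release(C *flag) { flag->internal_release(); }

int main() {
  std::atomic<std::uint32_t> word{0};
  toy_flag32 f(&word, 1);
  toy_release(&f); // bump the value to the checker
  toy_wait(&f);    // returns immediately: word == checker
}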
90 
91 #if OMPT_SUPPORT
92 static inline void __ompt_implicit_task_end(kmp_info_t *this_thr,
93  omp_state_t omp_state,
94  ompt_data_t *tId,
95  ompt_data_t *pId) {
96  int ds_tid = this_thr->th.th_info.ds.ds_tid;
97  if (omp_state == omp_state_wait_barrier_implicit) {
98  this_thr->th.ompt_thread_info.state = omp_state_overhead;
99 #if OMPT_OPTIONAL
100  void *codeptr = NULL;
101  if (ompt_enabled.ompt_callback_sync_region_wait) {
102  ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
103  ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
104  }
105  if (ompt_enabled.ompt_callback_sync_region) {
106  ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
107  ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
108  }
109 #endif
110  if (!KMP_MASTER_TID(ds_tid)) {
111  if (ompt_enabled.ompt_callback_implicit_task) {
112  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
113  ompt_scope_end, NULL, tId, 0, ds_tid);
114  }
115 #if OMPT_OPTIONAL
116  if (ompt_enabled.ompt_callback_idle) {
117  ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin);
118  }
119 #endif
120  // return to idle state
121  this_thr->th.ompt_thread_info.state = omp_state_idle;
122  } else {
123  this_thr->th.ompt_thread_info.state = omp_state_overhead;
124  }
125  }
126 }
127 #endif
128 
129 /* Spin wait loop that first does pause, then yield, then sleep. A thread that
130  calls __kmp_wait_* must make certain that another thread calls __kmp_release
131  to wake it back up to prevent deadlocks! */
132 template <class C>
133 static inline void
134 __kmp_wait_template(kmp_info_t *this_thr, C *flag,
135  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
136  // NOTE: We may not belong to a team at this point.
137  volatile typename C::flag_t *spin = flag->get();
138  kmp_uint32 spins;
139  kmp_uint32 hibernate;
140  int th_gtid;
141  int tasks_completed = FALSE;
142  int oversubscribed;
143 #if !KMP_USE_MONITOR
144  kmp_uint64 poll_count;
145  kmp_uint64 hibernate_goal;
146 #endif
147 
148  KMP_FSYNC_SPIN_INIT(spin, NULL);
149  if (flag->done_check()) {
150  KMP_FSYNC_SPIN_ACQUIRED(CCAST(typename C::flag_t *, spin));
151  return;
152  }
153  th_gtid = this_thr->th.th_info.ds.ds_gtid;
154  KA_TRACE(20,
155  ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
156 #if KMP_STATS_ENABLED
157  stats_state_e thread_state = KMP_GET_THREAD_STATE();
158 #endif
159 
160 /* OMPT Behavior:
161 THIS function is called from
162  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
163  these have join / fork behavior
164 
165  In these cases, we don't change the state or trigger events in THIS
166 function.
167  Events are triggered in the calling code (__kmp_barrier):
168 
169  state := omp_state_overhead
170  barrier-begin
171  barrier-wait-begin
172  state := omp_state_wait_barrier
173  call join-barrier-implementation (finally arrive here)
174  {}
175  call fork-barrier-implementation (finally arrive here)
176  {}
177  state := omp_state_overhead
178  barrier-wait-end
179  barrier-end
180  state := omp_state_work_parallel
181 
182 
183  __kmp_fork_barrier (after thread creation, before executing implicit task)
184  call fork-barrier-implementation (finally arrive here)
185  {} // worker arrive here with state = omp_state_idle
186 
187 
188  __kmp_join_barrier (implicit barrier at end of parallel region)
189  state := omp_state_barrier_implicit
190  barrier-begin
191  barrier-wait-begin
192  call join-barrier-implementation (finally arrive here
193 final_spin=FALSE)
194  {
195  }
196  __kmp_fork_barrier (implicit barrier at end of parallel region)
197  call fork-barrier-implementation (finally arrive here final_spin=TRUE)
198 
199  Worker after task-team is finished:
200  barrier-wait-end
201  barrier-end
202  implicit-task-end
203  idle-begin
204  state := omp_state_idle
205 
206  Before leaving, if state = omp_state_idle
207  idle-end
208  state := omp_state_overhead
209 */
210 #if OMPT_SUPPORT
211  omp_state_t ompt_entry_state;
212  ompt_data_t *pId = NULL;
213  ompt_data_t *tId;
214  if (ompt_enabled.enabled) {
215  ompt_entry_state = this_thr->th.ompt_thread_info.state;
216  if (!final_spin || ompt_entry_state != omp_state_wait_barrier_implicit ||
217  KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
218  ompt_lw_taskteam_t *team =
219  this_thr->th.th_team->t.ompt_serialized_team_info;
220  if (team) {
221  pId = &(team->ompt_team_info.parallel_data);
222  tId = &(team->ompt_task_info.task_data);
223  } else {
224  pId = OMPT_CUR_TEAM_DATA(this_thr);
225  tId = OMPT_CUR_TASK_DATA(this_thr);
226  }
227  } else {
228  pId = NULL;
229  tId = &(this_thr->th.ompt_thread_info.task_data);
230  }
231 #if OMPT_OPTIONAL
232  if (ompt_entry_state == omp_state_idle) {
233  if (ompt_enabled.ompt_callback_idle) {
234  ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin);
235  }
236  } else
237 #endif
238  if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
239  this_thr->th.th_task_team == NULL)) {
240  // implicit task is done. Either no taskqueue, or task-team finished
241  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
242  }
243  }
244 #endif
245 
246  // Setup for waiting
247  KMP_INIT_YIELD(spins);
248 
249  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
250 #if KMP_USE_MONITOR
251 // The worker threads cannot rely on the team struct existing at this point.
252 // Use the bt values cached in the thread struct instead.
253 #ifdef KMP_ADJUST_BLOCKTIME
254  if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
255  // Force immediate suspend if not set by user and more threads than
256  // available procs
257  hibernate = 0;
258  else
259  hibernate = this_thr->th.th_team_bt_intervals;
260 #else
261  hibernate = this_thr->th.th_team_bt_intervals;
262 #endif /* KMP_ADJUST_BLOCKTIME */
263 
264  /* If the blocktime is nonzero, we want to make sure that we spin wait for
265  the entirety of the specified #intervals, plus up to one interval more.
266  This increment makes certain that this thread doesn't go to sleep too
267  soon. */
268  if (hibernate != 0)
269  hibernate++;
270 
271  // Add in the current time value.
272  hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
273  KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
274  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
275  hibernate - __kmp_global.g.g_time.dt.t_value));
276 #else
277  hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
278  poll_count = 0;
279 #endif // KMP_USE_MONITOR
280  }
281 
282  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
283  KMP_MB();
284 
285  // Main wait spin loop
286  while (flag->notdone_check()) {
287  int in_pool;
288  kmp_task_team_t *task_team = NULL;
289  if (__kmp_tasking_mode != tskm_immediate_exec) {
290  task_team = this_thr->th.th_task_team;
291  /* If the thread's task team pointer is NULL, it means one of 3 things:
292  1) A newly-created thread is first being released by
293  __kmp_fork_barrier(), and its task team has not been set up yet.
294  2) All tasks have been executed to completion.
295  3) Tasking is off for this region. This could be because we are in a
296  serialized region (perhaps the outer one), or else tasking was manually
297  disabled (KMP_TASKING=0). */
298  if (task_team != NULL) {
299  if (TCR_SYNC_4(task_team->tt.tt_active)) {
300  if (KMP_TASKING_ENABLED(task_team))
301  flag->execute_tasks(
302  this_thr, th_gtid, final_spin,
303  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
304  else
305  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
306  } else {
307  KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
308 #if OMPT_SUPPORT
309  // task-team is done now, other cases should be caught above
310  if (final_spin && ompt_enabled.enabled)
311  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
312 #endif
313  this_thr->th.th_task_team = NULL;
314  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
315  }
316  } else {
317  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
318  } // if
319  } // if
320 
321  KMP_FSYNC_SPIN_PREPARE(CCAST(typename C::flag_t *, spin));
322  if (TCR_4(__kmp_global.g.g_done)) {
323  if (__kmp_global.g.g_abort)
324  __kmp_abort_thread();
325  break;
326  }
327 
328  // If we are oversubscribed, or have waited a bit (and
329  // KMP_LIBRARY=throughput), then yield
330  // TODO: Should it be number of cores instead of thread contexts? Like:
331  // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
332  // Need performance improvement data to make the change...
333  if (oversubscribed) {
334  KMP_YIELD(1);
335  } else {
336  KMP_YIELD_SPIN(spins);
337  }
338  // Check if this thread was transferred from a team
339  // to the thread pool (or vice-versa) while spinning.
340  in_pool = !!TCR_4(this_thr->th.th_in_pool);
341  if (in_pool != !!this_thr->th.th_active_in_pool) {
342  if (in_pool) { // Recently transferred from team to pool
343  KMP_TEST_THEN_INC32(&__kmp_thread_pool_active_nth);
344  this_thr->th.th_active_in_pool = TRUE;
345  /* Here, we cannot assert that:
346  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <=
347  __kmp_thread_pool_nth);
348  __kmp_thread_pool_nth is inc/dec'd by the master thread while the
349  fork/join lock is held, whereas __kmp_thread_pool_active_nth is
350  inc/dec'd asynchronously by the workers. The two can get out of sync
351  for brief periods of time. */
352  } else { // Recently transferred from pool to team
353  KMP_TEST_THEN_DEC32(&__kmp_thread_pool_active_nth);
354  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
355  this_thr->th.th_active_in_pool = FALSE;
356  }
357  }
358 
359 #if KMP_STATS_ENABLED
360  // Check if thread has been signalled to idle state
361  // This indicates that the logical "join-barrier" has finished
362  if (this_thr->th.th_stats->isIdle() &&
363  KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
364  KMP_SET_THREAD_STATE(IDLE);
365  KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
366  }
367 #endif
368 
369  // Don't suspend if KMP_BLOCKTIME is set to "infinite"
370  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
371  continue;
372 
373  // Don't suspend if there is a likelihood of new tasks being spawned.
374  if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
375  continue;
376 
377 #if KMP_USE_MONITOR
378  // If we have waited a bit more, fall asleep
379  if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
380  continue;
381 #else
382  if (KMP_BLOCKING(hibernate_goal, poll_count++))
383  continue;
384 #endif
385 
386  KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
387  flag->suspend(th_gtid);
388 
389  if (TCR_4(__kmp_global.g.g_done)) {
390  if (__kmp_global.g.g_abort)
391  __kmp_abort_thread();
392  break;
393  } else if (__kmp_tasking_mode != tskm_immediate_exec &&
394  this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
395  this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
396  }
397  // TODO: If thread is done with work and times out, disband/free
398  }
399 
400 #if OMPT_SUPPORT
401  omp_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
402  if (ompt_enabled.enabled && ompt_exit_state != omp_state_undefined) {
403 #if OMPT_OPTIONAL
404  if (final_spin) {
405  __ompt_implicit_task_end(this_thr, ompt_exit_state, tId, pId);
406  ompt_exit_state = this_thr->th.ompt_thread_info.state;
407  }
408 #endif
409  if (ompt_exit_state == omp_state_idle) {
410 #if OMPT_OPTIONAL
411  if (ompt_enabled.ompt_callback_idle) {
412  ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_end);
413  }
414 #endif
415  this_thr->th.ompt_thread_info.state = omp_state_overhead;
416  }
417  }
418 #endif
419 #if KMP_STATS_ENABLED
420  // If we were put into idle state, pop that off the state stack
421  if (KMP_GET_THREAD_STATE() == IDLE) {
422  KMP_POP_PARTITIONED_TIMER();
423  KMP_SET_THREAD_STATE(thread_state);
424  this_thr->th.th_stats->resetIdleFlag();
425  }
426 #endif
427 
428  KMP_FSYNC_SPIN_ACQUIRED(CCAST(typename C::flag_t *, spin));
429 }
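As a rough, self-contained illustration of the pause → yield → sleep progression implemented above, here is a toy flag built on std::atomic and a condition variable. The phase limits, toy_flag, and its methods are invented for the sketch and stand in for KMP_INIT_YIELD/KMP_YIELD, the hibernate/blocktime bookkeeping, and flag->suspend()/resume().

// Illustrative sketch only -- not part of the runtime.
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>

struct toy_flag {
  std::atomic<unsigned> value{0};
  unsigned checker; // done when value == checker
  std::mutex mtx;
  std::condition_variable cv;

  explicit toy_flag(unsigned c) : checker(c) {}

  bool done() const { return value.load(std::memory_order_acquire) == checker; }

  void wait() {
    // Phase 1: busy spin -- cheap when the release is imminent.
    for (int spins = 0; spins < 4096; ++spins)
      if (done())
        return;
    // Phase 2: yield between checks -- friendlier when oversubscribed.
    for (int yields = 0; yields < 256; ++yields) {
      if (done())
        return;
      std::this_thread::yield();
    }
    // Phase 3: block until released (the analogue of flag->suspend()).
    std::unique_lock<std::mutex> lk(mtx);
    cv.wait(lk, [this] { return done(); });
  }

  void release() {
    {
      // Store under the mutex so a waiter between its predicate check and
      // cv.wait() cannot miss the update (the analogue of flag->resume()).
      std::lock_guard<std::mutex> lk(mtx);
      value.store(checker, std::memory_order_release);
    }
    cv.notify_all();
  }
};

int main() {
  toy_flag f(1);
  std::thread waiter([&] { f.wait(); });
  f.release();
  waiter.join();
}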
430 
431 /* Release any threads specified as waiting on the flag by releasing the flag
432  and resuming the waiting thread if indicated by the sleep bit(s). For any
433  thread blocked in __kmp_wait_template, another thread must call this
434  function to wake it up and prevent deadlocks! */
435 template <class C> static inline void __kmp_release_template(C *flag) {
436 #ifdef KMP_DEBUG
437  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
438 #endif
439  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%p)\n", gtid, flag->get()));
440  KMP_DEBUG_ASSERT(flag->get());
441  KMP_FSYNC_RELEASING(CCAST(typename C::flag_t *, flag->get()));
442 
443  flag->internal_release();
444 
445  KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%d\n", gtid,
446  flag->get(), *(flag->get())));
447 
448  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
449  // Only need to check sleep stuff if infinite block time not set.
450  // Are *any* threads waiting on flag sleeping?
451  if (flag->is_any_sleeping()) {
452  for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
453  // if sleeping waiter exists at i, sets current_waiter to i inside flag
454  kmp_info_t *waiter = flag->get_waiter(i);
455  if (waiter) {
456  int wait_gtid = waiter->th.th_info.ds.ds_gtid;
457  // Wake up thread if needed
458  KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
459  "flag(%p) set\n",
460  gtid, wait_gtid, flag->get()));
461  flag->resume(wait_gtid); // unsets flag's current_waiter when done
462  }
463  }
464  }
465  }
466 }
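The deadlock avoidance the comment above insists on hinges on the sleep bit: a waiter publishes its intent to sleep before blocking (set_sleeping()), and the releaser only takes the resume path when it observes that bit (is_any_sleeping()/resume()). Below is a hedged stand-alone sketch of that handshake; SLEEP_BIT and sleepy_flag are illustrative stand-ins for KMP_BARRIER_SLEEP_STATE and the runtime's suspend/resume machinery, not runtime names.

// Illustrative sketch only -- not part of the runtime.
#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <mutex>
#include <thread>

constexpr std::uint64_t SLEEP_BIT = 1ull << 63; // stand-in for KMP_BARRIER_SLEEP_STATE

struct sleepy_flag {
  std::atomic<std::uint64_t> word{0};
  std::uint64_t checker;
  std::mutex mtx;
  std::condition_variable cv;

  explicit sleepy_flag(std::uint64_t c) : checker(c) {}

  bool done() const {
    return (word.load(std::memory_order_acquire) & ~SLEEP_BIT) == checker;
  }

  // Waiter side, analogue of set_sleeping() + suspend().
  void block_if_not_done() {
    // Publish "I may go to sleep" *before* the final check so the releaser
    // cannot miss it.
    word.fetch_or(SLEEP_BIT, std::memory_order_acq_rel);
    std::unique_lock<std::mutex> lk(mtx);
    if (done()) { // re-check under the lock before sleeping
      word.fetch_and(~SLEEP_BIT, std::memory_order_acq_rel); // unset_sleeping()
      return;
    }
    cv.wait(lk, [this] { return done(); });
    word.fetch_and(~SLEEP_BIT, std::memory_order_acq_rel);
  }

  // Releaser side, analogue of internal_release() + is_any_sleeping()/resume().
  void release() {
    word.fetch_add(1, std::memory_order_acq_rel); // release the flag
    if (word.load(std::memory_order_acquire) & SLEEP_BIT) {
      std::lock_guard<std::mutex> lk(mtx); // serialize with a waiter about to sleep
      cv.notify_all();
    }
  }
};

int main() {
  sleepy_flag f(1);
  std::thread waiter([&] { f.block_if_not_done(); });
  f.release();
  waiter.join();
}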
467 
468 template <typename FlagType> struct flag_traits {};
469 
470 template <> struct flag_traits<kmp_uint32> {
471  typedef kmp_uint32 flag_t;
472  static const flag_type t = flag32;
473  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
474  static inline flag_t test_then_add4(volatile flag_t *f) {
475  return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
476  }
477  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
478  return KMP_TEST_THEN_OR32(f, v);
479  }
480  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
481  return KMP_TEST_THEN_AND32(f, v);
482  }
483 };
484 
485 template <> struct flag_traits<kmp_uint64> {
486  typedef kmp_uint64 flag_t;
487  static const flag_type t = flag64;
488  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
489  static inline flag_t test_then_add4(volatile flag_t *f) {
490  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
491  }
492  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
493  return KMP_TEST_THEN_OR64(f, v);
494  }
495  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
496  return KMP_TEST_THEN_AND64(f, v);
497  }
498 };
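These specializations let kmp_basic_flag below stay width-agnostic: the flag's integer type alone selects the matching atomic primitives at compile time. The same pattern in stand-alone form; my_traits and set_bit are illustrative names, not runtime identifiers.

// Illustrative sketch only -- not part of the runtime.
#include <atomic>
#include <cstdint>

template <typename T> struct my_traits {};

template <> struct my_traits<std::uint32_t> {
  static std::uint32_t fetch_or(std::atomic<std::uint32_t> &f, std::uint32_t v) {
    return f.fetch_or(v); // 32-bit primitive, cf. KMP_TEST_THEN_OR32
  }
};

template <> struct my_traits<std::uint64_t> {
  static std::uint64_t fetch_or(std::atomic<std::uint64_t> &f, std::uint64_t v) {
    return f.fetch_or(v); // 64-bit primitive, cf. KMP_TEST_THEN_OR64
  }
};

// Width-agnostic code, analogous to kmp_basic_flag<FlagType>::set_sleeping():
// the flag's integer type picks the primitive via the traits class.
template <typename T> T set_bit(std::atomic<T> &flag, T bit) {
  return my_traits<T>::fetch_or(flag, bit);
}

int main() {
  std::atomic<std::uint64_t> f{0};
  set_bit(f, std::uint64_t{1} << 63); // resolves to the 64-bit specialization
}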
499 
500 template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
501  typedef flag_traits<FlagType> traits_type;
502  FlagType checker; /**< Value to compare flag against to check if it has been released. */
504  kmp_info_t
505  *waiting_threads[1]; /**< Array of threads sleeping on this flag. */
506  kmp_uint32
507  num_waiting_threads; /**< Number of threads sleeping on this flag. */
508 public:
509  kmp_basic_flag(volatile FlagType *p)
510  : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
511  kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr)
512  : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
513  waiting_threads[0] = thr;
514  }
515  kmp_basic_flag(volatile FlagType *p, FlagType c)
516  : kmp_flag<FlagType>(p, traits_type::t), checker(c),
517  num_waiting_threads(0) {}
522  kmp_info_t *get_waiter(kmp_uint32 i) {
523  KMP_DEBUG_ASSERT(i < num_waiting_threads);
524  return waiting_threads[i];
525  }
529  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
535  void set_waiter(kmp_info_t *thr) {
536  waiting_threads[0] = thr;
537  num_waiting_threads = 1;
538  }
542  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
547  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
555  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
560  void internal_release() {
561  (void)traits_type::test_then_add4((volatile FlagType *)this->get());
562  }
568  FlagType set_sleeping() {
569  return traits_type::test_then_or((volatile FlagType *)this->get(),
570  KMP_BARRIER_SLEEP_STATE);
571  }
577  FlagType unset_sleeping() {
578  return traits_type::test_then_and((volatile FlagType *)this->get(),
579  ~KMP_BARRIER_SLEEP_STATE);
580  }
585  bool is_sleeping_val(FlagType old_loc) {
586  return old_loc & KMP_BARRIER_SLEEP_STATE;
587  }
591  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
592  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
593  kmp_uint8 *get_stolen() { return NULL; }
594  enum barrier_type get_bt() { return bs_last_barrier; }
595 };
596 
597 class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
598 public:
599  kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
600  kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr)
601  : kmp_basic_flag<kmp_uint32>(p, thr) {}
602  kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c)
603  : kmp_basic_flag<kmp_uint32>(p, c) {}
604  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
605  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
606  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
607  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
608  kmp_int32 is_constrained) {
609  return __kmp_execute_tasks_32(
610  this_thr, gtid, this, final_spin,
611  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
612  }
613  void wait(kmp_info_t *this_thr,
614  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
615  __kmp_wait_template(this_thr, this,
616  final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
617  }
618  void release() { __kmp_release_template(this); }
619  flag_type get_ptr_type() { return flag32; }
620 };
621 
622 class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
623 public:
624  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
625  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
626  : kmp_basic_flag<kmp_uint64>(p, thr) {}
627  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
628  : kmp_basic_flag<kmp_uint64>(p, c) {}
629  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
630  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
631  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
632  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
633  kmp_int32 is_constrained) {
634  return __kmp_execute_tasks_64(
635  this_thr, gtid, this, final_spin,
636  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
637  }
638  void wait(kmp_info_t *this_thr,
639  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
640  __kmp_wait_template(this_thr, this,
641  final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
642  }
643  void release() { __kmp_release_template(this); }
644  flag_type get_ptr_type() { return flag64; }
645 };
646 
647 // Hierarchical 64-bit on-core barrier instantiation
648 class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
649  kmp_uint64 checker; /**< Value the flag byte is compared against. */
650  kmp_info_t *waiting_threads[1]; /**< Array of threads sleeping on this flag. */
651  kmp_uint32 num_waiting_threads; /**< Number of threads sleeping on this flag. */
652  kmp_uint32
653  offset; /**< Byte of the flag word that is of interest for this operation. */
654  bool flag_switch; /**< Indicates a switch in flag location. */
655  enum barrier_type bt; /**< Barrier type. */
656  kmp_info_t *this_thr; /**< Thread that may be redirected to a different flag location. */
658 #if USE_ITT_BUILD
659  void *
660  itt_sync_obj; /**< ITT object to pass to the new flag location. */
661 #endif
662  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
663  return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
664  }
665 
666 public:
667  kmp_flag_oncore(volatile kmp_uint64 *p)
668  : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
669  flag_switch(false) {}
670  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
671  : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
672  offset(idx), flag_switch(false) {}
673  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
674  enum barrier_type bar_t, kmp_info_t *thr
675 #if USE_ITT_BUILD
676  ,
677  void *itt
678 #endif
679  )
680  : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c),
681  num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
682  this_thr(thr)
683 #if USE_ITT_BUILD
684  ,
685  itt_sync_obj(itt)
686 #endif
687  {
688  }
689  kmp_info_t *get_waiter(kmp_uint32 i) {
690  KMP_DEBUG_ASSERT(i < num_waiting_threads);
691  return waiting_threads[i];
692  }
693  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
694  void set_waiter(kmp_info_t *thr) {
695  waiting_threads[0] = thr;
696  num_waiting_threads = 1;
697  }
698  bool done_check_val(kmp_uint64 old_loc) {
699  return byteref(&old_loc, offset) == checker;
700  }
701  bool done_check() { return done_check_val(*get()); }
702  bool notdone_check() {
703  // Calculate flag_switch
704  if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
705  flag_switch = true;
706  if (byteref(get(), offset) != 1 && !flag_switch)
707  return true;
708  else if (flag_switch) {
709  this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
710  kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
711  (kmp_uint64)KMP_BARRIER_STATE_BUMP);
712  __kmp_wait_64(this_thr, &flag, TRUE
713 #if USE_ITT_BUILD
714  ,
715  itt_sync_obj
716 #endif
717  );
718  }
719  return false;
720  }
721  void internal_release() {
722  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
723  byteref(get(), offset) = 1;
724  } else {
725  kmp_uint64 mask = 0;
726  byteref(&mask, offset) = 1;
727  KMP_TEST_THEN_OR64(get(), mask);
728  }
729  }
730  kmp_uint64 set_sleeping() {
731  return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
732  }
733  kmp_uint64 unset_sleeping() {
734  return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
735  }
736  bool is_sleeping_val(kmp_uint64 old_loc) {
737  return old_loc & KMP_BARRIER_SLEEP_STATE;
738  }
739  bool is_sleeping() { return is_sleeping_val(*get()); }
740  bool is_any_sleeping() { return is_sleeping_val(*get()); }
741  void wait(kmp_info_t *this_thr, int final_spin) {
742  __kmp_wait_template<kmp_flag_oncore>(
743  this_thr, this, final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
744  }
745  void release() { __kmp_release_template(this); }
746  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
747  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
748  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
749  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
750  kmp_int32 is_constrained) {
751  return __kmp_execute_tasks_oncore(
752  this_thr, gtid, this, final_spin,
753  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
754  }
755  kmp_uint8 *get_stolen() { return NULL; }
756  enum barrier_type get_bt() { return bt; }
757  flag_type get_ptr_type() { return flag_oncore; }
758 };
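Reading internal_release() and done_check() above: the oncore flag packs several one-byte sub-flags into a single 64-bit word, and byteref() selects the byte at offset. When the blocktime is infinite no sleep bits are ever set, so a plain byte store suffices; otherwise the byte is set with a full-word atomic OR so the update cannot race with sleep-bit changes. A stand-alone sketch of that byte-mask idea follows; byte_mask, release_byte, and byte_released are illustrative names, not runtime identifiers.

// Illustrative sketch only -- not part of the runtime.
#include <atomic>
#include <cstdint>

// Mask whose byte at 'offset' (0..7) holds 'value'.
inline std::uint64_t byte_mask(unsigned offset, std::uint8_t value) {
  return static_cast<std::uint64_t>(value) << (8 * offset);
}

// Releaser: mark the sub-flag at 'offset' as released with one atomic OR,
// leaving the other bytes (and any high sleep bits) untouched.
inline void release_byte(std::atomic<std::uint64_t> &word, unsigned offset) {
  word.fetch_or(byte_mask(offset, 1), std::memory_order_release);
}

// Waiter: has my byte reached the expected value (cf. done_check_val)?
inline bool byte_released(const std::atomic<std::uint64_t> &word,
                          unsigned offset, std::uint8_t checker) {
  return ((word.load(std::memory_order_acquire) >> (8 * offset)) & 0xff) ==
         checker;
}

int main() {
  std::atomic<std::uint64_t> word{0};
  release_byte(word, 3);            // set byte 3 to 1
  return byte_released(word, 3, 1) ? 0 : 1;
}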
759 
760 // Used to wake up threads; the volatile void *flag is usually the
761 // th_sleep_loc associated with the given gtid.
762 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
763  if (!flag)
764  return;
765 
766  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
767  case flag32:
768  __kmp_resume_32(gtid, NULL);
769  break;
770  case flag64:
771  __kmp_resume_64(gtid, NULL);
772  break;
773  case flag_oncore:
774  __kmp_resume_oncore(gtid, NULL);
775  break;
776  }
777 }
778 
783 #endif // KMP_WAIT_RELEASE_H