LLVM OpenMP* Runtime Library
kmp_stats.h
1 #ifndef KMP_STATS_H
2 #define KMP_STATS_H
3 
8 //===----------------------------------------------------------------------===//
9 //
10 // The LLVM Compiler Infrastructure
11 //
12 // This file is dual licensed under the MIT and the University of Illinois Open
13 // Source Licenses. See LICENSE.txt for details.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "kmp_config.h"
18 
19 #if KMP_STATS_ENABLED
20 /* Statistics accumulator.
21  Accumulates number of samples and computes min, max, mean, standard deviation
22  on the fly.
23 
24  Online variance calculation algorithm from
25  http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
26  */
27 
28 #include "kmp_stats_timing.h"
29 #include <limits>
30 #include <math.h>
31 #include <new> // placement new
32 #include <stdint.h>
33 #include <string>
34 #include <vector>
35 
36 /* Enable developer statistics here if you want them. They are more detailed
37  than is useful for application characterisation and are intended for the
38  runtime library developer. */
39 // #define KMP_DEVELOPER_STATS 1
40 
/// Flags to describe the statistic (timer or counter).
/// Each enumerator is a distinct bit so flags can be OR-ed together.
enum stats_flags_e {
  noTotal = 1 << 0,      ///< do not show a TOTAL_aggregation for this statistic
  onlyInMaster = 1 << 1, ///< statistic is valid only for master
  noUnits = 1 << 2,      ///< statistic doesn't need units printed next to it
                         ///< in output
  notInMaster = 1 << 3,  ///< statistic is valid only for non-master threads
  logEvent = 1 << 4      ///< NOTE(review): presumably marks timers whose
                         ///< start/stop is recorded as an event -- confirm
};
55 
/// The states which a thread can be in.
enum stats_state_e {
  IDLE,
  SERIAL_REGION,
  FORK_JOIN_BARRIER,
  PLAIN_BARRIER,
  TASKWAIT,
  TASKYIELD,
  TASKGROUP,
  IMPLICIT_TASK,
  EXPLICIT_TASK
};
72 
// Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h.
// The list invokes macro(name, flags, arg) once per counter, in the order
// given below; that order fixes the values of the generated enumerators.
// clang-format off
#define KMP_FOREACH_COUNTER(macro, arg)                                        \
  macro(OMP_PARALLEL,                                                          \
        stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg)             \
  macro(OMP_NESTED_PARALLEL, 0, arg)                                           \
  macro(OMP_FOR_static, 0, arg)                                                \
  macro(OMP_FOR_static_steal, 0, arg)                                          \
  macro(OMP_FOR_dynamic, 0, arg)                                               \
  macro(OMP_DISTRIBUTE, 0, arg)                                                \
  macro(OMP_BARRIER, 0, arg)                                                   \
  macro(OMP_CRITICAL, 0, arg)                                                  \
  macro(OMP_SINGLE, 0, arg)                                                    \
  macro(OMP_MASTER, 0, arg)                                                    \
  macro(OMP_TEAMS, 0, arg)                                                     \
  macro(OMP_set_lock, 0, arg)                                                  \
  macro(OMP_test_lock, 0, arg)                                                 \
  macro(REDUCE_wait, 0, arg)                                                   \
  macro(REDUCE_nowait, 0, arg)                                                 \
  macro(OMP_TASKYIELD, 0, arg)                                                 \
  macro(OMP_TASKLOOP, 0, arg)                                                  \
  macro(TASK_executed, 0, arg)                                                 \
  macro(TASK_cancelled, 0, arg)                                                \
  macro(TASK_stolen, 0, arg)
// clang-format on
108 
// List of all timers: invokes macro(name, flags, arg) once per timer, in the
// order given below, then appends the developer timers via
// KMP_FOREACH_DEVELOPER_TIMER (which expands to nothing unless
// KMP_DEVELOPER_STATS is enabled).
// clang-format off
#define KMP_FOREACH_TIMER(macro, arg)                                          \
  macro(OMP_worker_thread_life, stats_flags_e::logEvent, arg)                  \
  macro(FOR_static_scheduling, 0, arg)                                         \
  macro(FOR_dynamic_scheduling, 0, arg)                                        \
  macro(OMP_critical, 0, arg)                                                  \
  macro(OMP_critical_wait, 0, arg)                                             \
  macro(OMP_single, 0, arg)                                                    \
  macro(OMP_master, 0, arg)                                                    \
  macro(OMP_idle, stats_flags_e::logEvent, arg)                                \
  macro(OMP_plain_barrier, stats_flags_e::logEvent, arg)                       \
  macro(OMP_fork_barrier, stats_flags_e::logEvent, arg)                        \
  macro(OMP_join_barrier, stats_flags_e::logEvent, arg)                        \
  macro(OMP_parallel, stats_flags_e::logEvent, arg)                            \
  macro(OMP_task_immediate, 0, arg)                                            \
  macro(OMP_task_taskwait, 0, arg)                                             \
  macro(OMP_task_taskyield, 0, arg)                                            \
  macro(OMP_task_taskgroup, 0, arg)                                            \
  macro(OMP_task_join_bar, 0, arg)                                             \
  macro(OMP_task_plain_bar, 0, arg)                                            \
  macro(OMP_serial, stats_flags_e::logEvent, arg)                              \
  macro(OMP_taskloop_scheduling, 0, arg)                                       \
  macro(OMP_set_numthreads,                                                    \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(OMP_PARALLEL_args,                                                     \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(FOR_static_iterations,                                                 \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(FOR_dynamic_iterations,                                                \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(FOR_static_steal_stolen,                                               \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(FOR_static_steal_chunks,                                               \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
// clang-format on
163 
164 // OMP_start_end -- Time from when OpenMP is initialized until the
165 // stats are printed at exit
166 // OMP_serial -- Thread zero time executing serial code
167 // OMP_work -- Elapsed time in code dispatched by a fork (measured
168 // in the thread)
169 // OMP_barrier -- Time at "real" barriers (includes task time)
170 // FOR_static_scheduling -- Time spent doing scheduling for a static "for"
171 // FOR_dynamic_scheduling -- Time spent doing scheduling for a dynamic "for"
172 // OMP_idle -- Worker threads time spent waiting for inclusion in
173 // a parallel region
174 // OMP_plain_barrier -- Time spent in a barrier construct
175 // OMP_fork_join_barrier -- Time spent in the fork-join barrier surrounding a
176 // parallel region
177 // OMP_parallel -- Time spent inside a parallel construct
178 // OMP_task_immediate -- Time spent executing non-deferred tasks
179 // OMP_task_taskwait -- Time spent executing tasks inside a taskwait
180 // construct
181 // OMP_task_taskyield -- Time spent executing tasks inside a taskyield
182 // construct
183 // OMP_task_taskgroup -- Time spent executing tasks inside a taskgroup
184 // construct
185 // OMP_task_join_bar -- Time spent executing tasks inside a join barrier
186 // OMP_task_plain_bar -- Time spent executing tasks inside a barrier
187 // construct
188 // OMP_single -- Time spent executing a "single" region
189 // OMP_master -- Time spent executing a "master" region
190 // OMP_set_numthreads -- Values passed to omp_set_num_threads
191 // OMP_PARALLEL_args -- Number of arguments passed to a parallel region
192 // FOR_static_iterations -- Number of available parallel chunks of work in a
193 // static for
194 // FOR_dynamic_iterations -- Number of available parallel chunks of work in a
195 // dynamic for
196 // Both adjust for any chunking, so if there were an
197 // iteration count of 20 but a chunk size of 10, we'd
198 // record 2.
199 
#if (KMP_DEVELOPER_STATS)
// Timers which are of interest to runtime library developers, not end users.
// These have to be explicitly enabled in addition to the other stats.
//
// NOTE(review): the descriptions below do not match the macro entries
// one-for-one (e.g. KMP_fork_barrier is described but not listed, while
// KMP_fork_call is listed but not described) -- confirm which is current.

// KMP_fork_barrier -- time in __kmp_fork_barrier
// KMP_join_barrier -- time in __kmp_join_barrier
// KMP_barrier -- time in __kmp_barrier
// KMP_end_split_barrier -- time in __kmp_end_split_barrier
// KMP_setup_icv_copy -- time in __kmp_setup_icv_copy
// KMP_icv_copy -- start/stop timer for any ICV copying
// KMP_linear_gather -- time in __kmp_linear_barrier_gather
// KMP_linear_release -- time in __kmp_linear_barrier_release
// KMP_tree_gather -- time in __kmp_tree_barrier_gather
// KMP_tree_release -- time in __kmp_tree_barrier_release
// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather
// KMP_hyper_release -- time in __kmp_hyper_barrier_release
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)                                \
  macro(KMP_fork_call, 0, arg)                                                 \
  macro(KMP_join_call, 0, arg)                                                 \
  macro(KMP_end_split_barrier, 0, arg)                                         \
  macro(KMP_hier_gather, 0, arg)                                               \
  macro(KMP_hier_release, 0, arg)                                              \
  macro(KMP_hyper_gather, 0, arg)                                              \
  macro(KMP_hyper_release, 0, arg)                                             \
  macro(KMP_linear_gather, 0, arg)                                             \
  macro(KMP_linear_release, 0, arg)                                            \
  macro(KMP_tree_gather, 0, arg)                                               \
  macro(KMP_tree_release, 0, arg)                                              \
  macro(USER_resume, 0, arg)                                                   \
  macro(USER_suspend, 0, arg)                                                  \
  macro(KMP_allocate_team, 0, arg)                                             \
  macro(KMP_setup_icv_copy, 0, arg)                                            \
  macro(USER_icv_copy, 0, arg)
#else
// Developer statistics disabled: the developer timer list is empty.
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
#endif
230 
250 #define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) KMP_FOREACH_TIMER(macro, arg)
251 
252 #define ENUMERATE(name, ignore, prefix) prefix##name,
253 enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST };
254 
255 enum explicit_timer_e {
256  KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) EXPLICIT_TIMER_LAST
257 };
258 
259 enum counter_e { KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) COUNTER_LAST };
260 #undef ENUMERATE
261 
262 class timerPair {
263  explicit_timer_e timer_index;
264  timer_e timer;
265 
266 public:
267  timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
268  inline explicit_timer_e get_index() const { return timer_index; }
269  inline timer_e get_timer() const { return timer; }
270  bool operator==(const timerPair &rhs) {
271  return this->get_index() == rhs.get_index();
272  }
273  bool operator!=(const timerPair &rhs) { return !(*this == rhs); }
274 };
275 
// Statistics accumulator: holds the sample count together with the running
// minimum, maximum, mean and m2 (the accumulator from which the standard
// deviation is derived).
class statistic {
  double minVal;
  double maxVal;
  double meanVal;
  double m2;
  uint64_t sampleCount;

public:
  // A freshly constructed statistic is in the reset() state.
  statistic() { reset(); }
  statistic(statistic const &o)
      : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2),
        sampleCount(o.sampleCount) {}

  double getMin() const { return minVal; }
  double getMean() const { return meanVal; }
  double getMax() const { return maxVal; }
  uint64_t getCount() const { return sampleCount; }
  // Standard deviation, sqrt(m2 / sampleCount). With no samples the division
  // would be 0/0 (NaN), so an empty statistic reports 0.0 instead.
  double getSD() const {
    return sampleCount == 0 ? 0.0 : sqrt(m2 / sampleCount);
  }
  // Sum of all samples, recovered as count * mean.
  double getTotal() const { return sampleCount * meanVal; }

  // Return to the empty state. min/max start at +/- the largest finite double
  // so that any later comparison against a real sample replaces them.
  void reset() {
    minVal = std::numeric_limits<double>::max();
    maxVal = -std::numeric_limits<double>::max();
    meanVal = 0.0;
    m2 = 0.0;
    sampleCount = 0;
  }
  void addSample(double sample);
  void scale(double factor);
  // Divide by f; implemented as scale(1/f).
  void scaleDown(double f) { scale(1. / f); }
  statistic &operator+=(statistic const &other);

  std::string format(char unit, bool total = false) const;
};
310 
// Static description of one statistic (one entry of an info table).
struct statInfo {
  const char *name; // name of the statistic
  uint32_t flags;   // tested against stats_flags_e bits by timeStat/counter
};
315 
316 class timeStat : public statistic {
317  static statInfo timerInfo[];
318 
319 public:
320  timeStat() : statistic() {}
321  static const char *name(timer_e e) { return timerInfo[e].name; }
322  static bool noTotal(timer_e e) {
323  return timerInfo[e].flags & stats_flags_e::noTotal;
324  }
325  static bool masterOnly(timer_e e) {
326  return timerInfo[e].flags & stats_flags_e::onlyInMaster;
327  }
328  static bool workerOnly(timer_e e) {
329  return timerInfo[e].flags & stats_flags_e::notInMaster;
330  }
331  static bool noUnits(timer_e e) {
332  return timerInfo[e].flags & stats_flags_e::noUnits;
333  }
334  static bool logEvent(timer_e e) {
335  return timerInfo[e].flags & stats_flags_e::logEvent;
336  }
337  static void clearEventFlags() {
338  for (int i = 0; i < TIMER_LAST; i++) {
339  timerInfo[i].flags &= (~(stats_flags_e::logEvent));
340  }
341  }
342 };
343 
344 // Where we need explicitly to start and end the timer, this version can be used
345 // Since these timers normally aren't nicely scoped, so don't have a good place
346 // to live on the stack of the thread, they're more work to use.
347 class explicitTimer {
348  timeStat *stat;
349  tsc_tick_count startTime;
350  tsc_tick_count pauseStartTime;
351  tsc_tick_count::tsc_interval_t totalPauseTime;
352 
353 public:
354  explicitTimer()
355  : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() {}
356  explicitTimer(timeStat *s)
357  : stat(s), startTime(), pauseStartTime(0), totalPauseTime() {}
358 
359  void setStat(timeStat *s) { stat = s; }
360  void start(timer_e timerEnumValue);
361  void pause() { pauseStartTime = tsc_tick_count::now(); }
362  void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
363  void stop(timer_e timerEnumValue, kmp_stats_list *stats_ptr = nullptr);
364  void reset() {
365  startTime = 0;
366  pauseStartTime = 0;
367  totalPauseTime = 0;
368  }
369 };
370 
371 // Where all you need is to time a block, this is enough.
372 // (It avoids the need to have an explicit end, leaving the scope suffices.)
373 class blockTimer : public explicitTimer {
374  timer_e timerEnumValue;
375 
376 public:
377  blockTimer(timeStat *s, timer_e newTimerEnumValue)
378  : timerEnumValue(newTimerEnumValue), explicitTimer(s) {
379  start(timerEnumValue);
380  }
381  ~blockTimer() { stop(timerEnumValue); }
382 };
383 
384 // Where you need to partition a threads clock ticks into separate states
385 // e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and
386 // DOING_NOTHING would render these conditions:
387 // time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
388 // No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice
389 // versa
390 class partitionedTimers {
391 private:
392  explicitTimer *timers[EXPLICIT_TIMER_LAST + 1];
393  std::vector<timerPair> timer_stack;
394 
395 public:
396  partitionedTimers();
397  void add_timer(explicit_timer_e timer_index, explicitTimer *timer_pointer);
398  void init(timerPair timer_index);
399  void push(timerPair timer_index);
400  void pop();
401  void windup();
402 };
403 
404 // Special wrapper around the partioned timers to aid timing code blocks
405 // It avoids the need to have an explicit end, leaving the scope suffices.
406 class blockPartitionedTimer {
407  partitionedTimers *part_timers;
408  timerPair timer_pair;
409 
410 public:
411  blockPartitionedTimer(partitionedTimers *pt, timerPair tp)
412  : part_timers(pt), timer_pair(tp) {
413  part_timers->push(timer_pair);
414  }
415  ~blockPartitionedTimer() { part_timers->pop(); }
416 };
417 
418 // Special wrapper around the thread state to aid in keeping state in code
419 // blocks It avoids the need to have an explicit end, leaving the scope
420 // suffices.
421 class blockThreadState {
422  stats_state_e *state_pointer;
423  stats_state_e old_state;
424 
425 public:
426  blockThreadState(stats_state_e *thread_state_pointer, stats_state_e new_state)
427  : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
428  *state_pointer = new_state;
429  }
430  ~blockThreadState() { *state_pointer = old_state; }
431 };
432 
433 // If all you want is a count, then you can use this...
434 // The individual per-thread counts will be aggregated into a statistic at
435 // program exit.
436 class counter {
437  uint64_t value;
438  static const statInfo counterInfo[];
439 
440 public:
441  counter() : value(0) {}
442  void increment() { value++; }
443  uint64_t getValue() const { return value; }
444  void reset() { value = 0; }
445  static const char *name(counter_e e) { return counterInfo[e].name; }
446  static bool masterOnly(counter_e e) {
447  return counterInfo[e].flags & stats_flags_e::onlyInMaster;
448  }
449 };
450 
451 /* ****************************************************************
452  Class to implement an event
453 
454  There are four components to an event: start time, stop time
455  nest_level, and timer_name.
456  The start and stop time should be obvious (recorded in clock ticks).
457  The nest_level relates to the bar width in the timeline graph.
458  The timer_name is used to determine which timer event triggered this event.
459 
460  the interface to this class is through four read-only operations:
461  1) getStart() -- returns the start time as 64 bit integer
462  2) getStop() -- returns the stop time as 64 bit integer
463  3) getNestLevel() -- returns the nest level of the event
464  4) getTimerName() -- returns the timer name that triggered event
465 
466  *MORE ON NEST_LEVEL*
467  The nest level is used in the bar graph that represents the timeline.
468  Its main purpose is for showing how events are nested inside each other.
469  For example, say events, A, B, and C are recorded. If the timeline
470  looks like this:
471 
472 Begin -------------------------------------------------------------> Time
473  | | | | | |
474  A B C C B A
475  start start start end end end
476 
477  Then A, B, C will have a nest level of 1, 2, 3 respectively.
478  These values are then used to calculate the barwidth so you can
479  see that inside A, B has occurred, and inside B, C has occurred.
480  Currently, this is shown with A's bar width being larger than B's
481  bar width, and B's bar width being larger than C's bar width.
482 
483 **************************************************************** */
484 class kmp_stats_event {
485  uint64_t start;
486  uint64_t stop;
487  int nest_level;
488  timer_e timer_name;
489 
490 public:
491  kmp_stats_event()
492  : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
493  kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme)
494  : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
495  inline uint64_t getStart() const { return start; }
496  inline uint64_t getStop() const { return stop; }
497  inline int getNestLevel() const { return nest_level; }
498  inline timer_e getTimerName() const { return timer_name; }
499 };
500 
501 /* ****************************************************************
502  Class to implement a dynamically expandable array of events
503 
504  ---------------------------------------------------------
505  | event 1 | event 2 | event 3 | event 4 | ... | event N |
506  ---------------------------------------------------------
507 
508  An event is pushed onto the back of this array at every
509  explicitTimer->stop() call. The event records the thread #,
510  start time, stop time, and nest level related to the bar width.
511 
512  The event vector starts at size INIT_SIZE and grows (doubles in size)
513  if needed. An implication of this behavior is that log(N)
514  reallocations are needed (where N is number of events). If you want
515  to avoid reallocations, then set INIT_SIZE to a large value.
516 
517  the interface to this class is through six operations:
518  1) reset() -- sets the internal_size back to 0 but does not deallocate any
519  memory
520  2) size() -- returns the number of valid elements in the vector
521  3) push_back(start, stop, nest, timer_name) -- pushes an event onto
522  the back of the array
523  4) deallocate() -- frees all memory associated with the vector
524  5) sort() -- sorts the vector by start time
525  6) operator[index] or at(index) -- returns event reference at that index
526 **************************************************************** */
527 class kmp_stats_event_vector {
528  kmp_stats_event *events;
529  int internal_size;
530  int allocated_size;
531  static const int INIT_SIZE = 1024;
532 
533 public:
534  kmp_stats_event_vector() {
535  events =
536  (kmp_stats_event *)__kmp_allocate(sizeof(kmp_stats_event) * INIT_SIZE);
537  internal_size = 0;
538  allocated_size = INIT_SIZE;
539  }
540  ~kmp_stats_event_vector() {}
541  inline void reset() { internal_size = 0; }
542  inline int size() const { return internal_size; }
543  void push_back(uint64_t start_time, uint64_t stop_time, int nest_level,
544  timer_e name) {
545  int i;
546  if (internal_size == allocated_size) {
547  kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate(
548  sizeof(kmp_stats_event) * allocated_size * 2);
549  for (i = 0; i < internal_size; i++)
550  tmp[i] = events[i];
551  __kmp_free(events);
552  events = tmp;
553  allocated_size *= 2;
554  }
555  events[internal_size] =
556  kmp_stats_event(start_time, stop_time, nest_level, name);
557  internal_size++;
558  return;
559  }
560  void deallocate();
561  void sort();
562  const kmp_stats_event &operator[](int index) const { return events[index]; }
563  kmp_stats_event &operator[](int index) { return events[index]; }
564  const kmp_stats_event &at(int index) const { return events[index]; }
565  kmp_stats_event &at(int index) { return events[index]; }
566 };
567 
568 /* ****************************************************************
569  Class to implement a doubly-linked, circular, statistics list
570 
571  |---| ---> |---| ---> |---| ---> |---| ---> ... next
572  | | | | | | | |
573  |---| <--- |---| <--- |---| <--- |---| <--- ... prev
574  Sentinel first second third
575  Node node node node
576 
577  The Sentinel Node is the user handle on the list.
578  The first node corresponds to thread 0's statistics.
579  The second node corresponds to thread 1's statistics and so on...
580 
581  Each node has a _timers, _counters, and _explicitTimers array to hold that
582  thread's statistics. The _explicitTimers point to the correct _timer and
583  update its statistics at every stop() call. The explicitTimers' pointers are
584  set up in the constructor. Each node also has an event vector to hold that
585  thread's timing events. The event vector expands as necessary and records
586  the start-stop times for each timer.
587 
588  The nestLevel variable is for plotting events and is related
589  to the bar width in the timeline graph.
590 
591  Every thread will have a thread local pointer to its node in
592  the list. The sentinel node is used by the master thread to
593  store "dummy" statistics before __kmp_create_worker() is called.
594 **************************************************************** */
595 class kmp_stats_list {
596  int gtid;
597  timeStat _timers[TIMER_LAST + 1];
598  counter _counters[COUNTER_LAST + 1];
599  explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST + 1];
600  partitionedTimers _partitionedTimers;
601  int _nestLevel; // one per thread
602  kmp_stats_event_vector _event_vector;
603  kmp_stats_list *next;
604  kmp_stats_list *prev;
605  stats_state_e state;
606  int thread_is_idle_flag;
607 
608 public:
609  kmp_stats_list()
610  : _nestLevel(0), _event_vector(), next(this), prev(this), state(IDLE),
611  thread_is_idle_flag(0) {
612 #define doInit(name, ignore1, ignore2) \
613  getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \
614  _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, \
615  getExplicitTimer(EXPLICIT_TIMER_##name));
616  KMP_FOREACH_EXPLICIT_TIMER(doInit, 0);
617 #undef doInit
618  }
619  ~kmp_stats_list() {}
620  inline timeStat *getTimer(timer_e idx) { return &_timers[idx]; }
621  inline counter *getCounter(counter_e idx) { return &_counters[idx]; }
622  inline explicitTimer *getExplicitTimer(explicit_timer_e idx) {
623  return &_explicitTimers[idx];
624  }
625  inline partitionedTimers *getPartitionedTimers() {
626  return &_partitionedTimers;
627  }
628  inline timeStat *getTimers() { return _timers; }
629  inline counter *getCounters() { return _counters; }
630  inline explicitTimer *getExplicitTimers() { return _explicitTimers; }
631  inline kmp_stats_event_vector &getEventVector() { return _event_vector; }
632  inline void resetEventVector() { _event_vector.reset(); }
633  inline void incrementNestValue() { _nestLevel++; }
634  inline int getNestValue() { return _nestLevel; }
635  inline void decrementNestValue() { _nestLevel--; }
636  inline int getGtid() const { return gtid; }
637  inline void setGtid(int newgtid) { gtid = newgtid; }
638  inline void setState(stats_state_e newstate) { state = newstate; }
639  inline stats_state_e getState() const { return state; }
640  inline stats_state_e *getStatePointer() { return &state; }
641  inline bool isIdle() { return thread_is_idle_flag == 1; }
642  inline void setIdleFlag() { thread_is_idle_flag = 1; }
643  inline void resetIdleFlag() { thread_is_idle_flag = 0; }
644  kmp_stats_list *push_back(int gtid); // returns newly created list node
645  inline void push_event(uint64_t start_time, uint64_t stop_time,
646  int nest_level, timer_e name) {
647  _event_vector.push_back(start_time, stop_time, nest_level, name);
648  }
649  void deallocate();
650  class iterator;
651  kmp_stats_list::iterator begin();
652  kmp_stats_list::iterator end();
653  int size();
654  class iterator {
655  kmp_stats_list *ptr;
656  friend kmp_stats_list::iterator kmp_stats_list::begin();
657  friend kmp_stats_list::iterator kmp_stats_list::end();
658 
659  public:
660  iterator();
661  ~iterator();
662  iterator operator++();
663  iterator operator++(int dummy);
664  iterator operator--();
665  iterator operator--(int dummy);
666  bool operator!=(const iterator &rhs);
667  bool operator==(const iterator &rhs);
668  kmp_stats_list *operator*() const; // dereference operator
669  };
670 };
671 
672 /* ****************************************************************
673  Class to encapsulate all output functions and the environment variables
674 
675  This module holds filenames for various outputs (normal stats, events, plot
676  file), as well as coloring information for the plot file.
677 
678  The filenames and flags variables are read from environment variables.
679  These are read once by the constructor of the global variable
680  __kmp_stats_output which calls init().
681 
682  During this init() call, event flags for the timeStat::timerInfo[] global
683  array are cleared if KMP_STATS_EVENTS is not true (on, 1, yes).
684 
685  The only interface function that is public is outputStats(heading). This
686  function should print out everything it needs to, either to files or stderr,
687  depending on the environment variables described below
688 
689  ENVIRONMENT VARIABLES:
690  KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this
691  file, otherwise, print to stderr
692  KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to
693  either KMP_STATS_FILE or stderr
694  KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename,
695  otherwise, the plot file is sent to "events.plt"
696  KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log
697  events
698  KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file,
699  otherwise, output is sent to "events.dat"
700 **************************************************************** */
701 class kmp_stats_output_module {
702 
703 public:
704  struct rgb_color {
705  float r;
706  float g;
707  float b;
708  };
709 
710 private:
711  std::string outputFileName;
712  static const char *eventsFileName;
713  static const char *plotFileName;
714  static int printPerThreadFlag;
715  static int printPerThreadEventsFlag;
716  static const rgb_color globalColorArray[];
717  static rgb_color timerColorInfo[];
718 
719  void init();
720  static void setupEventColors();
721  static void printPloticusFile();
722  static void printHeaderInfo(FILE *statsOut);
723  static void printTimerStats(FILE *statsOut, statistic const *theStats,
724  statistic const *totalStats);
725  static void printCounterStats(FILE *statsOut, statistic const *theStats);
726  static void printCounters(FILE *statsOut, counter const *theCounters);
727  static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents,
728  int gtid);
729  static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; }
730  static void windupExplicitTimers();
731  bool eventPrintingEnabled() const { return printPerThreadEventsFlag; }
732 
733 public:
734  kmp_stats_output_module() { init(); }
735  void outputStats(const char *heading);
736 };
737 
738 #ifdef __cplusplus
739 extern "C" {
740 #endif
741 void __kmp_stats_init();
742 void __kmp_stats_fini();
743 void __kmp_reset_stats();
744 void __kmp_output_stats(const char *);
745 void __kmp_accumulate_stats_at_exit(void);
746 // thread local pointer to stats node within list
747 extern KMP_THREAD_LOCAL kmp_stats_list *__kmp_stats_thread_ptr;
748 // head to stats list.
749 extern kmp_stats_list *__kmp_stats_list;
750 // lock for __kmp_stats_list
751 extern kmp_tas_lock_t __kmp_stats_lock;
752 // reference start time
753 extern tsc_tick_count __kmp_stats_start_time;
754 // interface to output
755 extern kmp_stats_output_module __kmp_stats_output;
756 
757 #ifdef __cplusplus
758 }
759 #endif
760 
761 // Simple, standard interfaces that drop out completely if stats aren't enabled
762 
// Declares a blockTimer named __BLOCKTIME__ bound to the current thread's
// TIMER_<name>; it is started here and stopped when the scope is left.
#define KMP_TIME_BLOCK(name)                                                   \
  blockTimer __BLOCKTIME__(                                                    \
      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)

// Adds `value` as a sample to the current thread's TIMER_<name> statistic.
#define KMP_COUNT_VALUE(name, value)                                           \
  __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value)

// Increments the current thread's COUNTER_<name>.
#define KMP_COUNT_BLOCK(name)                                                  \
  __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()

// Starts the current thread's explicit timer EXPLICIT_TIMER_<name>.
#define KMP_START_EXPLICIT_TIMER(name)                                         \
  __kmp_stats_thread_ptr                                                       \
      ->getExplicitTimer(EXPLICIT_TIMER_##name)->start(TIMER_##name)

// Stops the current thread's explicit timer EXPLICIT_TIMER_<name>.
#define KMP_STOP_EXPLICIT_TIMER(name)                                          \
  __kmp_stats_thread_ptr                                                       \
      ->getExplicitTimer(EXPLICIT_TIMER_##name)->stop(TIMER_##name)

// Forwards to __kmp_output_stats() with the given heading.
#define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string)

// Calls init() on the current thread's partitioned timers with <name>.
#define KMP_INIT_PARTITIONED_TIMERS(name)                                      \
  __kmp_stats_thread_ptr->getPartitionedTimers()                               \
      ->init(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))

// Declares a blockPartitionedTimer named __PBLOCKTIME__: pushes <name> on the
// current thread's partitioned timers now and pops when the scope is left.
#define KMP_TIME_PARTITIONED_BLOCK(name)                                       \
  blockPartitionedTimer __PBLOCKTIME__(                                        \
      __kmp_stats_thread_ptr->getPartitionedTimers(),                          \
      timerPair(EXPLICIT_TIMER_##name, TIMER_##name))

// Pushes <name> on the current thread's partitioned timers.
#define KMP_PUSH_PARTITIONED_TIMER(name)                                       \
  __kmp_stats_thread_ptr->getPartitionedTimers()                               \
      ->push(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))

// Pops the current thread's partitioned timers.
#define KMP_POP_PARTITIONED_TIMER()                                            \
  __kmp_stats_thread_ptr->getPartitionedTimers()->pop()

// Sets the current thread's state.
#define KMP_SET_THREAD_STATE(state_name)                                       \
  __kmp_stats_thread_ptr->setState(state_name)

// Yields the current thread's state.
#define KMP_GET_THREAD_STATE() __kmp_stats_thread_ptr->getState()

// Declares a blockThreadState named __BTHREADSTATE__: installs state_name now
// and restores the previous state when the scope is left.
#define KMP_SET_THREAD_STATE_BLOCK(state_name)                                 \
  blockThreadState __BTHREADSTATE__(                                           \
      __kmp_stats_thread_ptr->getStatePointer(), state_name)

// Forwards to __kmp_reset_stats().
#define KMP_RESET_STATS() __kmp_reset_stats()
903 
// Developer variants of the statistics macros: they forward to the regular
// macros when KMP_DEVELOPER_STATS is enabled and are no-ops otherwise.
#if (KMP_DEVELOPER_STATS)
#define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n)
#define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v)
#define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n)
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n)
#else
// Null definitions
#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#endif
920 
921 #else // KMP_STATS_ENABLED
922 
// Statistics are disabled: every statistics macro expands to a no-op
// expression, so uses of them drop out completely.

// Timers and counters
#define KMP_TIME_BLOCK(n) ((void)0)
#define KMP_COUNT_VALUE(n, v) ((void)0)
#define KMP_COUNT_BLOCK(n) ((void)0)
#define KMP_START_EXPLICIT_TIMER(n) ((void)0)
#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0)

// Output and reset
#define KMP_OUTPUT_STATS(heading_string) ((void)0)
#define KMP_RESET_STATS() ((void)0)

// Developer variants
#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)

// Partitioned timers
#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
#define KMP_POP_PARTITIONED_TIMER() ((void)0)

// Thread state
#define KMP_SET_THREAD_STATE(state_name) ((void)0)
#define KMP_GET_THREAD_STATE() ((void)0)
#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0)
946 #endif // KMP_STATS_ENABLED
947 
948 #endif // KMP_STATS_H
statistic is valid only for master
Definition: kmp_stats.h:48
statistic is valid only for non-master threads
Definition: kmp_stats.h:51
do not show a TOTAL_aggregation for this statistic
Definition: kmp_stats.h:47
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)
Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
Definition: kmp_stats.h:250
statistic doesn't need units printed next to it in output
Definition: kmp_stats.h:49
stats_flags_e
flags to describe the statistic (timer or counter)
Definition: kmp_stats.h:46
#define KMP_FOREACH_COUNTER(macro, arg)
Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h.
Definition: kmp_stats.h:92
stats_state_e
the states which a thread can be in
Definition: kmp_stats.h:61