17 #include "kmp_config.h" 28 #include "kmp_stats_timing.h" 92 #define KMP_FOREACH_COUNTER(macro, arg) \ 93 macro(OMP_PARALLEL, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, \ 94 arg) macro(OMP_NESTED_PARALLEL, 0, arg) macro(OMP_FOR_static, 0, arg) \ 95 macro(OMP_FOR_static_steal, 0, arg) macro(OMP_FOR_dynamic, 0, arg) \ 96 macro(OMP_DISTRIBUTE, 0, arg) macro(OMP_BARRIER, 0, arg) \ 97 macro(OMP_CRITICAL, 0, arg) macro(OMP_SINGLE, 0, arg) \ 98 macro(OMP_MASTER, 0, arg) macro(OMP_TEAMS, 0, arg) \ 99 macro(OMP_set_lock, 0, arg) macro(OMP_test_lock, 0, arg) \ 100 macro(REDUCE_wait, 0, arg) \ 101 macro(REDUCE_nowait, 0, arg) \ 102 macro(OMP_TASKYIELD, 0, arg) \ 103 macro(OMP_TASKLOOP, 0, arg) \ 104 macro(TASK_executed, 0, arg) \ 105 macro(TASK_cancelled, 0, arg) \ 106 macro(TASK_stolen, 0, arg) 128 #define KMP_FOREACH_TIMER(macro, arg) \ 129 macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \ 130 macro (FOR_static_scheduling, 0, arg) \ 131 macro (FOR_dynamic_scheduling, 0, arg) \ 132 macro (OMP_critical, 0, arg) \ 133 macro (OMP_critical_wait, 0, arg) \ 134 macro (OMP_single, 0, arg) \ 135 macro (OMP_master, 0, arg) \ 136 macro (OMP_idle, stats_flags_e::logEvent, arg) \ 137 macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \ 138 macro (OMP_fork_barrier, stats_flags_e::logEvent, arg) \ 139 macro (OMP_join_barrier, stats_flags_e::logEvent, arg) \ 140 macro (OMP_parallel, stats_flags_e::logEvent, arg) \ 141 macro (OMP_task_immediate, 0, arg) \ 142 macro (OMP_task_taskwait, 0, arg) \ 143 macro (OMP_task_taskyield, 0, arg) \ 144 macro (OMP_task_taskgroup, 0, arg) \ 145 macro (OMP_task_join_bar, 0, arg) \ 146 macro (OMP_task_plain_bar, 0, arg) \ 147 macro (OMP_serial, stats_flags_e::logEvent, arg) \ 148 macro (OMP_taskloop_scheduling, 0, arg) \ 149 macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal,\ 151 macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, \ 153 macro (FOR_static_iterations, \ 154 
stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 155 macro (FOR_dynamic_iterations, \ 156 stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 157 macro (FOR_static_steal_stolen, \ 158 stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 159 macro (FOR_static_steal_chunks, \ 160 stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 161 KMP_FOREACH_DEVELOPER_TIMER(macro, arg) 200 #if (KMP_DEVELOPER_STATS) 216 #define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ 217 macro(KMP_fork_call, 0, arg) macro(KMP_join_call, 0, arg) macro( \ 218 KMP_end_split_barrier, 0, arg) macro(KMP_hier_gather, 0, arg) \ 219 macro(KMP_hier_release, 0, arg) macro(KMP_hyper_gather, 0, arg) \ 220 macro(KMP_hyper_release, 0, arg) macro(KMP_linear_gather, 0, arg) \ 221 macro(KMP_linear_release, 0, arg) macro(KMP_tree_gather, 0, arg) \ 222 macro(KMP_tree_release, 0, arg) macro(USER_resume, 0, arg) \ 223 macro(USER_suspend, 0, arg) \ 224 macro(KMP_allocate_team, 0, arg) \ 225 macro(KMP_setup_icv_copy, 0, arg) \ 226 macro(USER_icv_copy, 0, arg) 228 #define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) 250 #define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) KMP_FOREACH_TIMER(macro, arg) 252 #define ENUMERATE(name, ignore, prefix) prefix##name, 253 enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST };
255 enum explicit_timer_e {
263 explicit_timer_e timer_index;
267 timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
268 inline explicit_timer_e get_index()
const {
return timer_index; }
269 inline timer_e get_timer()
const {
return timer; }
270 bool operator==(
const timerPair &rhs) {
271 return this->get_index() == rhs.get_index();
273 bool operator!=(
const timerPair &rhs) {
return !(*
this == rhs); }
281 uint64_t sampleCount;
284 statistic() { reset(); }
285 statistic(statistic
const &o)
286 : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2),
287 sampleCount(o.sampleCount) {}
289 double getMin()
const {
return minVal; }
290 double getMean()
const {
return meanVal; }
291 double getMax()
const {
return maxVal; }
292 uint64_t getCount()
const {
return sampleCount; }
293 double getSD()
const {
return sqrt(m2 / sampleCount); }
294 double getTotal()
const {
return sampleCount * meanVal; }
297 minVal = std::numeric_limits<double>::max();
298 maxVal = -std::numeric_limits<double>::max();
303 void addSample(
double sample);
304 void scale(
double factor);
305 void scaleDown(
double f) { scale(1. / f); }
306 statistic &operator+=(statistic
const &other);
308 std::string format(
char unit,
bool total =
false)
const;
316 class timeStat :
public statistic {
317 static statInfo timerInfo[];
320 timeStat() : statistic() {}
321 static const char *name(timer_e e) {
return timerInfo[e].name; }
322 static bool noTotal(timer_e e) {
325 static bool masterOnly(timer_e e) {
328 static bool workerOnly(timer_e e) {
331 static bool noUnits(timer_e e) {
337 static void clearEventFlags() {
338 for (
int i = 0; i < TIMER_LAST; i++) {
347 class explicitTimer {
349 tsc_tick_count startTime;
350 tsc_tick_count pauseStartTime;
351 tsc_tick_count::tsc_interval_t totalPauseTime;
355 : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() {}
356 explicitTimer(timeStat *s)
357 : stat(s), startTime(), pauseStartTime(0), totalPauseTime() {}
359 void setStat(timeStat *s) { stat = s; }
360 void start(timer_e timerEnumValue);
361 void pause() { pauseStartTime = tsc_tick_count::now(); }
362 void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
363 void stop(timer_e timerEnumValue, kmp_stats_list *stats_ptr =
nullptr);
373 class blockTimer :
public explicitTimer {
374 timer_e timerEnumValue;
377 blockTimer(timeStat *s, timer_e newTimerEnumValue)
378 : timerEnumValue(newTimerEnumValue), explicitTimer(s) {
379 start(timerEnumValue);
381 ~blockTimer() { stop(timerEnumValue); }
390 class partitionedTimers {
392 explicitTimer *timers[EXPLICIT_TIMER_LAST + 1];
393 std::vector<timerPair> timer_stack;
397 void add_timer(explicit_timer_e timer_index, explicitTimer *timer_pointer);
398 void init(timerPair timer_index);
399 void push(timerPair timer_index);
406 class blockPartitionedTimer {
407 partitionedTimers *part_timers;
408 timerPair timer_pair;
411 blockPartitionedTimer(partitionedTimers *pt, timerPair tp)
412 : part_timers(pt), timer_pair(tp) {
413 part_timers->push(timer_pair);
415 ~blockPartitionedTimer() { part_timers->pop(); }
421 class blockThreadState {
427 : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
428 *state_pointer = new_state;
430 ~blockThreadState() { *state_pointer = old_state; }
438 static const statInfo counterInfo[];
441 counter() : value(0) {}
442 void increment() { value++; }
443 uint64_t getValue()
const {
return value; }
444 void reset() { value = 0; }
445 static const char *name(counter_e e) {
return counterInfo[e].name; }
446 static bool masterOnly(counter_e e) {
484 class kmp_stats_event {
492 : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
493 kmp_stats_event(uint64_t strt, uint64_t stp,
int nst, timer_e nme)
494 : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
495 inline uint64_t getStart()
const {
return start; }
496 inline uint64_t getStop()
const {
return stop; }
497 inline int getNestLevel()
const {
return nest_level; }
498 inline timer_e getTimerName()
const {
return timer_name; }
527 class kmp_stats_event_vector {
528 kmp_stats_event *events;
531 static const int INIT_SIZE = 1024;
534 kmp_stats_event_vector() {
536 (kmp_stats_event *)__kmp_allocate(
sizeof(kmp_stats_event) * INIT_SIZE);
538 allocated_size = INIT_SIZE;
540 ~kmp_stats_event_vector() {}
541 inline void reset() { internal_size = 0; }
542 inline int size()
const {
return internal_size; }
543 void push_back(uint64_t start_time, uint64_t stop_time,
int nest_level,
546 if (internal_size == allocated_size) {
547 kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate(
548 sizeof(kmp_stats_event) * allocated_size * 2);
549 for (i = 0; i < internal_size; i++)
555 events[internal_size] =
556 kmp_stats_event(start_time, stop_time, nest_level, name);
562 const kmp_stats_event &operator[](
int index)
const {
return events[index]; }
563 kmp_stats_event &operator[](
int index) {
return events[index]; }
564 const kmp_stats_event &at(
int index)
const {
return events[index]; }
565 kmp_stats_event &at(
int index) {
return events[index]; }
595 class kmp_stats_list {
597 timeStat _timers[TIMER_LAST + 1];
598 counter _counters[COUNTER_LAST + 1];
599 explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST + 1];
600 partitionedTimers _partitionedTimers;
602 kmp_stats_event_vector _event_vector;
603 kmp_stats_list *next;
604 kmp_stats_list *prev;
606 int thread_is_idle_flag;
610 : _nestLevel(0), _event_vector(), next(
this), prev(
this), state(IDLE),
611 thread_is_idle_flag(0) {
612 #define doInit(name, ignore1, ignore2) \ 613 getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \ 614 _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, \ 615 getExplicitTimer(EXPLICIT_TIMER_##name)); 620 inline timeStat *getTimer(timer_e idx) {
return &_timers[idx]; }
621 inline counter *getCounter(counter_e idx) {
return &_counters[idx]; }
622 inline explicitTimer *getExplicitTimer(explicit_timer_e idx) {
623 return &_explicitTimers[idx];
625 inline partitionedTimers *getPartitionedTimers() {
626 return &_partitionedTimers;
628 inline timeStat *getTimers() {
return _timers; }
629 inline counter *getCounters() {
return _counters; }
630 inline explicitTimer *getExplicitTimers() {
return _explicitTimers; }
631 inline kmp_stats_event_vector &getEventVector() {
return _event_vector; }
632 inline void resetEventVector() { _event_vector.reset(); }
633 inline void incrementNestValue() { _nestLevel++; }
634 inline int getNestValue() {
return _nestLevel; }
635 inline void decrementNestValue() { _nestLevel--; }
636 inline int getGtid()
const {
return gtid; }
637 inline void setGtid(
int newgtid) { gtid = newgtid; }
638 inline void setState(
stats_state_e newstate) { state = newstate; }
641 inline bool isIdle() {
return thread_is_idle_flag == 1; }
642 inline void setIdleFlag() { thread_is_idle_flag = 1; }
643 inline void resetIdleFlag() { thread_is_idle_flag = 0; }
644 kmp_stats_list *push_back(
int gtid);
645 inline void push_event(uint64_t start_time, uint64_t stop_time,
646 int nest_level, timer_e name) {
647 _event_vector.push_back(start_time, stop_time, nest_level, name);
651 kmp_stats_list::iterator begin();
652 kmp_stats_list::iterator end();
656 friend kmp_stats_list::iterator kmp_stats_list::begin();
657 friend kmp_stats_list::iterator kmp_stats_list::end();
662 iterator operator++();
663 iterator operator++(
int dummy);
664 iterator operator--();
665 iterator operator--(
int dummy);
666 bool operator!=(
const iterator &rhs);
667 bool operator==(
const iterator &rhs);
668 kmp_stats_list *operator*()
const;
701 class kmp_stats_output_module {
711 std::string outputFileName;
712 static const char *eventsFileName;
713 static const char *plotFileName;
714 static int printPerThreadFlag;
715 static int printPerThreadEventsFlag;
716 static const rgb_color globalColorArray[];
717 static rgb_color timerColorInfo[];
720 static void setupEventColors();
721 static void printPloticusFile();
722 static void printHeaderInfo(FILE *statsOut);
723 static void printTimerStats(FILE *statsOut, statistic
const *theStats,
724 statistic
const *totalStats);
725 static void printCounterStats(FILE *statsOut, statistic
const *theStats);
726 static void printCounters(FILE *statsOut, counter
const *theCounters);
727 static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents,
729 static rgb_color getEventColor(timer_e e) {
return timerColorInfo[e]; }
730 static void windupExplicitTimers();
731 bool eventPrintingEnabled()
const {
return printPerThreadEventsFlag; }
734 kmp_stats_output_module() { init(); }
735 void outputStats(
const char *heading);
741 void __kmp_stats_init();
742 void __kmp_stats_fini();
743 void __kmp_reset_stats();
744 void __kmp_output_stats(
const char *);
745 void __kmp_accumulate_stats_at_exit(
void);
747 extern KMP_THREAD_LOCAL kmp_stats_list *__kmp_stats_thread_ptr;
749 extern kmp_stats_list *__kmp_stats_list;
751 extern kmp_tas_lock_t __kmp_stats_lock;
753 extern tsc_tick_count __kmp_stats_start_time;
755 extern kmp_stats_output_module __kmp_stats_output;
775 #define KMP_TIME_BLOCK(name) \ 776 blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), \ 790 #define KMP_COUNT_VALUE(name, value) \ 791 __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value) 803 #define KMP_COUNT_BLOCK(name) \ 804 __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment() 821 #define KMP_START_EXPLICIT_TIMER(name) \ 822 __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name) \ 823 ->start(TIMER_##name) 840 #define KMP_STOP_EXPLICIT_TIMER(name) \ 841 __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name) \ 861 #define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string) 870 #define KMP_INIT_PARTITIONED_TIMERS(name) \ 871 __kmp_stats_thread_ptr->getPartitionedTimers()->init( \ 872 timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) 874 #define KMP_TIME_PARTITIONED_BLOCK(name) \ 875 blockPartitionedTimer __PBLOCKTIME__( \ 876 __kmp_stats_thread_ptr->getPartitionedTimers(), \ 877 timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) 879 #define KMP_PUSH_PARTITIONED_TIMER(name) \ 880 __kmp_stats_thread_ptr->getPartitionedTimers()->push( \ 881 timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) 883 #define KMP_POP_PARTITIONED_TIMER() \ 884 __kmp_stats_thread_ptr->getPartitionedTimers()->pop() 886 #define KMP_SET_THREAD_STATE(state_name) \ 887 __kmp_stats_thread_ptr->setState(state_name) 889 #define KMP_GET_THREAD_STATE() __kmp_stats_thread_ptr->getState() 891 #define KMP_SET_THREAD_STATE_BLOCK(state_name) \ 892 blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), \ 902 #define KMP_RESET_STATS() __kmp_reset_stats() 904 #if (KMP_DEVELOPER_STATS) 905 #define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n) 906 #define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v) 907 #define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) 908 #define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n) 909 #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) 
KMP_STOP_EXPLICIT_TIMER(n) 910 #define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n) 913 #define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) 914 #define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0) 915 #define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) 916 #define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 917 #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 918 #define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) 921 #else // KMP_STATS_ENABLED 924 #define KMP_TIME_BLOCK(n) ((void)0) 925 #define KMP_COUNT_VALUE(n, v) ((void)0) 926 #define KMP_COUNT_BLOCK(n) ((void)0) 927 #define KMP_START_EXPLICIT_TIMER(n) ((void)0) 928 #define KMP_STOP_EXPLICIT_TIMER(n) ((void)0) 930 #define KMP_OUTPUT_STATS(heading_string) ((void)0) 931 #define KMP_RESET_STATS() ((void)0) 933 #define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) 934 #define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0) 935 #define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) 936 #define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 937 #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 938 #define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0) 939 #define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0) 940 #define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) 941 #define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0) 942 #define KMP_POP_PARTITIONED_TIMER() ((void)0) 943 #define KMP_SET_THREAD_STATE(state_name) ((void)0) 944 #define KMP_GET_THREAD_STATE() ((void)0) 945 #define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0) 946 #endif // KMP_STATS_ENABLED 948 #endif // KMP_STATS_H statistic is valid only for master
statistic is valid only for non-master threads
do not show a TOTAL_aggregation for this statistic
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)
Add new explicit timers under the KMP_FOREACH_EXPLICIT_TIMER() macro.
statistic doesn't need units printed next to it in output
stats_flags_e
flags to describe the statistic (timer or counter)
#define KMP_FOREACH_COUNTER(macro, arg)
Add new counters under the KMP_FOREACH_COUNTER() macro in kmp_stats.h.
stats_state_e
the states which a thread can be in