From 4d32f860e1da8a3db974cabf8e46c63ea5f3f87c Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Thu, 29 Aug 2024 00:05:24 -0400 Subject: [PATCH] i#6938 sched migrate: Include scheduler stats in schedule_stats Adds the new scheduler-provided statistics to the schedule_stats tool. Adds a sanity test check. Issue: #6938 --- clients/drcachesim/scheduler/scheduler.cpp | 2 +- .../tests/schedule_stats_nopreempt.templatex | 22 +++------- clients/drcachesim/tools/schedule_stats.cpp | 44 ++++++++++++++++--- clients/drcachesim/tools/schedule_stats.h | 17 +++++++ 4 files changed, 62 insertions(+), 23 deletions(-) diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp index 87f72e73cf4..89bc418beda 100644 --- a/clients/drcachesim/scheduler/scheduler.cpp +++ b/clients/drcachesim/scheduler/scheduler.cpp @@ -2966,7 +2966,7 @@ scheduler_tmpl_t<RecordType, ReaderType>::pick_next_input(output_ordinal_t outpu if (prev_index == INVALID_INPUT_ORDINAL) return eof_or_idle(output, need_lock, prev_index); auto lock = std::unique_lock(*inputs_[prev_index].lock); - if (inputs_[prev_index].at_eof) { + if (inputs_[prev_index].at_eof || inputs_[prev_index].unscheduled) { lock.unlock(); return eof_or_idle(output, need_lock, prev_index); } else diff --git a/clients/drcachesim/tests/schedule_stats_nopreempt.templatex b/clients/drcachesim/tests/schedule_stats_nopreempt.templatex index 3374497d90d..29a7eb3291b 100644 --- a/clients/drcachesim/tests/schedule_stats_nopreempt.templatex +++ b/clients/drcachesim/tests/schedule_stats_nopreempt.templatex @@ -10,6 +10,12 @@ Total counts: 0 direct context switches 100\.00% voluntary switches 0\.00% direct switches + 5 switches input-to-input + 4 switches input-to-idle + 1 switches idle-to-input + 0 switches nop-ed + 0 quantum_preempts + 0 migrations 161 system calls 2 maybe-blocking system calls 0 direct switch requests @@ -35,10 +41,6 @@ Core #0 counts: 0 direct context switches 100\.00% voluntary switches 0\.00% direct switches - 
*[0-9]* system calls - . maybe-blocking system calls - 0 direct switch requests - 0 waits .* Core #1 counts: . threads: 1257.* @@ -50,10 +52,6 @@ Core #1 counts: 0 direct context switches 100\.00% voluntary switches 0\.00% direct switches - *[0-9]* system calls - . maybe-blocking system calls - 0 direct switch requests - 0 waits .* Core #2 counts: . threads: 1257.* @@ -65,10 +63,6 @@ Core #2 counts: 0 direct context switches 100\.00% voluntary switches 0\.00% direct switches - *[0-9]* system calls - . maybe-blocking system calls - 0 direct switch requests - 0 waits .* Core #3 counts: . threads: 1257.* @@ -80,10 +74,6 @@ Core #3 counts: 0 direct context switches 100\.00% voluntary switches 0\.00% direct switches - *[0-9]* system calls - . maybe-blocking system calls - 0 direct switch requests - 0 waits .* Core #0 schedule: [A-Ha-h_]* Core #1 schedule: [A-Ha-h_]* diff --git a/clients/drcachesim/tools/schedule_stats.cpp b/clients/drcachesim/tools/schedule_stats.cpp index 4a0dc47ea91..c6cdab50c88 100644 --- a/clients/drcachesim/tools/schedule_stats.cpp +++ b/clients/drcachesim/tools/schedule_stats.cpp @@ -144,6 +144,28 @@ schedule_stats_t::parallel_shard_exit(void *shard_data) return true; } +void +schedule_stats_t::get_scheduler_stats(memtrace_stream_t *stream, counters_t &counters) +{ + counters.switches_input_to_input = stream->get_schedule_statistic( + memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_INPUT); + counters.switches_input_to_idle = stream->get_schedule_statistic( + memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_IDLE); + counters.switches_idle_to_input = stream->get_schedule_statistic( + memtrace_stream_t::SCHED_STAT_SWITCH_IDLE_TO_INPUT); + counters.switches_nop = + stream->get_schedule_statistic(memtrace_stream_t::SCHED_STAT_SWITCH_NOP); + counters.quantum_preempts = + stream->get_schedule_statistic(memtrace_stream_t::SCHED_STAT_QUANTUM_PREEMPTS); + counters.migrations = + stream->get_schedule_statistic(memtrace_stream_t::SCHED_STAT_MIGRATIONS); + + // 
XXX: If we want to match what "perf" targeting this app would record, we + // should remove idle-to-input and add input-to-idle (though generally those two + // counts are pretty similar). OTOH, if we want to match what "perf" systemwide + // would record, we would want to add input-to-idle on top of what we have. +} + std::string schedule_stats_t::parallel_shard_error(void *shard_data) { @@ -379,6 +401,18 @@ schedule_stats_t::print_counters(const counters_t &counters) "% voluntary switches\n"); print_percentage(static_cast<double>(counters.direct_switches), static_cast<double>(counters.total_switches), "% direct switches\n"); + + // Statistics provided by scheduler. + std::cerr << std::setw(12) << counters.switches_input_to_input + << " switches input-to-input\n"; + std::cerr << std::setw(12) << counters.switches_input_to_idle + << " switches input-to-idle\n"; + std::cerr << std::setw(12) << counters.switches_idle_to_input + << " switches idle-to-input\n"; + std::cerr << std::setw(12) << counters.switches_nop << " switches nop-ed\n"; + std::cerr << std::setw(12) << counters.quantum_preempts << " quantum_preempts\n"; + std::cerr << std::setw(12) << counters.migrations << " migrations\n"; + std::cerr << std::setw(12) << counters.syscalls << " system calls\n"; std::cerr << std::setw(12) << counters.maybe_blocking_syscalls << " maybe-blocking system calls\n"; @@ -410,7 +444,11 @@ void schedule_stats_t::aggregate_results(counters_t &total) { for (const auto &shard : shard_map_) { + // First update our per-shard data with per-shard stats from the scheduler. + get_scheduler_stats(shard.second->stream, shard.second->counters); + total += shard.second->counters; + // Sanity check against the scheduler's own stats, unless the trace // is pre-scheduled or we're in core-serial mode where we don't have access // to the separate output streams. 
@@ -425,12 +463,6 @@ schedule_stats_t::aggregate_results(counters_t &total) memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_INPUT) + shard.second->stream->get_schedule_statistic( memtrace_stream_t::SCHED_STAT_SWITCH_IDLE_TO_INPUT)); - assert(shard.second->counters.total_switches - - shard.second->counters.voluntary_switches == - shard.second->stream->get_schedule_statistic( - memtrace_stream_t::SCHED_STAT_QUANTUM_PREEMPTS) - - shard.second->stream->get_schedule_statistic( - memtrace_stream_t::SCHED_STAT_SWITCH_NOP)); assert(shard.second->counters.direct_switch_requests == shard.second->stream->get_schedule_statistic( memtrace_stream_t::SCHED_STAT_DIRECT_SWITCH_ATTEMPTS)); diff --git a/clients/drcachesim/tools/schedule_stats.h b/clients/drcachesim/tools/schedule_stats.h index 5cde2be6f77..9696cf8a6b6 100644 --- a/clients/drcachesim/tools/schedule_stats.h +++ b/clients/drcachesim/tools/schedule_stats.h @@ -136,6 +136,12 @@ class schedule_stats_t : public analysis_tool_t { counters_t & operator+=(const counters_t &rhs) { + switches_input_to_input += rhs.switches_input_to_input; + switches_input_to_idle += rhs.switches_input_to_idle; + switches_idle_to_input += rhs.switches_idle_to_input; + switches_nop += rhs.switches_nop; + quantum_preempts += rhs.quantum_preempts; + migrations += rhs.migrations; instrs += rhs.instrs; total_switches += rhs.total_switches; voluntary_switches += rhs.voluntary_switches; @@ -155,6 +161,14 @@ class schedule_stats_t : public analysis_tool_t { instrs_per_switch->merge(rhs.instrs_per_switch.get()); return *this; } + // Statistics provided by scheduler. + int64_t switches_input_to_input = 0; + int64_t switches_input_to_idle = 0; + int64_t switches_idle_to_input = 0; + int64_t switches_nop = 0; + int64_t quantum_preempts = 0; + int64_t migrations = 0; + // Our own statistics. 
int64_t instrs = 0; int64_t total_switches = 0; int64_t voluntary_switches = 0; @@ -227,6 +241,9 @@ class schedule_stats_t : public analysis_tool_t { virtual void aggregate_results(counters_t &total); + void + get_scheduler_stats(memtrace_stream_t *stream, counters_t &shard); + uint64_t knob_print_every_ = 0; unsigned int knob_verbose_ = 0; // We use an ordered map to get our output in order. This table is not