Skip to content

Commit

Permalink
i#6938 sched migrate: Include scheduler stats in schedule_stats (#6955)
Browse files Browse the repository at this point in the history
Adds the new scheduler-provided statistics to the schedule_stats tool. 
Adds a sanity test check.

Tested on some larger apps where a high count of "nop" switches was
sometimes found; as part of understanding those, I found a code path
where if the runqueue is empty and the current thread is supposed to go
unscheduled it would be run again instead. Fixed that here, though it is
unclear whether it has ever actually happened.

Issue: #6938
  • Loading branch information
derekbruening authored Aug 30, 2024
1 parent 48b436d commit 22c681a
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 26 deletions.
3 changes: 2 additions & 1 deletion clients/drcachesim/scheduler/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2966,7 +2966,8 @@ scheduler_tmpl_t<RecordType, ReaderType>::pick_next_input(output_ordinal_t outpu
if (prev_index == INVALID_INPUT_ORDINAL)
return eof_or_idle(output, need_lock, prev_index);
auto lock = std::unique_lock<std::mutex>(*inputs_[prev_index].lock);
if (inputs_[prev_index].at_eof) {
// If we can't go back to the current input, we're EOF or idle.
if (inputs_[prev_index].at_eof || inputs_[prev_index].unscheduled) {
lock.unlock();
return eof_or_idle(output, need_lock, prev_index);
} else
Expand Down
6 changes: 6 additions & 0 deletions clients/drcachesim/tests/core_serial.templatex
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ Total counts:
0 direct context switches
100.00% voluntary switches
0.00% direct switches
4 switches input-to-input
0 switches input-to-idle
0 switches idle-to-input
0 switches nop-ed
0 quantum_preempts
0 migrations
161 system calls
2 maybe-blocking system calls
0 direct switch requests
Expand Down
22 changes: 6 additions & 16 deletions clients/drcachesim/tests/schedule_stats_nopreempt.templatex
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ Total counts:
0 direct context switches
100\.00% voluntary switches
0\.00% direct switches
5 switches input-to-input
4 switches input-to-idle
1 switches idle-to-input
0 switches nop-ed
0 quantum_preempts
0 migrations
161 system calls
2 maybe-blocking system calls
0 direct switch requests
Expand All @@ -35,10 +41,6 @@ Core #0 counts:
0 direct context switches
100\.00% voluntary switches
0\.00% direct switches
*[0-9]* system calls
. maybe-blocking system calls
0 direct switch requests
0 waits
.*
Core #1 counts:
. threads: 1257.*
Expand All @@ -50,10 +52,6 @@ Core #1 counts:
0 direct context switches
100\.00% voluntary switches
0\.00% direct switches
*[0-9]* system calls
. maybe-blocking system calls
0 direct switch requests
0 waits
.*
Core #2 counts:
. threads: 1257.*
Expand All @@ -65,10 +63,6 @@ Core #2 counts:
0 direct context switches
100\.00% voluntary switches
0\.00% direct switches
*[0-9]* system calls
. maybe-blocking system calls
0 direct switch requests
0 waits
.*
Core #3 counts:
. threads: 1257.*
Expand All @@ -80,10 +74,6 @@ Core #3 counts:
0 direct context switches
100\.00% voluntary switches
0\.00% direct switches
*[0-9]* system calls
. maybe-blocking system calls
0 direct switch requests
0 waits
.*
Core #0 schedule: [A-Ha-h_]*
Core #1 schedule: [A-Ha-h_]*
Expand Down
58 changes: 49 additions & 9 deletions clients/drcachesim/tools/schedule_stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,31 @@ schedule_stats_t::parallel_shard_exit(void *shard_data)
return true;
}

// Refreshes the scheduler-provided fields of "counters" with the values that
// "stream" reports through get_schedule_statistic(), overwriting whatever those
// fields held before.
void
schedule_stats_t::get_scheduler_stats(memtrace_stream_t *stream, counters_t &counters)
{
    // Queries one statistic from the stream and converts it to our integer
    // counter type.
    auto query = [stream](decltype(memtrace_stream_t::SCHED_STAT_MIGRATIONS) which) {
        return static_cast<int64_t>(stream->get_schedule_statistic(which));
    };

    counters.switches_input_to_input =
        query(memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_INPUT);
    counters.switches_input_to_idle =
        query(memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_IDLE);
    counters.switches_idle_to_input =
        query(memtrace_stream_t::SCHED_STAT_SWITCH_IDLE_TO_INPUT);
    counters.switches_nop = query(memtrace_stream_t::SCHED_STAT_SWITCH_NOP);
    counters.quantum_preempts = query(memtrace_stream_t::SCHED_STAT_QUANTUM_PREEMPTS);
    counters.migrations = query(memtrace_stream_t::SCHED_STAT_MIGRATIONS);

    // XXX: Currently, schedule_stats is measuring swap-ins to a real input. If we
    // want to match what "perf" targeting this app would record, which is swap-outs,
    // we should remove idle-to-input and add input-to-idle (though generally those
    // two counts are pretty similar). OTOH, if we want to match what "perf"
    // systemwide would record, we would want to add input-to-idle on top of what we
    // have today.
}

std::string
schedule_stats_t::parallel_shard_error(void *shard_data)
{
Expand Down Expand Up @@ -290,6 +315,8 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref
? tid
: input_id;
if ((workload_id != prev_workload_id || tid != prev_tid) && tid != IDLE_THREAD_ID) {
// See XXX comment in get_scheduler_stats(): this measures swap-ins, while
// "perf" measures swap-outs.
record_context_switch(shard, tid, input_id, letter_ord);
}
shard->prev_workload_id = workload_id;
Expand Down Expand Up @@ -379,6 +406,18 @@ schedule_stats_t::print_counters(const counters_t &counters)
"% voluntary switches\n");
print_percentage(static_cast<double>(counters.direct_switches),
static_cast<double>(counters.total_switches), "% direct switches\n");

// Statistics provided by scheduler.
std::cerr << std::setw(12) << counters.switches_input_to_input
<< " switches input-to-input\n";
std::cerr << std::setw(12) << counters.switches_input_to_idle
<< " switches input-to-idle\n";
std::cerr << std::setw(12) << counters.switches_idle_to_input
<< " switches idle-to-input\n";
std::cerr << std::setw(12) << counters.switches_nop << " switches nop-ed\n";
std::cerr << std::setw(12) << counters.quantum_preempts << " quantum_preempts\n";
std::cerr << std::setw(12) << counters.migrations << " migrations\n";

std::cerr << std::setw(12) << counters.syscalls << " system calls\n";
std::cerr << std::setw(12) << counters.maybe_blocking_syscalls
<< " maybe-blocking system calls\n";
Expand Down Expand Up @@ -410,12 +449,19 @@ void
schedule_stats_t::aggregate_results(counters_t &total)
{
for (const auto &shard : shard_map_) {
// First update our per-shard data with per-shard stats from the scheduler.
get_scheduler_stats(shard.second->stream, shard.second->counters);

total += shard.second->counters;

// Sanity check against the scheduler's own stats, unless the trace
// is pre-scheduled or we're in core-serial mode where we don't have access
// to the separate output streams.
// is pre-scheduled, or we're in core-serial mode where we don't have access
// to the separate output streams, or we're in a unit test with a mock
// stream and no stats.
if (TESTANY(OFFLINE_FILE_TYPE_CORE_SHARDED, shard.second->filetype) ||
serial_stream_ != nullptr)
serial_stream_ != nullptr ||
shard.second->stream->get_schedule_statistic(
memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_INPUT) < 0)
continue;
// We assume our counts fit in the 53-bit mantissa of the double returned by
// get_schedule_statistic() and thus we can safely use "==".
Expand All @@ -425,12 +471,6 @@ schedule_stats_t::aggregate_results(counters_t &total)
memtrace_stream_t::SCHED_STAT_SWITCH_INPUT_TO_INPUT) +
shard.second->stream->get_schedule_statistic(
memtrace_stream_t::SCHED_STAT_SWITCH_IDLE_TO_INPUT));
assert(shard.second->counters.total_switches -
shard.second->counters.voluntary_switches ==
shard.second->stream->get_schedule_statistic(
memtrace_stream_t::SCHED_STAT_QUANTUM_PREEMPTS) -
shard.second->stream->get_schedule_statistic(
memtrace_stream_t::SCHED_STAT_SWITCH_NOP));
assert(shard.second->counters.direct_switch_requests ==
shard.second->stream->get_schedule_statistic(
memtrace_stream_t::SCHED_STAT_DIRECT_SWITCH_ATTEMPTS));
Expand Down
17 changes: 17 additions & 0 deletions clients/drcachesim/tools/schedule_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,12 @@ class schedule_stats_t : public analysis_tool_t {
counters_t &
operator+=(const counters_t &rhs)
{
switches_input_to_input += rhs.switches_input_to_input;
switches_input_to_idle += rhs.switches_input_to_idle;
switches_idle_to_input += rhs.switches_idle_to_input;
switches_nop += rhs.switches_nop;
quantum_preempts += rhs.quantum_preempts;
migrations += rhs.migrations;
instrs += rhs.instrs;
total_switches += rhs.total_switches;
voluntary_switches += rhs.voluntary_switches;
Expand All @@ -155,6 +161,14 @@ class schedule_stats_t : public analysis_tool_t {
instrs_per_switch->merge(rhs.instrs_per_switch.get());
return *this;
}
// Statistics provided by scheduler.
int64_t switches_input_to_input = 0;
int64_t switches_input_to_idle = 0;
int64_t switches_idle_to_input = 0;
int64_t switches_nop = 0;
int64_t quantum_preempts = 0;
int64_t migrations = 0;
// Our own statistics.
int64_t instrs = 0;
int64_t total_switches = 0;
int64_t voluntary_switches = 0;
Expand Down Expand Up @@ -227,6 +241,9 @@ class schedule_stats_t : public analysis_tool_t {
virtual void
aggregate_results(counters_t &total);

// Fills in the scheduler-provided fields of "counters" with the values
// reported by "stream" via get_schedule_statistic().
void
get_scheduler_stats(memtrace_stream_t *stream, counters_t &counters);

uint64_t knob_print_every_ = 0;
unsigned int knob_verbose_ = 0;
// We use an ordered map to get our output in order. This table is not
Expand Down

0 comments on commit 22c681a

Please sign in to comment.