From 3d24761e402c17f32dad23a86b0ec305e68187f7 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Fri, 25 Aug 2023 15:15:20 -0400 Subject: [PATCH 1/4] i#6426: Add schedule_stats tool Adds a new analysis tool "schedule_stats" which computes characteristics of a core-sharded schedule. This is meant to help in understanding and studying the effects of varying the scheduler parameters. This initial version counts instructions, blocking and all syscalls, voluntary and direct switches, and direct requests, and computes derived statistics CSPKI, I/CS, %voluntary, and %direct switches. Failed direct requests can be inferred. The tool also records a string of letters representing input threads to help visualize the thread interleaving on each core. The number of instructions per letter is controlled by a new parameter -print_every. This string visualization has its limits as more than 26 inputs will wrap around, but this approach has been useful in our unit tests and other small runs. Adds a regression test which runs the checked-in threadsig trace. 
Issue: #6426 --- clients/drcachesim/CMakeLists.txt | 9 +- clients/drcachesim/analyzer_multi.cpp | 4 + clients/drcachesim/common/options.cpp | 8 +- clients/drcachesim/common/options.h | 18 +- .../tests/schedule_stats_nopreempt.templatex | 76 +++++ clients/drcachesim/tools/schedule_stats.cpp | 295 ++++++++++++++++++ clients/drcachesim/tools/schedule_stats.h | 144 +++++++++ .../drcachesim/tools/schedule_stats_create.h | 60 ++++ suite/tests/CMakeLists.txt | 5 + 9 files changed, 609 insertions(+), 10 deletions(-) create mode 100644 clients/drcachesim/tests/schedule_stats_nopreempt.templatex create mode 100644 clients/drcachesim/tools/schedule_stats.cpp create mode 100644 clients/drcachesim/tools/schedule_stats.h create mode 100644 clients/drcachesim/tools/schedule_stats_create.h diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index c7eaf52e95e..b482353a038 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -166,6 +166,7 @@ add_exported_library(drmemtrace_syscall_mix STATIC tools/syscall_mix.cpp) add_exported_library(drmemtrace_view STATIC tools/view.cpp) add_exported_library(drmemtrace_func_view STATIC tools/func_view.cpp) add_exported_library(drmemtrace_invariant_checker STATIC tools/invariant_checker.cpp) +add_exported_library(drmemtrace_schedule_stats STATIC tools/schedule_stats.cpp) target_link_libraries(drmemtrace_invariant_checker drdecode) @@ -273,7 +274,8 @@ configure_DynamoRIO_standalone(drcachesim) target_link_libraries(drcachesim drmemtrace_simulator drmemtrace_reuse_distance drmemtrace_histogram drmemtrace_reuse_time drmemtrace_basic_counts drmemtrace_opcode_mix drmemtrace_syscall_mix drmemtrace_view drmemtrace_func_view - drmemtrace_raw2trace directory_iterator drmemtrace_invariant_checker) + drmemtrace_raw2trace directory_iterator drmemtrace_invariant_checker + drmemtrace_schedule_stats) if (UNIX) target_link_libraries(drcachesim dl) endif () @@ -346,6 +348,7 @@ 
install_client_nonDR_header(drmemtrace tools/histogram_create.h) install_client_nonDR_header(drmemtrace tools/reuse_time_create.h) install_client_nonDR_header(drmemtrace tools/basic_counts_create.h) install_client_nonDR_header(drmemtrace tools/opcode_mix_create.h) +install_client_nonDR_header(drmemtrace tools/schedule_stats_create.h) install_client_nonDR_header(drmemtrace tools/syscall_mix_create.h) install_client_nonDR_header(drmemtrace simulator/cache_simulator.h) install_client_nonDR_header(drmemtrace simulator/cache_simulator_create.h) @@ -573,6 +576,7 @@ restore_nonclient_flags(drmemtrace_func_view) restore_nonclient_flags(drmemtrace_record_filter) restore_nonclient_flags(drmemtrace_analyzer) restore_nonclient_flags(drmemtrace_invariant_checker) +restore_nonclient_flags(drmemtrace_schedule_stats) # We need to pass /EHsc and we pull in libcmtd into drcachesim from a dep lib. # Thus we need to override the /MT with /MTd. @@ -638,6 +642,7 @@ add_win32_flags(drmemtrace_func_view) add_win32_flags(drmemtrace_record_filter) add_win32_flags(drmemtrace_analyzer) add_win32_flags(drmemtrace_invariant_checker) +add_win32_flags(drmemtrace_schedule_stats) add_win32_flags(directory_iterator) add_win32_flags(test_helpers) if (WIN32 AND DEBUG) @@ -809,7 +814,7 @@ if (BUILD_TESTS) drmemtrace_histogram drmemtrace_reuse_time drmemtrace_basic_counts drmemtrace_opcode_mix drmemtrace_syscall_mix drmemtrace_view drmemtrace_func_view drmemtrace_raw2trace directory_iterator drmemtrace_invariant_checker - drmemtrace_analyzer) + drmemtrace_schedule_stats drmemtrace_analyzer) if (UNIX) target_link_libraries(tool.drcachesim.core_sharded dl) endif () diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp index 739b086e8b1..ede2693849b 100644 --- a/clients/drcachesim/analyzer_multi.cpp +++ b/clients/drcachesim/analyzer_multi.cpp @@ -58,6 +58,7 @@ #include "tools/invariant_checker.h" #include "tools/invariant_checker_create.h" #include 
"tools/opcode_mix_create.h" +#include "tools/schedule_stats_create.h" #include "tools/syscall_mix_create.h" #include "tools/reuse_distance_create.h" #include "tools/reuse_time_create.h" @@ -427,6 +428,9 @@ analyzer_multi_t::create_analysis_tool_from_options(const std::string &simulator op_verbose.get_value()); } else if (simulator_type == INVARIANT_CHECKER) { return create_invariant_checker(); + } else if (simulator_type == SCHEDULE_STATS) { + return schedule_stats_tool_create(op_print_every.get_value(), + op_verbose.get_value()); } else { auto tool = create_external_tool(simulator_type); if (tool == nullptr) { diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 40c29bb8d89..ddc939f5cc9 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -34,6 +34,7 @@ #include "options.h" +#include #include #include "dr_api.h" // For IF_X86_ELSE. @@ -460,7 +461,7 @@ droption_t "Specifies the types of simulators, separated by a colon (\":\").", "Predefined types: " CPU_CACHE ", " MISS_ANALYZER ", " TLB ", " REUSE_DIST ", " REUSE_TIME ", " HISTOGRAM ", " BASIC_COUNTS - ", or " INVARIANT_CHECKER + ", " INVARIANT_CHECKER ", or " SCHEDULE_STATS ". The external types: name of a tool identified by a " "name.drcachesim config file in the DR tools directory."); @@ -854,5 +855,10 @@ droption_t "Path with stored as-traced schedule for replay."); #endif +// Schedule_stats options. 
+droption_t op_print_every(DROPTION_SCOPE_ALL, "print_every", 5000, + "A letter is printed every N instrs", + "A letter is printed every N instrs or N waits"); + } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index 373dfa4f305..8c1e6453e4e 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -35,13 +35,6 @@ #ifndef _OPTIONS_H_ #define _OPTIONS_H_ 1 -#define REPLACE_POLICY_NON_SPECIFIED "" -#define REPLACE_POLICY_LRU "LRU" -#define REPLACE_POLICY_LFU "LFU" -#define REPLACE_POLICY_FIFO "FIFO" -#define PREFETCH_POLICY_NEXTLINE "nextline" -#define PREFETCH_POLICY_NONE "none" -#define CPU_CACHE "cache" #define MISS_ANALYZER "miss_analyzer" #define TLB "TLB" #define HISTOGRAM "histogram" @@ -53,10 +46,20 @@ #define VIEW "view" #define FUNC_VIEW "func_view" #define INVARIANT_CHECKER "invariant_checker" +#define SCHEDULE_STATS "schedule_stats" + +#define REPLACE_POLICY_NON_SPECIFIED "" +#define REPLACE_POLICY_LRU "LRU" +#define REPLACE_POLICY_LFU "LFU" +#define REPLACE_POLICY_FIFO "FIFO" +#define PREFETCH_POLICY_NEXTLINE "nextline" +#define PREFETCH_POLICY_NONE "none" +#define CPU_CACHE "cache" #define CACHE_TYPE_INSTRUCTION "instruction" #define CACHE_TYPE_DATA "data" #define CACHE_TYPE_UNIFIED "unified" #define CACHE_PARENT_MEMORY "memory" + // The expected pattern for a single_op_value is: // function_name|function_id|arguments_num // where function_name can contain spaces (for instance, C++ namespace prefix) @@ -191,6 +194,7 @@ extern dynamorio::droption::droption_t op_record_file; extern dynamorio::droption::droption_t op_replay_file; extern dynamorio::droption::droption_t op_cpu_schedule_file; #endif +extern dynamorio::droption::droption_t op_print_every; } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/tests/schedule_stats_nopreempt.templatex b/clients/drcachesim/tests/schedule_stats_nopreempt.templatex 
new file mode 100644 index 00000000000..adc3b0a16fb --- /dev/null +++ b/clients/drcachesim/tests/schedule_stats_nopreempt.templatex @@ -0,0 +1,76 @@ +Schedule stats tool results: +Total counts: + 4 cores + 8 threads + 638938 instructions + 5 total context switches + 0.0078255 CSPKI \(context switches per 1000 instructions\) + 127788 instructions per context switch + 5 voluntary context switches + 0 direct context switches + 100.00% voluntary switches + 0.00% direct switches + 161 system calls + 2 maybe-blocking system calls + 0 direct switch requests + 0 waits +Core #0 counts: + . threads + *[0-9]* instructions + . total context switches + 0.0[0-9.]* CSPKI \(context switches per 1000 instructions\) + *[0-9]* instructions per context switch + . voluntary context switches + 0 direct context switches + 100.00% voluntary switches + 0.00% direct switches + *[0-9]* system calls + . maybe-blocking system calls + 0 direct switch requests + 0 waits +Core #1 counts: + 2 threads + *[0-9]* instructions + . total context switches + 0.0[0-9.]* CSPKI \(context switches per 1000 instructions\) + *[0-9]* instructions per context switch + . voluntary context switches + 0 direct context switches + 100.00% voluntary switches + 0.00% direct switches + .. system calls + . maybe-blocking system calls + 0 direct switch requests + 0 waits +Core #2 counts: + 2 threads + *[0-9]* instructions + 1 total context switches + 0.0[0-9.]* CSPKI \(context switches per 1000 instructions\) + *[0-9]* instructions per context switch + 1 voluntary context switches + 0 direct context switches + 100.00% voluntary switches + 0.00% direct switches + .. system calls + . 
maybe-blocking system calls + 0 direct switch requests + 0 waits +Core #3 counts: + 2 threads + *[0-9]* instructions + 1 total context switches + 0.0[0-9.]* CSPKI \(context switches per 1000 instructions\) + *[0-9]* instructions per context switch + 1 voluntary context switches + 0 direct context switches + 100.00% voluntary switches + 0.00% direct switches + .. system calls + . maybe-blocking system calls + 0 direct switch requests + 0 waits +Core #0 schedule: FFFFFF,HHHHHHHHHHHHHHHHHHH,(FFF|EEEEEEEEEEEEEEEEEEE|CCCCCCCCCCCCCCCCCCC|AAAAAAAAAAAAAAAAAAA) +Core #1 schedule: DDDDDDDDDDDDDDDDDD,(FFF|EEEEEEEEEEEEEEEEEEE|CCCCCCCCCCCCCCCCCCC|AAAAAAAAAAAAAAAAAAA) +Core #2 schedule: GGGGGGGGGGGGGGGGGGG,(FFF|EEEEEEEEEEEEEEEEEEE|CCCCCCCCCCCCCCCCCCC|AAAAAAAAAAAAAAAAAAA) +Core #3 schedule: BBBBBBBBBBBBBBBBBBB,(FFF|EEEEEEEEEEEEEEEEEEE|CCCCCCCCCCCCCCCCCCC|AAAAAAAAAAAAAAAAAAA) diff --git a/clients/drcachesim/tools/schedule_stats.cpp b/clients/drcachesim/tools/schedule_stats.cpp new file mode 100644 index 00000000000..9bae0bca585 --- /dev/null +++ b/clients/drcachesim/tools/schedule_stats.cpp @@ -0,0 +1,295 @@ +/* ********************************************************** + * Copyright (c) 2017-2023 Google, Inc. All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of Google, Inc. 
nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +#define NOMINMAX // Avoid windows.h messing up std::max. + +#include "schedule_stats.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "analysis_tool.h" +#include "memref.h" +#include "trace_entry.h" +#include "utils.h" + +namespace dynamorio { +namespace drmemtrace { + +const std::string schedule_stats_t::TOOL_NAME = "Schedule stats tool"; + +analysis_tool_t * +schedule_stats_tool_create(uint64_t print_every, unsigned int verbose) +{ + return new schedule_stats_t(print_every, verbose); +} + +schedule_stats_t::schedule_stats_t(uint64_t print_every, unsigned int verbose) + : knob_print_every_(print_every) + , knob_verbose_(verbose) +{ + // Empty. 
+} + +schedule_stats_t::~schedule_stats_t() +{ + for (auto &iter : shard_map_) { + delete iter.second; + } +} + +std::string +schedule_stats_t::initialize_stream(memtrace_stream_t *serial_stream) +{ + if (serial_stream != nullptr) + return "Only core-sharded operation is supported"; + return ""; +} + +std::string +schedule_stats_t::initialize_shard_type(shard_type_t shard_type) +{ + if (shard_type != SHARD_BY_CORE) + return "Only core-sharded operation is supported"; + return ""; +} + +bool +schedule_stats_t::process_memref(const memref_t &memref) +{ + error_string_ = "Only core-sharded operation is supported."; + return false; +} + +bool +schedule_stats_t::parallel_shard_supported() +{ + return true; +} + +void * +schedule_stats_t::parallel_shard_init_stream(int shard_index, void *worker_data, + memtrace_stream_t *stream) +{ + auto per_shard = new per_shard_t; + std::lock_guard guard(shard_map_mutex_); + per_shard->stream = stream; + per_shard->core = stream->get_output_cpuid(); + shard_map_[shard_index] = per_shard; + return reinterpret_cast(per_shard); +} + +bool +schedule_stats_t::parallel_shard_exit(void *shard_data) +{ + // Nothing (we read the shard data in print_results). 
+ return true; +} + +std::string +schedule_stats_t::parallel_shard_error(void *shard_data) +{ + per_shard_t *per_shard = reinterpret_cast(shard_data); + return per_shard->error; +} + +bool +schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref) +{ + per_shard_t *shard = reinterpret_cast(shard_data); + if (knob_verbose_ >= 4) { + std::ostringstream line; + line << "Core #" << std::setw(2) << shard->core << " @" << std::setw(9) + << shard->stream->get_record_ordinal() << " refs, " << std::setw(9) + << shard->stream->get_instruction_ordinal() << " instrs: input " + << std::setw(4) << shard->stream->get_input_id() << " @" << std::setw(9) + << shard->stream->get_input_interface()->get_record_ordinal() << " refs, " + << std::setw(9) + << shard->stream->get_input_interface()->get_instruction_ordinal() + << " instrs: " << std::setw(16) << trace_type_names[memref.marker.type]; + if (type_is_instr(memref.instr.type)) + line << " pc=" << std::hex << memref.instr.addr << std::dec; + else if (memref.marker.type == TRACE_TYPE_MARKER) { + line << " " << memref.marker.marker_type + << " val=" << memref.marker.marker_value; + } + line << "\n"; + std::cerr << line.str(); + } + if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_WAIT) { + ++shard->counters.waits; + if (!shard->prev_was_wait) { + shard->thread_sequence += '-'; + shard->cur_segment_instrs = 0; + shard->prev_was_wait = true; + } else { + ++shard->cur_segment_instrs; + if (shard->cur_segment_instrs == knob_print_every_) { + shard->thread_sequence += '-'; + } + } + return true; + } else + shard->prev_was_wait = false; + int64_t input = shard->stream->get_input_id(); + if (input != shard->prev_input) { + // We convert to letters which only works well for <=26 inputs. 
+ if (!shard->thread_sequence.empty()) { + ++shard->counters.total_switches; + if (shard->saw_maybe_blocking || shard->saw_exit) + ++shard->counters.voluntary_switches; + if (shard->direct_switch_target == memref.marker.tid) + ++shard->counters.direct_switches; + shard->thread_sequence += ','; + } + shard->thread_sequence += 'A' + static_cast(input % 26); + shard->cur_segment_instrs = 0; + if (knob_verbose_ >= 2) { + std::ostringstream line; + line << "Core #" << std::setw(2) << shard->core << " @" << std::setw(9) + << shard->stream->get_record_ordinal() << " refs, " << std::setw(9) + << shard->stream->get_instruction_ordinal() << " instrs: input " + << std::setw(4) << input << " @" << std::setw(9) + << shard->stream->get_input_interface()->get_record_ordinal() + << " refs, " << std::setw(9) + << shard->stream->get_input_interface()->get_instruction_ordinal() + << " instrs, time " + << std::setw(16) + // TODO i#5843: For time quanta, provide some way to get the + // latest time and print that here instead of the timestamp? + << shard->stream->get_input_interface()->get_last_timestamp() + << " == thread " << memref.instr.tid << "\n"; + std::cerr << line.str(); + } + shard->prev_input = input; + } + if (type_is_instr(memref.instr.type)) { + ++shard->counters.instrs; + // Print a single letter for any partial sequence. 
+ if (shard->cur_segment_instrs == 0) + shard->thread_sequence += 'A' + static_cast(input % 26); + ++shard->cur_segment_instrs; + if (shard->cur_segment_instrs == knob_print_every_) + shard->cur_segment_instrs = 0; + shard->direct_switch_target = INVALID_THREAD_ID; + shard->saw_maybe_blocking = false; + shard->saw_exit = false; + } + if (memref.instr.tid != INVALID_THREAD_ID) + shard->counters.threads.insert(memref.instr.tid); + if (memref.marker.type == TRACE_TYPE_MARKER) { + if (memref.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL) + ++shard->counters.syscalls; + if (memref.marker.marker_type == TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL) { + ++shard->counters.maybe_blocking_syscalls; + shard->saw_maybe_blocking = true; + } + if (memref.marker.marker_type == TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH) { + ++shard->counters.direct_switch_requests; + shard->direct_switch_target = memref.marker.marker_value; + } + } else if (memref.exit.type == TRACE_TYPE_THREAD_EXIT) + shard->saw_exit = true; + return true; +} + +void +schedule_stats_t::print_counters(const counters_t &counters) +{ + std::cerr << std::setw(12) << counters.threads.size() << " threads\n"; + std::cerr << std::setw(12) << counters.instrs << " instructions\n"; + std::cerr << std::setw(12) << counters.total_switches << " total context switches\n"; + std::cerr << std::setw(12) << std::fixed << std::setprecision(7) + << (1000 * counters.total_switches / static_cast(counters.instrs)) + << " CSPKI (context switches per 1000 instructions)\n"; + std::cerr << std::setw(12) << std::fixed << std::setprecision(0) + << (counters.instrs / static_cast(counters.total_switches)) + << " instructions per context switch\n"; + std::cerr << std::setw(12) << std::fixed << std::setprecision(7) + << counters.voluntary_switches << " voluntary context switches\n"; + std::cerr << std::setw(12) << counters.direct_switches + << " direct context switches\n"; + if (counters.total_switches > 0) { + std::cerr << std::setw(12) << 
std::setprecision(2) + << 100 * + (counters.voluntary_switches / + static_cast(counters.total_switches)) + << "% voluntary switches\n"; + std::cerr << std::setw(12) << std::setprecision(2) + << 100 * + (counters.direct_switches / static_cast(counters.total_switches)) + << "% direct switches\n"; + } + std::cerr << std::setw(12) << counters.syscalls << " system calls\n"; + std::cerr << std::setw(12) << counters.maybe_blocking_syscalls + << " maybe-blocking system calls\n"; + std::cerr << std::setw(12) << counters.direct_switch_requests + << " direct switch requests\n"; + std::cerr << std::setw(12) << counters.waits << " waits\n"; +} + +bool +schedule_stats_t::print_results() +{ + std::cerr << TOOL_NAME << " results:\n"; + std::cerr << "Total counts:\n"; + counters_t total; + for (const auto &shard : shard_map_) { + total += shard.second->counters; + } + std::cerr << std::setw(12) << shard_map_.size() << " cores\n"; + print_counters(total); + for (const auto &shard : shard_map_) { + std::cerr << "Core #" << shard.second->core << " counts:\n"; + print_counters(shard.second->counters); + } + for (const auto &shard : shard_map_) { + std::cerr << "Core #" << shard.second->core + << " schedule: " << shard.second->thread_sequence << "\n"; + } + return true; +} + +} // namespace drmemtrace +} // namespace dynamorio diff --git a/clients/drcachesim/tools/schedule_stats.h b/clients/drcachesim/tools/schedule_stats.h new file mode 100644 index 00000000000..2b06f92b084 --- /dev/null +++ b/clients/drcachesim/tools/schedule_stats.h @@ -0,0 +1,144 @@ +/* ********************************************************** + * Copyright (c) 2023 Google, Inc. All rights reserved. 
+ * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of Google, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. 
+ */ + +#ifndef _SCHEDULE_STATS_H_ +#define _SCHEDULE_STATS_H_ 1 + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "analysis_tool.h" +#include "memref.h" + +namespace dynamorio { +namespace drmemtrace { + +class schedule_stats_t : public analysis_tool_t { +public: + schedule_stats_t(uint64_t print_every, unsigned int verbose); + ~schedule_stats_t() override; + std::string + initialize_stream(memtrace_stream_t *serial_stream) override; + std::string + initialize_shard_type(shard_type_t shard_type) override; + bool + process_memref(const memref_t &memref) override; + bool + print_results() override; + bool + parallel_shard_supported() override; + void * + parallel_shard_init_stream(int shard_index, void *worker_data, + memtrace_stream_t *stream) override; + bool + parallel_shard_exit(void *shard_data) override; + bool + parallel_shard_memref(void *shard_data, const memref_t &memref) override; + std::string + parallel_shard_error(void *shard_data) override; + + struct counters_t { + counters_t() + { + } + counters_t & + operator+=(const counters_t &rhs) + { + instrs += rhs.instrs; + total_switches += rhs.total_switches; + voluntary_switches += rhs.voluntary_switches; + direct_switches += rhs.direct_switches; + syscalls += rhs.syscalls; + maybe_blocking_syscalls += rhs.maybe_blocking_syscalls; + direct_switch_requests += rhs.direct_switch_requests; + waits += rhs.waits; + for (const memref_tid_t tid : rhs.threads) { + threads.insert(tid); + } + return *this; + } + int64_t instrs = 0; + int64_t total_switches = 0; + int64_t voluntary_switches = 0; + int64_t direct_switches = 0; // Subset of voluntary_switches. 
+ int64_t syscalls = 0; + int64_t maybe_blocking_syscalls = 0; + int64_t direct_switch_requests = 0; + int64_t waits = 0; + std::unordered_set threads; + }; + counters_t + get_total_counts(); + +protected: + struct per_shard_t { + std::string error; + memtrace_stream_t *stream = nullptr; + int core = 0; // We target core-sharded. + counters_t counters; + int64_t prev_input = -1; + // These are cleared when an instruction is seen. + bool saw_maybe_blocking = false; + memref_tid_t direct_switch_target = INVALID_THREAD_ID; + bool saw_exit = false; + // A representation of the thread interleavings. + std::string thread_sequence; + uint64_t cur_segment_instrs = 0; + bool prev_was_wait = false; + }; + + void + print_counters(const counters_t &counters); + + uint64_t knob_print_every_ = 0; + unsigned int knob_verbose_ = 0; + // We use an ordered map to get our output in order. This table is not + // used on the hot path so its performance does not matter. + std::map shard_map_; + // This mutex is only needed in parallel_shard_init. In all other accesses to + // shard_map (in print_results) we are single-threaded. + std::mutex shard_map_mutex_; + static const std::string TOOL_NAME; + memtrace_stream_t *serial_stream_ = nullptr; +}; + +} // namespace drmemtrace +} // namespace dynamorio + +#endif /* _SCHEDULE_STATS_H_ */ diff --git a/clients/drcachesim/tools/schedule_stats_create.h b/clients/drcachesim/tools/schedule_stats_create.h new file mode 100644 index 00000000000..4b26f0c0202 --- /dev/null +++ b/clients/drcachesim/tools/schedule_stats_create.h @@ -0,0 +1,60 @@ +/* ********************************************************** + * Copyright (c) 2023 Google, Inc. All rights reserved. 
+ * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of Google, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +/* Schedule stats tool creation. */ + +#ifndef _SCHEDULE_STATS_CREATE_H_ +#define _SCHEDULE_STATS_CREATE_H_ 1 + +#include "analysis_tool.h" + +#include + +namespace dynamorio { +namespace drmemtrace { + +/** + * @file drmemtrace/schedule_stats_create.h + * @brief DrMemtrace schedule statistics analysis tool creation. 
+ */ + +/** + * Creates an analysis tool which counts the number and type of context switches + * in a core-sharded trace schedule. + */ +analysis_tool_t * +schedule_stats_tool_create(uint64_t print_every, unsigned int verbose = 0); + +} // namespace drmemtrace +} // namespace dynamorio + +#endif /* _SCHEDULE_STATS_CREATE_H_ */ diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 45bd5bf0d6c..403244f6425 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -3771,6 +3771,11 @@ if (BUILD_CLIENTS) "-indir ${thread_trace_dir} -simulator_type basic_counts -only_thread 1257604" "") set(tool.counts_only_thread_rawtemp ON) # no preprocessor + + torunonly_simtool(schedule_stats_nopreempt ${ci_shared_app} + "-indir ${thread_trace_dir} -simulator_type schedule_stats -core_sharded -sched_quantum 10000000" + "") + set(tool.schedule_stats_nopreempt_rawtemp ON) # no preprocessor endif () endif () From f4b8fe1c74c90cd6689577551b4fe7389d6066a6 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Fri, 10 Nov 2023 13:45:29 -0500 Subject: [PATCH 2/4] Fix Windows build warning --- clients/drcachesim/tools/schedule_stats.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/drcachesim/tools/schedule_stats.h b/clients/drcachesim/tools/schedule_stats.h index 2b06f92b084..0afffa193b1 100644 --- a/clients/drcachesim/tools/schedule_stats.h +++ b/clients/drcachesim/tools/schedule_stats.h @@ -110,7 +110,7 @@ class schedule_stats_t : public analysis_tool_t { struct per_shard_t { std::string error; memtrace_stream_t *stream = nullptr; - int core = 0; // We target core-sharded. + int64_t core = 0; // We target core-sharded. counters_t counters; int64_t prev_input = -1; // These are cleared when an instruction is seen. 
From 9df91a28f4291705321f88d57d4ccffe64669921 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Fri, 10 Nov 2023 14:25:53 -0500 Subject: [PATCH 3/4] Relax test schedule output as thread order can change --- .../drcachesim/tests/schedule_stats_nopreempt.templatex | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clients/drcachesim/tests/schedule_stats_nopreempt.templatex b/clients/drcachesim/tests/schedule_stats_nopreempt.templatex index adc3b0a16fb..f94d9f610f6 100644 --- a/clients/drcachesim/tests/schedule_stats_nopreempt.templatex +++ b/clients/drcachesim/tests/schedule_stats_nopreempt.templatex @@ -70,7 +70,7 @@ Core #3 counts: . maybe-blocking system calls 0 direct switch requests 0 waits -Core #0 schedule: FFFFFF,HHHHHHHHHHHHHHHHHHH,(FFF|EEEEEEEEEEEEEEEEEEE|CCCCCCCCCCCCCCCCCCC|AAAAAAAAAAAAAAAAAAA) -Core #1 schedule: DDDDDDDDDDDDDDDDDD,(FFF|EEEEEEEEEEEEEEEEEEE|CCCCCCCCCCCCCCCCCCC|AAAAAAAAAAAAAAAAAAA) -Core #2 schedule: GGGGGGGGGGGGGGGGGGG,(FFF|EEEEEEEEEEEEEEEEEEE|CCCCCCCCCCCCCCCCCCC|AAAAAAAAAAAAAAAAAAA) -Core #3 schedule: BBBBBBBBBBBBBBBBBBB,(FFF|EEEEEEEEEEEEEEEEEEE|CCCCCCCCCCCCCCCCCCC|AAAAAAAAAAAAAAAAAAA) +Core #0 schedule: [A-H,]* +Core #1 schedule: [A-H,]* +Core #2 schedule: [A-H,]* +Core #3 schedule: [A-H,]* From fbcd8b7fa499e9e5ca8baa235bb01a3fed3b3222 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Sat, 11 Nov 2023 16:41:52 -0500 Subject: [PATCH 4/4] Review requests: Rename option; Add constants for output chars; Add multiple comments; Move prev_was_wait --- clients/drcachesim/analyzer_multi.cpp | 2 +- clients/drcachesim/common/options.cpp | 7 +++--- clients/drcachesim/common/options.h | 4 +++- clients/drcachesim/tools/schedule_stats.cpp | 23 +++++++++++-------- .../drcachesim/tools/schedule_stats_create.h | 3 ++- 5 files changed, 24 insertions(+), 15 deletions(-) diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp index ede2693849b..7c458ea881c 100644 --- 
a/clients/drcachesim/analyzer_multi.cpp +++ b/clients/drcachesim/analyzer_multi.cpp @@ -429,7 +429,7 @@ analyzer_multi_t::create_analysis_tool_from_options(const std::string &simulator } else if (simulator_type == INVARIANT_CHECKER) { return create_invariant_checker(); } else if (simulator_type == SCHEDULE_STATS) { - return schedule_stats_tool_create(op_print_every.get_value(), + return schedule_stats_tool_create(op_schedule_stats_print_every.get_value(), op_verbose.get_value()); } else { auto tool = create_external_tool(simulator_type); diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index ddc939f5cc9..af0496a9d07 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -856,9 +856,10 @@ droption_t #endif // Schedule_stats options. -droption_t op_print_every(DROPTION_SCOPE_ALL, "print_every", 5000, - "A letter is printed every N instrs", - "A letter is printed every N instrs or N waits"); +droption_t + op_schedule_stats_print_every(DROPTION_SCOPE_ALL, "schedule_stats_print_every", 5000, + "A letter is printed every N instrs", + "A letter is printed every N instrs or N waits"); } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index 8c1e6453e4e..996cff01739 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -35,6 +35,7 @@ #ifndef _OPTIONS_H_ #define _OPTIONS_H_ 1 +// Tool names (for -simulator_type option). #define MISS_ANALYZER "miss_analyzer" #define TLB "TLB" #define HISTOGRAM "histogram" @@ -48,6 +49,7 @@ #define INVARIANT_CHECKER "invariant_checker" #define SCHEDULE_STATS "schedule_stats" +// Constants used by specific tools. 
#define REPLACE_POLICY_NON_SPECIFIED "" #define REPLACE_POLICY_LRU "LRU" #define REPLACE_POLICY_LFU "LFU" @@ -194,7 +196,7 @@ extern dynamorio::droption::droption_t op_record_file; extern dynamorio::droption::droption_t op_replay_file; extern dynamorio::droption::droption_t op_cpu_schedule_file; #endif -extern dynamorio::droption::droption_t<uint64_t> op_print_every; +extern dynamorio::droption::droption_t<uint64_t> op_schedule_stats_print_every; } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/tools/schedule_stats.cpp b/clients/drcachesim/tools/schedule_stats.cpp index 9bae0bca585..16103add7fa 100644 --- a/clients/drcachesim/tools/schedule_stats.cpp +++ b/clients/drcachesim/tools/schedule_stats.cpp @@ -136,6 +136,9 @@ schedule_stats_t::parallel_shard_error(void *shard_data) bool schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref) { + static constexpr char THREAD_LETTER_START = 'A'; + static constexpr char THREAD_SEPARATOR = ','; + static constexpr char WAIT_SYMBOL = '-'; per_shard_t *shard = reinterpret_cast<per_shard_t *>(shard_data); if (knob_verbose_ >= 4) { std::ostringstream line; @@ -166,12 +169,11 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref } else { ++shard->cur_segment_instrs; if (shard->cur_segment_instrs == knob_print_every_) { - shard->thread_sequence += '-'; + shard->thread_sequence += WAIT_SYMBOL; } } return true; - } else - shard->prev_was_wait = false; + } int64_t input = shard->stream->get_input_id(); if (input != shard->prev_input) { // We convert to letters which only works well for <=26 inputs. 
@@ -181,9 +183,12 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref ++shard->counters.voluntary_switches; if (shard->direct_switch_target == memref.marker.tid) ++shard->counters.direct_switches; - shard->thread_sequence += ','; + // A comma separating each sequence makes it a little easier to + // read, and helps distinguish a switch from two threads with the + // same %26 letter. (We could remove this though to compact it.) + shard->thread_sequence += THREAD_SEPARATOR; } - shard->thread_sequence += 'A' + static_cast<char>(input % 26); + shard->thread_sequence += THREAD_LETTER_START + static_cast<char>(input % 26); shard->cur_segment_instrs = 0; if (knob_verbose_ >= 2) { std::ostringstream line; @@ -206,12 +211,11 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref } if (type_is_instr(memref.instr.type)) { ++shard->counters.instrs; - // Print a single letter for any partial sequence. - if (shard->cur_segment_instrs == 0) - shard->thread_sequence += 'A' + static_cast<char>(input % 26); ++shard->cur_segment_instrs; - if (shard->cur_segment_instrs == knob_print_every_) + if (shard->cur_segment_instrs == knob_print_every_) { + shard->thread_sequence += THREAD_LETTER_START + static_cast<char>(input % 26); shard->cur_segment_instrs = 0; + } shard->direct_switch_target = INVALID_THREAD_ID; shard->saw_maybe_blocking = false; shard->saw_exit = false; @@ -231,6 +235,7 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref } } else if (memref.exit.type == TRACE_TYPE_THREAD_EXIT) shard->saw_exit = true; + shard->prev_was_wait = false; return true; } diff --git a/clients/drcachesim/tools/schedule_stats_create.h b/clients/drcachesim/tools/schedule_stats_create.h index 4b26f0c0202..b5db7770ae8 100644 --- a/clients/drcachesim/tools/schedule_stats_create.h +++ b/clients/drcachesim/tools/schedule_stats_create.h @@ -49,7 +49,8 @@ namespace drmemtrace { /** * Creates an analysis tool which counts the number and 
type of context switches - * in a core-sharded trace schedule. + * in a core-sharded trace schedule. The tool fails if run in any mode besides + * core-sharded. */ analysis_tool_t * schedule_stats_tool_create(uint64_t print_every, unsigned int verbose = 0);