From fe231aad2805dbcd88233b4e2e3ad3c8faecbaff Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Thu, 4 Aug 2022 00:47:22 -0400 Subject: [PATCH] i#4014 dr$sim phys: Use physaddr markers in simulators (#5585) When -use_physical is set, the cache and TLB simulators read the new virtual-to-physical translation markers and use them to simulate physical addresses. Changes drcachesim online mode to leave addresses virtual and insert markers instead, just like offline. Adds a compatibility change note and updates the docs. Includes a fix for -cpu_scheduling where the cached last thread was not reset on a cpu change with no thread change in between. -------------------------------------------------- Tested: Ran manually and looked at logs. Open to suggestions for how to automate testing. $ rm -rf drmemtrace.sim*.dir; ninja && sudo bin64/drrun -stderr_mask 0 -t drcachesim -use_physical -offline -- suite/tests/bin/simple_app && sudo chown -R $USER drmemtrace.sim*.dir && bin64/drrun -t drcachesim -indir drmemtrace.sim*.dir -use_physical -verbose 3 > OUT 2>&1 $ less OUT translating virtual 0x7fed14de0050 to 0xf52ace050 ::3036256.3036256:: @0xf52ace050 instr x3 translating virtual 0x7fed14de0053 to 0xf52ace053 ::3036256.3036256:: @0xf52ace053 instr x5 translating virtual 0x7ffca0af9068 to 0xb3f975068 translating virtual 0x7fed14de0053 to 0xf52ace053 ::3036256.3036256:: @0xf52ace053 write 0xb3f975068 x8 $ bin64/drrun -t drcachesim -indir drmemtrace.sim*.dir -use_physical -simulator_type TLB -verbose 3 > OUT 2>&1 $ less OUT translating virtual 0x7f6f263a3050 to 0xf52ace050 ::3080711.3080711:: @0x7070615f656c706d instr 0xf52ace050 x3 translating virtual 0x7f6f263a3053 to 0xf52ace053 ::3080711.3080711:: @0x7070615f656c706d direct_call 0xf52ace053 x5 translating virtual 0x7ffdeaab3798 to 0xed4c52798 translating virtual 0x7f6f263a3053 to 0xf52ace053 -------------------------------------------------- Issue: #4014 --- api/docs/release.dox | 3 + clients/drcachesim/common/options.cpp | 
16 ++- clients/drcachesim/drcachesim.dox.in | 30 +++-- .../simulator/analyzer_interface.cpp | 2 + .../drcachesim/simulator/cache_simulator.cpp | 84 +++++++------ .../simulator/cache_simulator_create.h | 4 +- clients/drcachesim/simulator/simulator.cpp | 91 ++++++++++++++- clients/drcachesim/simulator/simulator.h | 28 ++++- .../drcachesim/simulator/tlb_simulator.cpp | 47 +++++--- .../simulator/tlb_simulator_create.h | 4 +- clients/drcachesim/tracer/tracer.cpp | 110 ++++++++---------- 11 files changed, 280 insertions(+), 139 deletions(-) diff --git a/api/docs/release.dox b/api/docs/release.dox index df7e5b1f240..b968c8f277e 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -128,6 +128,9 @@ The changes between version \DR_VERSION and 9.0.1 include the following compatib changes: - Eliminated the -skip_syscall option to drrun and drinject, which is now always on by default. + - Changed the drcachesim -use_physical option to not modify the regular trace + entry virtual addresses but to instead insert metadata containing translation + information for converting virtual to physical addresses. Further non-compatibility-affecting changes include: - Added AArchXX support for attaching to a running process. diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 7d4cecc9954..dc548b474de 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -208,14 +208,18 @@ droption_t op_coherence( "Writes to cache lines will invalidate other private caches that hold that line."); droption_t op_use_physical( - DROPTION_SCOPE_CLIENT, "use_physical", false, "Use physical addresses if possible", - "If available, the default virtual addresses will be translated to physical. " - "This is not possible from user mode on all platforms. 
" - "For -offline, the regular trace entries remain virtual, with a pair of markers of " + DROPTION_SCOPE_ALL, "use_physical", false, "Use physical addresses if possible", + "If available, metadata with virtual-to-physical-address translation information " + "is added to the trace. This is not possible from user mode on all platforms. " + "The regular trace entries remain virtual, with a pair of markers of " "types #TRACE_MARKER_TYPE_PHYSICAL_ADDRESS and #TRACE_MARKER_TYPE_VIRTUAL_ADDRESS " "inserted at some prior point for each new or changed page mapping to show the " - "corresponding physical addresses. This option may incur significant overhead " - "both for the physical translation and as it requires disabling optimizations."); + "corresponding physical addresses. If translation fails, a " + "#TRACE_MARKER_TYPE_PHYSICAL_ADDRESS_NOT_AVAILABLE is inserted. " + "This option may incur significant overhead " + "both for the physical translation and as it requires disabling optimizations. " + "For -offline, this option must be passed to both the tracer (to insert the " + "markers) and the simulator (to use the markers)."); droption_t op_virt2phys_freq( DROPTION_SCOPE_CLIENT, "virt2phys_freq", 0, "Frequency of physical mapping refresh", diff --git a/clients/drcachesim/drcachesim.dox.in b/clients/drcachesim/drcachesim.dox.in index 3dd9ae704fe..5d4557efa7b 100644 --- a/clients/drcachesim/drcachesim.dox.in +++ b/clients/drcachesim/drcachesim.dox.in @@ -132,7 +132,7 @@ trace in a metadata marker entry of type Memory accesses (data loads and stores) are stored in #_memref_data_t. The program counter of the instruction performing the memory access, -the virtual address (unless \ref sec_drcachesim_phys are enabled), and +the virtual address (convertible to physical: see \ref sec_drcachesim_phys), and the size are provided. 
\section sec_drcachesim_format_other Other Records @@ -1266,12 +1266,28 @@ $ bin64/drrun -t drcachesim -simulator_type miss_analyzer -LL_miss_file rec.csv The memory access tracing client gathers virtual addresses. On Linux, if the kernel allows user-mode applications access to the \p -/proc/self/pagemap file, physical addresses may be used instead. This can -be requested via the \p -use_physical runtime option (see \ref -sec_drcachesim_ops). This works on current kernels but is expected to stop -working from user mode on future kernels due to recent security changes -(see -http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=ab676b7d6fbf4b294bf198fb27ade5b0e865c7ce). +/proc/self/pagemap file or the application can be run with root +privileges, information to translate virtual addresses to physical addresses may be included in the trace. This can be +requested via the \p -use_physical runtime option (see \ref +sec_drcachesim_ops). On older kernels the pagemap file was readable without +privileges: +http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=ab676b7d6fbf4b294bf198fb27ade5b0e865c7ce. + +When \p -use_physical is enabled, the regular trace entries remain +virtual, with a pair of markers of types +#TRACE_MARKER_TYPE_PHYSICAL_ADDRESS and +#TRACE_MARKER_TYPE_VIRTUAL_ADDRESS inserted at some prior point for +each new page mapping to show the corresponding physical +addresses. If translation fails, a +#TRACE_MARKER_TYPE_PHYSICAL_ADDRESS_NOT_AVAILABLE is inserted. +Limited support for detecting changes in page mappings is provided via +the \p -virt2phys_freq option to periodically clear cached +translations. + +Each analysis tool must decide whether to use this translation +information. The cache and TLB simulators provided are equipped to +read these markers and they use the marker data when \p -use_physical +is specified. 
**************************************************************************** \page sec_drcachesim_core Core Simulation Support diff --git a/clients/drcachesim/simulator/analyzer_interface.cpp b/clients/drcachesim/simulator/analyzer_interface.cpp index c44121d080c..db09fe57c62 100644 --- a/clients/drcachesim/simulator/analyzer_interface.cpp +++ b/clients/drcachesim/simulator/analyzer_interface.cpp @@ -126,6 +126,7 @@ get_cache_simulator_knobs() knobs->sim_refs = op_sim_refs.get_value(); knobs->verbose = op_verbose.get_value(); knobs->cpu_scheduling = op_cpu_scheduling.get_value(); + knobs->use_physical = op_use_physical.get_value(); return knobs; } @@ -162,6 +163,7 @@ drmemtrace_analysis_tool_create() knobs.sim_refs = op_sim_refs.get_value(); knobs.verbose = op_verbose.get_value(); knobs.cpu_scheduling = op_cpu_scheduling.get_value(); + knobs.use_physical = op_use_physical.get_value(); return tlb_simulator_create(knobs); } else if (op_simulator_type.get_value() == HISTOGRAM) { return histogram_tool_create(op_line_size.get_value(), op_report_top.get_value(), diff --git a/clients/drcachesim/simulator/cache_simulator.cpp b/clients/drcachesim/simulator/cache_simulator.cpp index cf05d8ea804..67eb0f3250e 100644 --- a/clients/drcachesim/simulator/cache_simulator.cpp +++ b/clients/drcachesim/simulator/cache_simulator.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2021 Google, Inc. All rights reserved. + * Copyright (c) 2015-2022 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -74,7 +74,7 @@ cache_simulator_create(const std::string &config_file) cache_simulator_t::cache_simulator_t(const cache_simulator_knobs_t &knobs) : simulator_t(knobs.num_cores, knobs.skip_refs, knobs.warmup_refs, knobs.warmup_fraction, knobs.sim_refs, knobs.cpu_scheduling, - knobs.verbose) + knobs.use_physical, knobs.verbose) , knobs_(knobs) , l1_icaches_(NULL) , l1_dcaches_(NULL) @@ -193,7 +193,7 @@ cache_simulator_t::cache_simulator_t(std::istream *config_file) init_knobs(knobs_.num_cores, knobs_.skip_refs, knobs_.warmup_refs, knobs_.warmup_fraction, knobs_.sim_refs, knobs_.cpu_scheduling, - knobs_.verbose); + knobs_.use_physical, knobs_.verbose); if (knobs_.data_prefetcher != PREFETCH_POLICY_NEXTLINE && knobs_.data_prefetcher != PREFETCH_POLICY_NONE) { @@ -440,9 +440,6 @@ cache_simulator_t::process_memref(const memref_t &memref) return true; } - // We use a static scheduling of threads to cores, as it is - // not practical to measure which core each thread actually - // ran on for each memref. int core; if (memref.data.tid == last_thread_) core = last_core_; @@ -452,52 +449,65 @@ cache_simulator_t::process_memref(const memref_t &memref) last_core_ = core; } - if (type_is_instr(memref.instr.type) || - memref.instr.type == TRACE_TYPE_PREFETCH_INSTR) { + // To support swapping to physical addresses without modifying the passed-in + // memref (which is also passed to other tools run at the same time) we use + // indirection. + const memref_t *simref = &memref; + memref_t phys_memref; + if (knobs_.use_physical) { + phys_memref = memref2phys(memref); + simref = &phys_memref; + } + + if (type_is_instr(simref->instr.type) || + simref->instr.type == TRACE_TYPE_PREFETCH_INSTR) { if (knobs_.verbose >= 3) { - std::cerr << "::" << memref.data.pid << "." 
<< memref.data.tid << ":: " - << " @" << (void *)memref.instr.addr << " instr x" - << memref.instr.size << "\n"; + std::cerr << "::" << simref->data.pid << "." << simref->data.tid << ":: " + << " @" << (void *)simref->instr.addr << " instr x" + << simref->instr.size << "\n"; } - l1_icaches_[core]->request(memref); - } else if (memref.data.type == TRACE_TYPE_READ || - memref.data.type == TRACE_TYPE_WRITE || + l1_icaches_[core]->request(*simref); + } else if (simref->data.type == TRACE_TYPE_READ || + simref->data.type == TRACE_TYPE_WRITE || // We may potentially handle prefetches differently. // TRACE_TYPE_PREFETCH_INSTR is handled above. - type_is_prefetch(memref.data.type)) { + type_is_prefetch(simref->data.type)) { if (knobs_.verbose >= 3) { - std::cerr << "::" << memref.data.pid << "." << memref.data.tid << ":: " - << " @" << (void *)memref.data.pc << " " - << trace_type_names[memref.data.type] << " " - << (void *)memref.data.addr << " x" << memref.data.size << "\n"; + std::cerr << "::" << simref->data.pid << "." << simref->data.tid << ":: " + << " @" << (void *)simref->data.pc << " " + << trace_type_names[simref->data.type] << " " + << (void *)simref->data.addr << " x" << simref->data.size << "\n"; } - l1_dcaches_[core]->request(memref); - } else if (memref.flush.type == TRACE_TYPE_INSTR_FLUSH) { + l1_dcaches_[core]->request(*simref); + } else if (simref->flush.type == TRACE_TYPE_INSTR_FLUSH) { if (knobs_.verbose >= 3) { - std::cerr << "::" << memref.data.pid << "." << memref.data.tid << ":: " - << " @" << (void *)memref.data.pc << " iflush " - << (void *)memref.data.addr << " x" << memref.data.size << "\n"; + std::cerr << "::" << simref->data.pid << "." 
<< simref->data.tid << ":: " + << " @" << (void *)simref->data.pc << " iflush " + << (void *)simref->data.addr << " x" << simref->data.size << "\n"; } - l1_icaches_[core]->flush(memref); - } else if (memref.flush.type == TRACE_TYPE_DATA_FLUSH) { + l1_icaches_[core]->flush(*simref); + } else if (simref->flush.type == TRACE_TYPE_DATA_FLUSH) { if (knobs_.verbose >= 3) { - std::cerr << "::" << memref.data.pid << "." << memref.data.tid << ":: " - << " @" << (void *)memref.data.pc << " dflush " - << (void *)memref.data.addr << " x" << memref.data.size << "\n"; + std::cerr << "::" << simref->data.pid << "." << simref->data.tid << ":: " + << " @" << (void *)simref->data.pc << " dflush " + << (void *)simref->data.addr << " x" << simref->data.size << "\n"; } - l1_dcaches_[core]->flush(memref); - } else if (memref.exit.type == TRACE_TYPE_THREAD_EXIT) { - handle_thread_exit(memref.exit.tid); + l1_dcaches_[core]->flush(*simref); + } else if (simref->exit.type == TRACE_TYPE_THREAD_EXIT) { + handle_thread_exit(simref->exit.tid); last_thread_ = 0; - } else if (memref.marker.type == TRACE_TYPE_INSTR_NO_FETCH) { + } else if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_CPU_ID) { + last_thread_ = 0; + } else if (simref->marker.type == TRACE_TYPE_INSTR_NO_FETCH) { // Just ignore. if (knobs_.verbose >= 3) { - std::cerr << "::" << memref.data.pid << "." << memref.data.tid << ":: " - << " @" << (void *)memref.instr.addr << " non-fetched instr x" - << memref.instr.size << "\n"; + std::cerr << "::" << simref->data.pid << "." 
<< simref->data.tid << ":: " + << " @" << (void *)simref->instr.addr << " non-fetched instr x" + << simref->instr.size << "\n"; } } else { - error_string_ = "Unhandled memref type " + std::to_string(memref.data.type); + error_string_ = "Unhandled memref type " + std::to_string(simref->data.type); return false; } diff --git a/clients/drcachesim/simulator/cache_simulator_create.h b/clients/drcachesim/simulator/cache_simulator_create.h index 679bcb3c800..2c1c275e0e2 100644 --- a/clients/drcachesim/simulator/cache_simulator_create.h +++ b/clients/drcachesim/simulator/cache_simulator_create.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2017-2018 Google, Inc. All rights reserved. + * Copyright (c) 2017-2022 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -67,6 +67,7 @@ struct cache_simulator_knobs_t { , warmup_fraction(0.0) , sim_refs(1ULL << 63) , cpu_scheduling(false) + , use_physical(false) , verbose(0) { } @@ -87,6 +88,7 @@ struct cache_simulator_knobs_t { double warmup_fraction; uint64_t sim_refs; bool cpu_scheduling; + bool use_physical; unsigned int verbose; }; diff --git a/clients/drcachesim/simulator/simulator.cpp b/clients/drcachesim/simulator/simulator.cpp index e0c7701fb86..bfb2a526605 100644 --- a/clients/drcachesim/simulator/simulator.cpp +++ b/clients/drcachesim/simulator/simulator.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2020 Google, Inc. All rights reserved. + * Copyright (c) 2015-2022 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -42,10 +42,10 @@ simulator_t::simulator_t(unsigned int num_cores, uint64_t skip_refs, uint64_t warmup_refs, double warmup_fraction, uint64_t sim_refs, bool cpu_scheduling, - unsigned int verbose) + bool use_physical, unsigned int verbose) { init_knobs(num_cores, skip_refs, warmup_refs, warmup_fraction, sim_refs, - cpu_scheduling, verbose); + cpu_scheduling, use_physical, verbose); } simulator_t::~simulator_t() @@ -55,7 +55,7 @@ simulator_t::~simulator_t() void simulator_t::init_knobs(unsigned int num_cores, uint64_t skip_refs, uint64_t warmup_refs, double warmup_fraction, uint64_t sim_refs, bool cpu_scheduling, - unsigned int verbose) + bool use_physical, unsigned int verbose) { knob_num_cores_ = num_cores; knob_skip_refs_ = skip_refs; @@ -63,6 +63,7 @@ simulator_t::init_knobs(unsigned int num_cores, uint64_t skip_refs, uint64_t war knob_warmup_fraction_ = warmup_fraction; knob_sim_refs_ = sim_refs; knob_cpu_scheduling_ = cpu_scheduling; + knob_use_physical_ = use_physical; knob_verbose_ = verbose; last_thread_ = 0; last_core_ = 0; @@ -80,8 +81,9 @@ simulator_t::init_knobs(unsigned int num_cores, uint64_t skip_refs, uint64_t war bool simulator_t::process_memref(const memref_t &memref) { - if (memref.marker.type == TRACE_TYPE_MARKER && - memref.marker.marker_type == TRACE_MARKER_TYPE_CPU_ID && knob_cpu_scheduling_) { + if (memref.marker.type != TRACE_TYPE_MARKER) + return true; + if (memref.marker.marker_type == TRACE_MARKER_TYPE_CPU_ID && knob_cpu_scheduling_) { int cpu = (int)(intptr_t)memref.marker.marker_value; if (cpu < 0) return true; @@ -104,9 +106,86 @@ simulator_t::process_memref(const memref_t &memref) ++thread_counts_[min_core]; ++thread_ever_counts_[min_core]; } + if (!knob_use_physical_) + return true; + if (memref.marker.marker_type == TRACE_MARKER_TYPE_PAGE_SIZE) { + if (page_size_ != 0 && page_size_ != memref.marker.marker_value) { + ERRMSG("Error: conflicting page size markers"); + 
return false; + } + page_size_ = memref.marker.marker_value; + if (!IS_POWER_OF_2(page_size_)) { + ERRMSG("Error: page size %zu is not power of 2", page_size_); + return false; + } + } else if (memref.marker.marker_type == TRACE_MARKER_TYPE_PHYSICAL_ADDRESS) { + prior_phys_addr_ = memref.marker.marker_value; + } else if (memref.marker.marker_type == TRACE_MARKER_TYPE_VIRTUAL_ADDRESS) { + virt2phys_[page_start(memref.marker.marker_value)] = page_start(prior_phys_addr_); + } else if (memref.marker.marker_type == + TRACE_MARKER_TYPE_PHYSICAL_ADDRESS_NOT_AVAILABLE) { + addr_t virt = memref.marker.marker_value; + virt2phys_[page_start(virt)] = page_start(synthetic_virt2phys(virt)); + } return true; } +addr_t +simulator_t::synthetic_virt2phys(addr_t virt) const +{ + // For a missing translation, we drop upper bits from the virtual address + // to create a synthetic physical address from (arbitrarily) the bottom 28 bits. + // XXX i#4014: Ideally we would detect a collision with an existing translation + // (when adding new synthetic ones, and by adding a bit saying which entries are + // synthetic, which we can check when we add new legitimate entries). We currently + // live with collisions with real translations under the assumption that missing + // translations are rare. + const addr_t SYNTHETIC_PHYS_BITS = 0xfffffff; + return virt & SYNTHETIC_PHYS_BITS; +} + +addr_t +simulator_t::virt2phys(addr_t virt) const +{ + addr_t phys_page = 0; + auto it = virt2phys_.find(page_start(virt)); + if (it == virt2phys_.end()) { + // We handled TRACE_MARKER_TYPE_PHYSICAL_ADDRESS_NOT_AVAILABLE so this + // should not happen. 
+ ERRMSG("Missing physical address marker for 0x%zx\n", virt); + phys_page = page_start(synthetic_virt2phys(virt)); + } else + phys_page = it->second; + addr_t phys = phys_page | (virt & (page_size_ - 1)); + if (knob_verbose_ >= 3) { + std::cerr << "translating virtual 0x" << std::hex << virt << " to 0x" << phys + << std::dec << "\n"; + } + return phys; +} + +memref_t +simulator_t::memref2phys(memref_t memref) const +{ + if (!type_has_address(memref.data.type)) + return memref; + memref_t out = memref; + if (type_is_instr(memref.instr.type) || + memref.instr.type == TRACE_TYPE_INSTR_NO_FETCH) { + out.instr.addr = virt2phys(memref.instr.addr); + } else if (memref.data.type == TRACE_TYPE_READ || + memref.data.type == TRACE_TYPE_WRITE || + type_is_prefetch(memref.data.type)) { + out.data.addr = virt2phys(memref.data.addr); + out.data.pc = virt2phys(memref.data.pc); + } else if (memref.data.type == TRACE_TYPE_INSTR_FLUSH || + memref.data.type == TRACE_TYPE_DATA_FLUSH) { + out.flush.addr = virt2phys(memref.flush.addr); + out.flush.pc = virt2phys(memref.flush.pc); + } + return out; +} + int simulator_t::find_emptiest_core(std::vector &counts) const { diff --git a/clients/drcachesim/simulator/simulator.h b/clients/drcachesim/simulator/simulator.h index 90b02cbf240..51863a51846 100644 --- a/clients/drcachesim/simulator/simulator.h +++ b/clients/drcachesim/simulator/simulator.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2020 Google, Inc. All rights reserved. + * Copyright (c) 2015-2022 Google, Inc. All rights reserved. 
* **********************************************************/ /* @@ -36,6 +36,7 @@ #ifndef _SIMULATOR_H_ #define _SIMULATOR_H_ 1 +#include #include #include #include "caching_device_stats.h" @@ -50,7 +51,7 @@ class simulator_t : public analysis_tool_t { } simulator_t(unsigned int num_cores, uint64_t skip_refs, uint64_t warmup_refs, double warmup_fraction, uint64_t sim_refs, bool cpu_scheduling, - unsigned int verbose); + bool use_physical, unsigned int verbose); virtual ~simulator_t() = 0; bool process_memref(const memref_t &memref) override; @@ -61,7 +62,7 @@ class simulator_t : public analysis_tool_t { void init_knobs(unsigned int num_cores, uint64_t skip_refs, uint64_t warmup_refs, double warmup_fraction, uint64_t sim_refs, bool cpu_scheduling, - unsigned int verbose); + bool use_physical, unsigned int verbose); void print_core(int core) const; int @@ -71,12 +72,28 @@ class simulator_t : public analysis_tool_t { virtual void handle_thread_exit(memref_tid_t tid); + addr_t + virt2phys(addr_t virt) const; + memref_t + memref2phys(memref_t memref) const; + + addr_t + page_start(addr_t addr) const + { + assert(page_size_ > 0); + return addr & ~(page_size_ - 1); + } + + addr_t + synthetic_virt2phys(addr_t virt) const; + unsigned int knob_num_cores_; uint64_t knob_skip_refs_; uint64_t knob_warmup_refs_; double knob_warmup_fraction_; uint64_t knob_sim_refs_; bool knob_cpu_scheduling_; + bool knob_use_physical_; unsigned int knob_verbose_; memref_tid_t last_thread_; @@ -88,6 +105,11 @@ class simulator_t : public analysis_tool_t { std::vector cpu_counts_; std::vector thread_counts_; std::vector thread_ever_counts_; + + // For virtual to physical page mappings. 
+ size_t page_size_ = 0; + std::unordered_map virt2phys_; + addr_t prior_phys_addr_ = 0; }; #endif /* _SIMULATOR_H_ */ diff --git a/clients/drcachesim/simulator/tlb_simulator.cpp b/clients/drcachesim/simulator/tlb_simulator.cpp index 20eae9d7352..7d40daf60ae 100644 --- a/clients/drcachesim/simulator/tlb_simulator.cpp +++ b/clients/drcachesim/simulator/tlb_simulator.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2015-2020 Google, Inc. All rights reserved. + * Copyright (c) 2015-2022 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -53,7 +53,7 @@ tlb_simulator_create(const tlb_simulator_knobs_t &knobs) tlb_simulator_t::tlb_simulator_t(const tlb_simulator_knobs_t &knobs) : simulator_t(knobs.num_cores, knobs.skip_refs, knobs.warmup_refs, knobs.warmup_fraction, knobs.sim_refs, knobs.cpu_scheduling, - knobs.verbose) + knobs.use_physical, knobs.verbose) , knobs_(knobs) { itlbs_ = new tlb_t *[knobs_.num_cores]; @@ -159,29 +159,40 @@ tlb_simulator_t::process_memref(const memref_t &memref) last_core_ = core; } - if (type_is_instr(memref.instr.type)) - itlbs_[core]->request(memref); - else if (memref.data.type == TRACE_TYPE_READ || memref.data.type == TRACE_TYPE_WRITE) - dtlbs_[core]->request(memref); - else if (memref.exit.type == TRACE_TYPE_THREAD_EXIT) { - handle_thread_exit(memref.exit.tid); + // To support swapping to physical addresses without modifying the passed-in + // memref (which is also passed to other tools run at the same time) we use + // indirection. 
+ const memref_t *simref = &memref; + memref_t phys_memref; + if (knobs_.use_physical) { + phys_memref = memref2phys(memref); + simref = &phys_memref; + } + + if (type_is_instr(simref->instr.type)) + itlbs_[core]->request(*simref); + else if (simref->data.type == TRACE_TYPE_READ || + simref->data.type == TRACE_TYPE_WRITE) + dtlbs_[core]->request(*simref); + else if (simref->exit.type == TRACE_TYPE_THREAD_EXIT) { + handle_thread_exit(simref->exit.tid); last_thread_ = 0; - } else if (type_is_prefetch(memref.data.type) || - memref.flush.type == TRACE_TYPE_INSTR_FLUSH || - memref.flush.type == TRACE_TYPE_DATA_FLUSH || - memref.marker.type == TRACE_TYPE_MARKER || - memref.marker.type == TRACE_TYPE_INSTR_NO_FETCH) { + } else if (type_is_prefetch(simref->data.type) || + simref->flush.type == TRACE_TYPE_INSTR_FLUSH || + simref->flush.type == TRACE_TYPE_DATA_FLUSH || + simref->marker.type == TRACE_TYPE_MARKER || + simref->marker.type == TRACE_TYPE_INSTR_NO_FETCH) { // TLB simulator ignores prefetching, cache flushing, and markers } else { - error_string_ = "Unhandled memref type " + std::to_string(memref.data.type); + error_string_ = "Unhandled memref type " + std::to_string(simref->data.type); return false; } if (knobs_.verbose >= 3) { - std::cerr << "::" << memref.data.pid << "." << memref.data.tid << ":: " - << " @" << (void *)memref.data.pc << " " - << trace_type_names[memref.data.type] << " " << (void *)memref.data.addr - << " x" << memref.data.size << std::endl; + std::cerr << "::" << simref->data.pid << "." 
<< simref->data.tid << ":: " + << " @" << (void *)simref->data.pc << " " + << trace_type_names[simref->data.type] << " " + << (void *)simref->data.addr << " x" << simref->data.size << std::endl; } // process counters for warmup and simulated references diff --git a/clients/drcachesim/simulator/tlb_simulator_create.h b/clients/drcachesim/simulator/tlb_simulator_create.h index 3bfb209607b..8beb5e3ecc9 100644 --- a/clients/drcachesim/simulator/tlb_simulator_create.h +++ b/clients/drcachesim/simulator/tlb_simulator_create.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2017-2018 Google, Inc. All rights reserved. + * Copyright (c) 2017-2022 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -64,6 +64,7 @@ struct tlb_simulator_knobs_t { , warmup_fraction(0.0) , sim_refs(1ULL << 63) , cpu_scheduling(false) + , use_physical(false) , verbose(0) { } @@ -81,6 +82,7 @@ struct tlb_simulator_knobs_t { double warmup_fraction; uint64_t sim_refs; bool cpu_scheduling; + bool use_physical; unsigned int verbose; }; diff --git a/clients/drcachesim/tracer/tracer.cpp b/clients/drcachesim/tracer/tracer.cpp index 520ae5fa724..e1b449c64df 100644 --- a/clients/drcachesim/tracer/tracer.cpp +++ b/clients/drcachesim/tracer/tracer.cpp @@ -1027,66 +1027,56 @@ process_entry_for_physaddr(void *drcontext, per_thread_t *data, size_t header_si virt); phys = virt; } - if (op_offline.get_value()) { - // For offline we keep the main entries as virtual but add markers showing - // the corresponding physical. We assume the mappings are static, allowing - // us to only emit one marker pair per new page seen (per thread to avoid - // locks). - // XXX: Add spot-checks of mapping changes via a separate option from - // -virt2phys_freq? - if (from_cache) - return v2p_ptr; - // We have something to emit. 
Rather than a memmove to insert inside the - // main buffer, we have a separate buffer, as our pair of markers means we - // do not need precise placement next to the corresponding regular entry - // (which also avoids extra work in raw2trace, esp for delayed branches and - // other cases). - // The downside is that we might have many buffers with a small number - // of markers on which we waste buffer output overhead. - // XXX: We could count them up and do a memmove if the count is small - // and we have space in the redzone? - if (!*emitted) { - // We need to be sure to emit the initial thread header if this is before - // the first regular buffer and skip it in the regular buffer. - if (header_size > buf_hdr_slots_size) { - size_t size = - reinterpret_cast(instru)->append_thread_header( - data->v2p_buf, dr_get_thread_id(drcontext), get_file_type()); - ASSERT(size == data->init_header_size, "inconsistent header"); - *skip = data->init_header_size; - v2p_ptr += size; - header_size += size; - } - v2p_ptr += add_buffer_header(drcontext, data, v2p_ptr); - *emitted = true; - } - if (v2p_ptr + 2 * instru->sizeof_entry() - data->v2p_buf >= - static_cast(get_v2p_buffer_size())) { - NOTIFY(1, "Reached v2p buffer limit: emitting multiple times\n"); - data->num_phys_markers += - output_buffer(drcontext, data, data->v2p_buf, v2p_ptr, header_size); - v2p_ptr = data->v2p_buf; - v2p_ptr += add_buffer_header(drcontext, data, v2p_ptr); - } - if (success) { - v2p_ptr += - instru->append_marker(v2p_ptr, TRACE_MARKER_TYPE_PHYSICAL_ADDRESS, phys); - v2p_ptr += - instru->append_marker(v2p_ptr, TRACE_MARKER_TYPE_VIRTUAL_ADDRESS, virt); - } else { - // For translation failure, we insert a distinct marker type, so analyzers - // know for sure and don't have to infer based on a missing marker. 
- v2p_ptr += instru->append_marker( - v2p_ptr, TRACE_MARKER_TYPE_PHYSICAL_ADDRESS_NOT_AVAILABLE, virt); + // We keep the main entries as virtual but add markers showing + // the corresponding physical. We assume the mappings are static, allowing + // us to only emit one marker pair per new page seen (per thread to avoid + // locks). + // XXX: Add spot-checks of mapping changes via a separate option from + // -virt2phys_freq? + if (from_cache) + return v2p_ptr; + // We have something to emit. Rather than a memmove to insert inside the + // main buffer, we have a separate buffer, as our pair of markers means we + // do not need precise placement next to the corresponding regular entry + // (which also avoids extra work in raw2trace, esp for delayed branches and + // other cases). + // The downside is that we might have many buffers with a small number + // of markers on which we waste buffer output overhead. + // XXX: We could count them up and do a memmove if the count is small + // and we have space in the redzone? + if (!*emitted) { + // We need to be sure to emit the initial thread header if this is before + // the first regular buffer and skip it in the regular buffer. 
+ if (header_size > buf_hdr_slots_size) { + size_t size = + reinterpret_cast(instru)->append_thread_header( + data->v2p_buf, dr_get_thread_id(drcontext), get_file_type()); + ASSERT(size == data->init_header_size, "inconsistent header"); + *skip = data->init_header_size; + v2p_ptr += size; + header_size += size; } + v2p_ptr += add_buffer_header(drcontext, data, v2p_ptr); + *emitted = true; + } + if (v2p_ptr + 2 * instru->sizeof_entry() - data->v2p_buf >= + static_cast(get_v2p_buffer_size())) { + NOTIFY(1, "Reached v2p buffer limit: emitting multiple times\n"); + data->num_phys_markers += + output_buffer(drcontext, data, data->v2p_buf, v2p_ptr, header_size); + v2p_ptr = data->v2p_buf; + v2p_ptr += add_buffer_header(drcontext, data, v2p_ptr); + } + if (success) { + v2p_ptr += + instru->append_marker(v2p_ptr, TRACE_MARKER_TYPE_PHYSICAL_ADDRESS, phys); + v2p_ptr += + instru->append_marker(v2p_ptr, TRACE_MARKER_TYPE_VIRTUAL_ADDRESS, virt); } else { - // For online we replace the virtual with physical. - // XXX i#4014: For consistency we should break compatibility, *not* replace, - // and insert the markers instead, updating dr$sim to use the markers - // to compute the physical addresses. We should then update - // https://dynamorio.org/sec_drcachesim_phys.html. - if (success) - instru->set_entry_addr(mem_ref, phys); + // For translation failure, we insert a distinct marker type, so analyzers + // know for sure and don't have to infer based on a missing marker. 
+ v2p_ptr += instru->append_marker( + v2p_ptr, TRACE_MARKER_TYPE_PHYSICAL_ADDRESS_NOT_AVAILABLE, virt); } return v2p_ptr; } @@ -2968,7 +2958,7 @@ event_thread_init(void *drcontext) BUF_PTR(data->seg_base) = NULL; else { create_buffer(data); - if (op_use_physical.get_value() && op_offline.get_value()) { + if (op_use_physical.get_value()) { create_v2p_buffer(data); } init_thread_in_process(drcontext); @@ -3121,7 +3111,7 @@ event_exit(void) "drmemtrace exiting process " PIDFMT "; traced " UINT64_FORMAT_STRING " references in " UINT64_FORMAT_STRING " writeouts.\n", dr_get_process_id(), num_refs, num_writeouts); - if (op_use_physical.get_value() && op_offline.get_value()) { + if (op_use_physical.get_value()) { dr_log(NULL, DR_LOG_ALL, 1, "drcachesim num physical address markers emitted: " UINT64_FORMAT_STRING "\n",