From cc934c7a5f79def4bedeaac4a2496e1d9dde8e80 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 29 Sep 2022 19:24:34 +0200 Subject: [PATCH] SPMI: Improve speed significantly for large diffs (#76238) This starts communicating more information about diffs back from superpmi and starts using it in the driver. The current information is the base size, diff size, base instructions, diff instructions and context size. The driver uses the base size/diff size to pick a small number of interesting contexts and only creates disassembly for these contexts. jit-analyze is then also invoked on only these contexts, but the contexts are picked so that they overlap with what jit-analyze would display. Additionally, we also pick the top 20 smallest contexts (in terms of context size) -- I frequently use the smallest contexts with diffs to investigate my change. The new behavior is only enabled when no custom metrics are specified. If custom metrics are specified we fall back to producing all disassembly files and leave it up to jit-analyze to extract and analyze the metrics specified. Also, the retainTopFilesOnly option is no longer necessary since our CI pipeline will at most produce 100 .dasm files now. Another benefit is that this means that all contexts mentioned in the jit-analyze output will now be part of the artifacts. The net result is that we can now get SPMI diffs for changes with even hundreds of thousands of diffs in the same time as it takes to get diffs for a small change. Fix #76178 --- src/coreclr/inc/clr_std/vector | 27 ++++ src/coreclr/scripts/superpmi.py | 114 +++++++++++---- src/coreclr/scripts/superpmi_diffs.py | 3 +- .../superpmi/superpmi-shared/standardpch.h | 2 + .../tools/superpmi/superpmi/CMakeLists.txt | 1 + .../tools/superpmi/superpmi/commandline.cpp | 8 +- .../tools/superpmi/superpmi/commandline.h | 4 +- .../tools/superpmi/superpmi/fileio.cpp | 115 +++++++++++++++ src/coreclr/tools/superpmi/superpmi/fileio.h | 133 ++++++++++++++++++ .../superpmi/superpmi/metricssummary.cpp | 95 ++----------- .../tools/superpmi/superpmi/metricssummary.h | 2 +- .../superpmi/superpmi/parallelsuperpmi.cpp | 64 +++++++-- .../tools/superpmi/superpmi/superpmi.cpp | 128 +++++++++++------ 13 files changed, 518 insertions(+), 178 deletions(-) create mode 100644 src/coreclr/tools/superpmi/superpmi/fileio.cpp create mode 100644 src/coreclr/tools/superpmi/superpmi/fileio.h diff --git a/src/coreclr/inc/clr_std/vector b/src/coreclr/inc/clr_std/vector index c10ecf6ba5a92c..c2d1caba890aaf 100644 --- a/src/coreclr/inc/clr_std/vector +++ b/src/coreclr/inc/clr_std/vector @@ -215,6 +215,33 @@ namespace std } } + vector(const vector&) = delete; + vector& operator=(const vector&) = delete; + + vector(vector&& v) noexcept + : m_size(v.m_size) + , m_capacity(v.m_capacity) + , m_pelements(v.m_pelements) + , m_isBufferOwner(v.m_isBufferOwner) + { + v.m_isBufferOwner = false; + } + + vector& operator=(vector&& v) noexcept + { + if (m_isBufferOwner) + { + erase(m_pelements, 0, m_size); + delete [] (BYTE*)m_pelements; + } + + m_size = v.m_size; + m_capacity = v.m_capacity; + m_pelements = v.m_pelements; + m_isBufferOwner = v.m_isBufferOwner; + v.m_isBufferOwner = false; + return *this; + } size_t size() const { diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 45dfcfe8746e51..ae45443620cd6c 100644 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -342,7 +342,6 @@ def add_core_root_arguments(parser, build_type_default, build_type_help): asm_diff_parser.add_argument("--debuginfo", action="store_true", help="Include debug info after disassembly (sets COMPlus_JitDebugDump).") asm_diff_parser.add_argument("-tag", help="Specify a word to add to the directory name where the asm diffs will be placed") asm_diff_parser.add_argument("-metrics", action="append", help="Metrics option to pass to jit-analyze. Can be specified multiple times, or pass comma-separated values.") -asm_diff_parser.add_argument("-retainOnlyTopFiles", action="store_true", help="Retain only top .dasm files with largest improvements or regressions and delete remaining files.") asm_diff_parser.add_argument("--diff_with_release", action="store_true", help="Specify if this is asmdiff using release binaries.") asm_diff_parser.add_argument("--git_diff", action="store_true", help="Produce a '.diff' file from 'base' and 'diff' folders if there were any differences.") @@ -501,6 +500,11 @@ def read_csv_metrics(path): return dict +def read_csv_diffs(path): + with open(path) as csv_file: + reader = csv.DictReader(csv_file) + return list(reader) + def determine_clrjit_compiler_version(clrjit_path): """ Obtain the version of the compiler that was used to compile the clrjit at the specified path. @@ -1568,7 +1572,7 @@ def replay_with_asm_diffs(self): logging.info("Running asm diffs of %s", mch_file) fail_mcl_file = os.path.join(temp_location, os.path.basename(mch_file) + "_fail.mcl") - diff_mcl_file = os.path.join(temp_location, os.path.basename(mch_file) + "_diff.mcl") + diffs_info = os.path.join(temp_location, os.path.basename(mch_file) + "_diffs.csv") base_metrics_summary_file = os.path.join(temp_location, os.path.basename(mch_file) + "_base_metrics.csv") diff_metrics_summary_file = os.path.join(temp_location, os.path.basename(mch_file) + "_diff_metrics.csv") @@ -1577,7 +1581,7 @@ def replay_with_asm_diffs(self): "-a", # Asm diffs "-v", "ewmi", # display errors, warnings, missing, jit info "-f", fail_mcl_file, # Failing mc List - "-diffMCList", diff_mcl_file, # Create all of the diffs in an mcl file + "-diffsInfo", diffs_info, # Information about diffs "-r", os.path.join(temp_location, "repro"), # Repro name, create .mc repro files "-baseMetricsSummary", base_metrics_summary_file, # Create summary of metrics we can use to get total code size impact "-diffMetricsSummary", diff_metrics_summary_file, @@ -1633,8 +1637,10 @@ def replay_with_asm_diffs(self): repro_base_command_line = "{} {} {}".format(self.superpmi_path, " ".join(altjit_asm_diffs_flags), self.diff_jit_path) save_repro_mc_files(temp_location, self.coreclr_args, artifacts_base_name, repro_base_command_line) + diffs = read_csv_diffs(diffs_info) + # This file had asm diffs; keep track of that. - has_diffs = is_nonzero_length_file(diff_mcl_file) + has_diffs = len(diffs) > 0 if has_diffs: files_with_asm_diffs.append(mch_file) @@ -1649,12 +1655,6 @@ def replay_with_asm_diffs(self): if return_code == 0: logging.warning("Warning: SuperPMI returned a zero exit code, but generated a non-zero-sized mcl file") - self.diff_mcl_contents = None - with open(diff_mcl_file) as file_handle: - mcl_lines = file_handle.readlines() - mcl_lines = [item.strip() for item in mcl_lines] - self.diff_mcl_contents = mcl_lines - asm_root_dir = create_unique_directory_name(self.coreclr_args.spmi_location, "asm.{}".format(artifacts_base_name)) base_asm_location = os.path.join(asm_root_dir, "base") diff_asm_location = os.path.join(asm_root_dir, "diff") @@ -1672,13 +1672,13 @@ def replay_with_asm_diffs(self): text_differences = queue.Queue() jit_dump_differences = queue.Queue() - async def create_replay_artifacts(print_prefix, item, self, mch_file, env, jit_differences_queue, base_location, diff_location, extension): + async def create_replay_artifacts(print_prefix, context_index, self, mch_file, env, jit_differences_queue, base_location, diff_location, extension): """ Run superpmi over an MC to create JIT asm or JIT dumps for the method. """ # Setup flags to call SuperPMI for both the diff jit and the base jit flags = [ - "-c", item, + "-c", str(context_index), "-v", "q" # only log from the jit. ] flags += altjit_replay_flags @@ -1690,7 +1690,7 @@ async def create_replay_artifacts(print_prefix, item, self, mch_file, env, jit_d async def create_one_artifact(jit_path: str, location: str, flags) -> str: command = [self.superpmi_path] + flags + [jit_path, mch_file] - item_path = os.path.join(location, "{}{}".format(item, extension)) + item_path = os.path.join(location, "{}{}".format(context_index, extension)) modified_env = env.copy() modified_env['COMPlus_JitStdOutFile'] = item_path logging.debug("%sGenerating %s", print_prefix, item_path) @@ -1706,22 +1706,20 @@ async def create_one_artifact(jit_path: str, location: str, flags) -> str: diff_txt = await create_one_artifact(self.diff_jit_path, diff_location, flags + diff_option_flags_for_diff_artifact) if base_txt != diff_txt: - jit_differences_queue.put_nowait(item) + jit_differences_queue.put_nowait(context_index) ################################################################################################ end of create_replay_artifacts() - diff_items = [] - for item in self.diff_mcl_contents: - diff_items.append(item) - logging.info("Creating dasm files: %s %s", base_asm_location, diff_asm_location) - subproc_helper = AsyncSubprocessHelper(diff_items, verbose=True) + dasm_contexts = self.pick_contexts_to_disassemble(diffs) + subproc_helper = AsyncSubprocessHelper(dasm_contexts, verbose=True) subproc_helper.run_to_completion(create_replay_artifacts, self, mch_file, asm_complus_vars_full_env, text_differences, base_asm_location, diff_asm_location, ".dasm") if self.coreclr_args.diff_jit_dump: logging.info("Creating JitDump files: %s %s", base_dump_location, diff_dump_location) subproc_helper.run_to_completion(create_replay_artifacts, self, mch_file, jit_dump_complus_vars_full_env, jit_dump_differences, base_dump_location, diff_dump_location, ".txt") - logging.info("Differences found. To replay SuperPMI use:") + logging.info("Differences found. To replay SuperPMI use:".format(len(diffs))) + logging.info("") for var, value in asm_complus_vars.items(): print_platform_specific_environment_vars(logging.INFO, self.coreclr_args, var, value) @@ -1736,9 +1734,10 @@ async def create_one_artifact(jit_path: str, location: str, flags) -> str: logging.info("%s %s -c ### %s %s", self.superpmi_path, " ".join(altjit_replay_flags), self.diff_jit_path, mch_file) logging.info("") - logging.debug("Method numbers with binary differences:") - for item in self.diff_mcl_contents: - logging.debug(item) + smallest = sorted(diffs, key=lambda r: int(r["Context size"]))[:20] + logging.debug("Smallest {} contexts with binary differences:".format(len(smallest))) + for diff in smallest: + logging.debug(diff["Context"]) logging.debug("") if base_metrics is not None and diff_metrics is not None: @@ -1769,13 +1768,22 @@ async def create_one_artifact(jit_path: str, location: str, flags) -> str: # It appears we have a built jit-analyze on the path, so try to run it. jit_analyze_summary_file = os.path.join(asm_root_dir, "summary.md") command = [ jit_analyze_path, "--md", jit_analyze_summary_file, "-r", "--base", base_asm_location, "--diff", diff_asm_location ] - if self.coreclr_args.retainOnlyTopFiles: - command += [ "--retainOnlyTopFiles" ] if self.coreclr_args.metrics: command += [ "--metrics", ",".join(self.coreclr_args.metrics) ] elif base_bytes is not None and diff_bytes is not None: command += [ "--override-total-base-metric", str(base_bytes), "--override-total-diff-metric", str(diff_bytes) ] + if len(dasm_contexts) < len(diffs): + # Avoid producing analysis output that assumes these are all contexts + # with diffs. When no custom metrics are specified we pick only a subset + # of interesting contexts to disassemble, to avoid spending too long. + # See pick_contexts_to_disassemble. + command += [ "--is-subset-of-diffs" ] + else: + # Ask jit-analyze to avoid producing analysis output that assumes these are + # all contexts (we never produce .dasm files for contexts without diffs). + command += [ "--is-diffs-only" ] + run_and_log(command, logging.INFO) ran_jit_analyze = True @@ -1800,6 +1808,14 @@ async def create_one_artifact(jit_path: str, location: str, flags) -> str: else: logging.warning("No textual differences. Is this an issue with coredistools?") + # If we are not specifying custom metrics then print a summary here, otherwise leave the summarization up to jit-analyze. + if self.coreclr_args.metrics is None: + num_improvements = sum(1 for r in diffs if int(r["Diff size"]) < int(r["Base size"])) + num_regressions = sum(1 for r in diffs if int(r["Diff size"]) > int(r["Base size"])) + logging.info("{} contexts with differences found ({} improvements, {} regressions)".format(len(diffs), num_improvements, num_regressions)) + logging.info("") + logging.info("") + if self.coreclr_args.diff_jit_dump: try: current_jit_dump_diff = jit_dump_differences.get_nowait() @@ -1994,6 +2010,49 @@ def write_pivot_section(row): return result ################################################################################################ end of replay_with_asm_diffs() + def pick_contexts_to_disassemble(self, diffs): + """ Given information about diffs, pick the context numbers to create .dasm files for + + Returns: + List of method context numbers to create disassembly for. + """ + + # If there are non-default metrics then we need to disassemble + # everything so that jit-analyze can handle those. + if self.coreclr_args.metrics is not None: + contexts = diffs + else: + # In the default case we have size improvements/regressions + # available without needing to disassemble all, so pick a subset of + # interesting diffs to pass to jit-analyze. + + # 20 smallest method contexts with diffs + smallest_contexts = sorted(diffs, key=lambda r: int(r["Context size"]))[:20] + # Order by byte-wise improvement, largest improvements first + by_diff_size = sorted(diffs, key=lambda r: int(r["Diff size"]) - int(r["Base size"])) + # 20 top improvements, byte-wise + top_improvements_bytes = by_diff_size[:20] + # 20 top regressions, byte-wise + top_regressions_bytes = by_diff_size[-20:] + + # Order by percentage-wise size improvement, largest improvements first + def diff_pct(r): + base = int(r["Base size"]) + if base == 0: + return 0 + diff = int(r["Diff size"]) + return (diff - base) / base + + by_diff_size_pct = sorted(diffs, key=diff_pct) + top_improvements_pct = by_diff_size_pct[:20] + top_regressions_pct = by_diff_size_pct[-20:] + + contexts = smallest_contexts + top_improvements_bytes + top_regressions_bytes + top_improvements_pct + top_regressions_pct + + final_contexts_indices = list(set(int(r["Context"]) for r in contexts)) + final_contexts_indices.sort() + return final_contexts_indices + ################################################################################ # SuperPMI Replay/TP diff ################################################################################ @@ -3908,11 +3967,6 @@ def verify_base_diff_args(): lambda unused: True, "Unable to set metrics.") - coreclr_args.verify(args, - "retainOnlyTopFiles", - lambda unused: True, - "Unable to set retainOnlyTopFiles.") - coreclr_args.verify(args, "diff_with_release", lambda unused: True, diff --git a/src/coreclr/scripts/superpmi_diffs.py b/src/coreclr/scripts/superpmi_diffs.py index 47243ad43d3a23..b7af1427fc481c 100644 --- a/src/coreclr/scripts/superpmi_diffs.py +++ b/src/coreclr/scripts/superpmi_diffs.py @@ -216,8 +216,7 @@ def main(main_args): "-spmi_location", spmi_location, "-error_limit", "100", "-log_level", "debug", - "-log_file", log_file, - "-retainOnlyTopFiles"]) + "-log_file", log_file]) print("Running superpmi.py tpdiff") diff --git a/src/coreclr/tools/superpmi/superpmi-shared/standardpch.h b/src/coreclr/tools/superpmi/superpmi-shared/standardpch.h index dc4ce235ffbf2a..e0e83c41d03ab6 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/standardpch.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/standardpch.h @@ -62,6 +62,7 @@ // Getting STL to work with PAL is difficult, so reimplement STL functionality to not require it. #ifdef TARGET_UNIX +#include "clr_std/utility" #include "clr_std/string" #include "clr_std/algorithm" #include "clr_std/vector" @@ -69,6 +70,7 @@ #ifndef USE_STL #define USE_STL #endif // USE_STL +#include #include #include #include diff --git a/src/coreclr/tools/superpmi/superpmi/CMakeLists.txt b/src/coreclr/tools/superpmi/superpmi/CMakeLists.txt index 63237450898e3b..76ab73ffdabfe2 100644 --- a/src/coreclr/tools/superpmi/superpmi/CMakeLists.txt +++ b/src/coreclr/tools/superpmi/superpmi/CMakeLists.txt @@ -26,6 +26,7 @@ set(SUPERPMI_SOURCES neardiffer.cpp parallelsuperpmi.cpp superpmi.cpp + fileio.cpp jithost.cpp ../superpmi-shared/callutils.cpp ../superpmi-shared/compileresult.cpp diff --git a/src/coreclr/tools/superpmi/superpmi/commandline.cpp b/src/coreclr/tools/superpmi/superpmi/commandline.cpp index 563d8cb16248c0..ca407889e87235 100644 --- a/src/coreclr/tools/superpmi/superpmi/commandline.cpp +++ b/src/coreclr/tools/superpmi/superpmi/commandline.cpp @@ -318,7 +318,7 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o) o->mclFilename = argv[i]; } - else if ((_strnicmp(&argv[i][1], "diffMCList", 10) == 0)) + else if ((_strnicmp(&argv[i][1], "diffsInfo", 9) == 0)) { if (++i >= argc) { @@ -326,7 +326,7 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o) return false; } - o->diffMCLFilename = argv[i]; + o->diffsInfo = argv[i]; } else if ((_strnicmp(&argv[i][1], "target", 6) == 0)) { @@ -641,9 +641,9 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o) DumpHelp(argv[0]); return false; } - if (o->diffMCLFilename != nullptr && !o->applyDiff) + if (o->diffsInfo != nullptr && !o->applyDiff) { - LogError("-diffMCList specified without -applyDiff."); + LogError("-diffsInfo specified without -applyDiff."); DumpHelp(argv[0]); return false; } diff --git a/src/coreclr/tools/superpmi/superpmi/commandline.h b/src/coreclr/tools/superpmi/superpmi/commandline.h index 055adf00295cda..e801f8cb4f752d 100644 --- a/src/coreclr/tools/superpmi/superpmi/commandline.h +++ b/src/coreclr/tools/superpmi/superpmi/commandline.h @@ -39,7 +39,7 @@ class CommandLine , baseMetricsSummaryFile(nullptr) , diffMetricsSummaryFile(nullptr) , mclFilename(nullptr) - , diffMCLFilename(nullptr) + , diffsInfo(nullptr) , targetArchitecture(nullptr) , compileList(nullptr) , offset(-1) @@ -72,7 +72,7 @@ class CommandLine char* baseMetricsSummaryFile; char* diffMetricsSummaryFile; char* mclFilename; - char* diffMCLFilename; + char* diffsInfo; char* targetArchitecture; char* compileList; int offset; diff --git a/src/coreclr/tools/superpmi/superpmi/fileio.cpp b/src/coreclr/tools/superpmi/superpmi/fileio.cpp new file mode 100644 index 00000000000000..ed16485a038e8a --- /dev/null +++ b/src/coreclr/tools/superpmi/superpmi/fileio.cpp @@ -0,0 +1,115 @@ +#include "standardpch.h" +#include "fileio.h" + +bool FileWriter::Printf(const char* fmt, ...) +{ + va_list args; + va_start(args, fmt); + + char stackBuffer[512]; + size_t bufferSize = sizeof(stackBuffer); + char* pBuffer = stackBuffer; + while (true) + { + va_list argsCopy; + va_copy(argsCopy, args); + int printed = _vsnprintf_s(pBuffer, bufferSize, _TRUNCATE, fmt, argsCopy); + va_end(argsCopy); + + if (printed < 0) + { + // buffer too small + if (pBuffer != stackBuffer) + delete[] pBuffer; + + bufferSize *= 2; + pBuffer = new char[bufferSize]; + } + else + { + DWORD numWritten; + bool result = + WriteFile(m_file.Get(), pBuffer, static_cast(printed), &numWritten, nullptr) && + (numWritten == static_cast(printed)); + + if (pBuffer != stackBuffer) + delete[] pBuffer; + + va_end(args); + return result; + } + } +} + +bool FileWriter::CreateNew(const char* path, FileWriter* fw) +{ + FileHandle handle(CreateFile(path, GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr)); + if (!handle.IsValid()) + { + return false; + } + + *fw = FileWriter(std::move(handle)); + return true; +} + +bool FileLineReader::Open(const char* path, FileLineReader* fr) +{ + FileHandle file(CreateFile(path, GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr)); + if (!file.IsValid()) + { + return false; + } + + LARGE_INTEGER size; + if (!GetFileSizeEx(file.Get(), &size)) + { + return false; + } + + if (static_cast(size.QuadPart) > SIZE_MAX) + { + return false; + } + + FileMappingHandle mapping(CreateFileMapping(file.Get(), nullptr, PAGE_READONLY, size.u.HighPart, size.u.LowPart, nullptr)); + if (!mapping.IsValid()) + { + return false; + } + + FileViewHandle view(MapViewOfFile(mapping.Get(), FILE_MAP_READ, 0, 0, 0)); + if (!view.IsValid()) + { + return false; + } + + *fr = FileLineReader(std::move(file), std::move(mapping), std::move(view), static_cast(size.QuadPart)); + return true; +} + +bool FileLineReader::AdvanceLine() +{ + if (m_cur >= m_end) + { + return false; + } + + char* end = m_cur; + while (end < m_end && *end != '\r' && *end != '\n') + { + end++; + } + + m_currentLine.resize(end - m_cur + 1); + memcpy(m_currentLine.data(), m_cur, end - m_cur); + m_currentLine[end - m_cur] = '\0'; + + m_cur = end; + if (m_cur < m_end && *m_cur == '\r') + m_cur++; + if (m_cur < m_end && *m_cur == '\n') + m_cur++; + + return true; +} diff --git a/src/coreclr/tools/superpmi/superpmi/fileio.h b/src/coreclr/tools/superpmi/superpmi/fileio.h new file mode 100644 index 00000000000000..a88e74d6ee00c5 --- /dev/null +++ b/src/coreclr/tools/superpmi/superpmi/fileio.h @@ -0,0 +1,133 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _FileIO +#define _FileIO + +template +struct HandleWrapper +{ + using HandleType = typename HandleSpec::Type; + + HandleWrapper() + : m_handle(HandleSpec::Invalid()) + { + } + + explicit HandleWrapper(HandleType handle) + : m_handle(handle) + { + } + + ~HandleWrapper() + { + if (m_handle != HandleSpec::Invalid()) + { + HandleSpec::Close(m_handle); + m_handle = HandleSpec::Invalid(); + } + } + + HandleWrapper(const HandleWrapper&) = delete; + HandleWrapper& operator=(HandleWrapper&) = delete; + + HandleWrapper(HandleWrapper&& hw) noexcept + : m_handle(hw.m_handle) + { + hw.m_handle = HandleSpec::Invalid(); + } + + HandleWrapper& operator=(HandleWrapper&& hw) noexcept + { + if (m_handle != HandleSpec::Invalid()) + HandleSpec::Close(m_handle); + + m_handle = hw.m_handle; + hw.m_handle = HandleSpec::Invalid(); + return *this; + } + + bool IsValid() { return m_handle != HandleSpec::Invalid(); } + HandleType Get() { return m_handle; } + +private: + HandleType m_handle; +}; + +struct FileHandleSpec +{ + using Type = HANDLE; + static HANDLE Invalid() { return INVALID_HANDLE_VALUE; } + static void Close(HANDLE h) { CloseHandle(h); } +}; + +struct FileMappingHandleSpec +{ + using Type = HANDLE; + static HANDLE Invalid() { return nullptr; } + static void Close(HANDLE h) { CloseHandle(h); } +}; + +struct FileViewHandleSpec +{ + using Type = LPVOID; + static LPVOID Invalid() { return nullptr; } + static void Close(LPVOID p) { UnmapViewOfFile(p); } +}; + +typedef HandleWrapper FileHandle; +typedef HandleWrapper FileMappingHandle; +typedef HandleWrapper FileViewHandle; + +class FileWriter +{ + FileHandle m_file; + + FileWriter(FileHandle file) + : m_file(std::move(file)) + { + } + +public: + FileWriter() + { + } + + bool Printf(const char* fmt, ...); + + static bool CreateNew(const char* path, FileWriter* fw); +}; + +class FileLineReader +{ + FileHandle m_file; + FileMappingHandle m_fileMapping; + FileViewHandle m_view; + + char* m_cur; + char* m_end; + std::vector m_currentLine; + + FileLineReader(FileHandle file, FileMappingHandle fileMapping, FileViewHandle view, size_t size) + : m_file(std::move(file)) + , m_fileMapping(std::move(fileMapping)) + , m_view(std::move(view)) + , m_cur(static_cast(m_view.Get())) + , m_end(static_cast(m_view.Get()) + size) + { + } + +public: + FileLineReader() + : m_cur(nullptr) + , m_end(nullptr) + { + } + + bool AdvanceLine(); + const char* GetCurrentLine() { return m_currentLine.data(); } + + static bool Open(const char* path, FileLineReader* fr); +}; + +#endif diff --git a/src/coreclr/tools/superpmi/superpmi/metricssummary.cpp b/src/coreclr/tools/superpmi/superpmi/metricssummary.cpp index cb9c908ee72d80..7967cf90293fb4 100644 --- a/src/coreclr/tools/superpmi/superpmi/metricssummary.cpp +++ b/src/coreclr/tools/superpmi/superpmi/metricssummary.cpp @@ -4,6 +4,7 @@ #include "standardpch.h" #include "metricssummary.h" #include "logging.h" +#include "fileio.h" void MetricsSummary::AggregateFrom(const MetricsSummary& other) { @@ -24,50 +25,15 @@ void MetricsSummaries::AggregateFrom(const MetricsSummaries& other) FullOpts.AggregateFrom(other.FullOpts); } -struct FileHandleWrapper -{ - FileHandleWrapper(HANDLE hFile) - : hFile(hFile) - { - } - - ~FileHandleWrapper() - { - CloseHandle(hFile); - } - - HANDLE get() { return hFile; } - -private: - HANDLE hFile; -}; - -static bool FilePrintf(HANDLE hFile, const char* fmt, ...) -{ - va_list args; - va_start(args, fmt); - - char buffer[4096]; - int len = vsprintf_s(buffer, ARRAY_SIZE(buffer), fmt, args); - DWORD numWritten; - bool result = - WriteFile(hFile, buffer, static_cast(len), &numWritten, nullptr) && (numWritten == static_cast(len)); - - va_end(args); - - return result; -} - bool MetricsSummaries::SaveToFile(const char* path) { - FileHandleWrapper file(CreateFile(path, GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr)); - if (file.get() == INVALID_HANDLE_VALUE) + FileWriter file; + if (!FileWriter::CreateNew(path, &file)) { return false; } - if (!FilePrintf( - file.get(), + if (!file.Printf( "Successful compiles,Failing compiles,Missing compiles,Contexts with diffs," "Code bytes,Diffed code bytes,Executed instructions,Diff executed instructions,Name\n")) { @@ -75,16 +41,15 @@ bool MetricsSummaries::SaveToFile(const char* path) } return - WriteRow(file.get(), "Overall", Overall) && - WriteRow(file.get(), "MinOpts", MinOpts) && - WriteRow(file.get(), "FullOpts", FullOpts); + WriteRow(file, "Overall", Overall) && + WriteRow(file, "MinOpts", MinOpts) && + WriteRow(file, "FullOpts", FullOpts); } -bool MetricsSummaries::WriteRow(HANDLE hFile, const char* name, const MetricsSummary& summary) +bool MetricsSummaries::WriteRow(FileWriter& fw, const char* name, const MetricsSummary& summary) { return - FilePrintf( - hFile, + fw.Printf( "%d,%d,%d,%d,%lld,%lld,%lld,%lld,%s\n", summary.SuccessfulCompiles, summary.FailingCompiles, @@ -99,59 +64,27 @@ bool MetricsSummaries::WriteRow(HANDLE hFile, const char* name, const MetricsSum bool MetricsSummaries::LoadFromFile(const char* path, MetricsSummaries* metrics) { - FileHandleWrapper file(CreateFile(path, GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr)); - if (file.get() == INVALID_HANDLE_VALUE) - { - return false; - } - - LARGE_INTEGER len; - if (!GetFileSizeEx(file.get(), &len)) + FileLineReader reader; + if (!FileLineReader::Open(path, &reader)) { return false; } - DWORD stringLen = static_cast(len.QuadPart); - std::vector content(stringLen); - DWORD numRead; - if (!ReadFile(file.get(), content.data(), stringLen, &numRead, nullptr) || (numRead != stringLen)) + if (!reader.AdvanceLine()) { return false; } - std::vector line; - size_t index = 0; - auto nextLine = [&line, &content, &index]() - { - size_t end = index; - while ((end < content.size()) && (content[end] != '\r') && (content[end] != '\n')) - { - end++; - } - - line.resize(end - index + 1); - memcpy(line.data(), &content[index], end - index); - line[end - index] = '\0'; - - index = end; - if ((index < content.size()) && (content[index] == '\r')) - index++; - if ((index < content.size()) && (content[index] == '\n')) - index++; - }; - *metrics = MetricsSummaries(); - nextLine(); bool result = true; - while (index < content.size()) + while (reader.AdvanceLine()) { - nextLine(); MetricsSummary summary; char name[32]; int scanResult = sscanf_s( - line.data(), + reader.GetCurrentLine(), "%d,%d,%d,%d,%lld,%lld,%lld,%lld,%s", &summary.SuccessfulCompiles, &summary.FailingCompiles, diff --git a/src/coreclr/tools/superpmi/superpmi/metricssummary.h b/src/coreclr/tools/superpmi/superpmi/metricssummary.h index 118d6aac224006..14e364cd9fcf3e 100644 --- a/src/coreclr/tools/superpmi/superpmi/metricssummary.h +++ b/src/coreclr/tools/superpmi/superpmi/metricssummary.h @@ -39,7 +39,7 @@ class MetricsSummaries bool SaveToFile(const char* path); static bool LoadFromFile(const char* path, MetricsSummaries* metrics); private: - static bool WriteRow(HANDLE hFile, const char* name, const MetricsSummary& summary); + static bool WriteRow(class FileWriter& fw, const char* name, const MetricsSummary& summary); }; #endif diff --git a/src/coreclr/tools/superpmi/superpmi/parallelsuperpmi.cpp b/src/coreclr/tools/superpmi/superpmi/parallelsuperpmi.cpp index dfca5adcc376a6..610ccdf732b73e 100644 --- a/src/coreclr/tools/superpmi/superpmi/parallelsuperpmi.cpp +++ b/src/coreclr/tools/superpmi/superpmi/parallelsuperpmi.cpp @@ -9,6 +9,7 @@ #include "commandline.h" #include "errorhandling.h" #include "metricssummary.h" +#include "fileio.h" #define MAX_LOG_LINE_SIZE 0x1000 // 4 KB @@ -349,7 +350,7 @@ struct PerWorkerData HANDLE hStdError; char* failingMCListPath; - char* diffMCListPath; + char* diffsInfoPath; char* stdOutputPath; char* stdErrorPath; char* baseMetricsSummaryPath; @@ -359,7 +360,7 @@ struct PerWorkerData : hStdOutput(INVALID_HANDLE_VALUE) , hStdError(INVALID_HANDLE_VALUE) , failingMCListPath(nullptr) - , diffMCListPath(nullptr) + , diffsInfoPath(nullptr) , stdOutputPath(nullptr) , stdErrorPath(nullptr) , baseMetricsSummaryPath(nullptr) @@ -368,7 +369,7 @@ struct PerWorkerData } }; -void MergeWorkerMCLs(char* mclFilename, PerWorkerData* workerData, int workerCount, char* PerWorkerData::*mclPath) +static void MergeWorkerMCLs(char* mclFilename, PerWorkerData* workerData, int workerCount, char* PerWorkerData::*mclPath) { int **MCL = new int *[workerCount], *MCLCount = new int[workerCount], totalCount = 0; @@ -396,6 +397,39 @@ void MergeWorkerMCLs(char* mclFilename, PerWorkerData* workerData, int workerCou LogError("Unable to write to MCL file %s.", mclFilename); } +static void MergeWorkerCsvs(char* csvFilename, PerWorkerData* workerData, int workerCount, char* PerWorkerData::* csvPath) +{ + FileWriter fw; + if (!FileWriter::CreateNew(csvFilename, &fw)) + { + LogError("Could not create file %s", csvFilename); + return; + } + + bool hasHeader = false; + for (int i = 0; i < workerCount; i++) + { + FileLineReader reader; + if (!FileLineReader::Open(workerData[i].*csvPath, &reader)) + { + LogError("Could not open child CSV file %s", workerData[i].*csvPath); + continue; + } + + if (hasHeader && !reader.AdvanceLine()) + { + continue; + } + + while (reader.AdvanceLine()) + { + fw.Printf("%s\n", reader.GetCurrentLine()); + } + + hasHeader = true; + } +} + #define MAX_CMDLINE_SIZE 0x1000 // 4 KB //------------------------------------------------------------- @@ -528,8 +562,8 @@ int doParallelSuperPMI(CommandLine::Options& o) LogVerbose("Using child (%s) with args (%s)", spmiFilename, spmiArgs); if (o.mclFilename != nullptr) LogVerbose(" failingMCList=%s", o.mclFilename); - if (o.diffMCLFilename != nullptr) - LogVerbose(" diffMCLFilename=%s", o.diffMCLFilename); + if (o.diffsInfo != nullptr) + LogVerbose(" diffsInfo=%s", o.diffsInfo); LogVerbose(" workerCount=%d, skipCleanup=%d.", o.workerCount, o.skipCleanup); PerWorkerData* perWorkerData = new PerWorkerData[o.workerCount]; @@ -551,10 +585,10 @@ int doParallelSuperPMI(CommandLine::Options& o) sprintf_s(wd.failingMCListPath, MAX_PATH, "%sParallelSuperPMI-%u-%d.mcl", tempPath, randNumber, i); } - if (o.diffMCLFilename != nullptr) + if (o.diffsInfo != nullptr) { - wd.diffMCListPath = new char[MAX_PATH]; - sprintf_s(wd.diffMCListPath, MAX_PATH, "%sParallelSuperPMI-Diff-%u-%d.mcl", tempPath, randNumber, i); + wd.diffsInfoPath = new char[MAX_PATH]; + sprintf_s(wd.diffsInfoPath, MAX_PATH, "%sParallelSuperPMI-Diff-%u-%d.mcl", tempPath, randNumber, i); } if (o.baseMetricsSummaryFile != nullptr) @@ -593,10 +627,10 @@ int doParallelSuperPMI(CommandLine::Options& o) wd.failingMCListPath); } - if (wd.diffMCListPath != nullptr) + if (wd.diffsInfoPath != nullptr) { - bytesWritten += sprintf_s(cmdLine + bytesWritten, MAX_CMDLINE_SIZE - bytesWritten, " -diffMCList %s", - wd.diffMCListPath); + bytesWritten += sprintf_s(cmdLine + bytesWritten, MAX_CMDLINE_SIZE - bytesWritten, " -diffsInfo %s", + wd.diffsInfoPath); } if (wd.baseMetricsSummaryPath != nullptr) @@ -719,10 +753,10 @@ int doParallelSuperPMI(CommandLine::Options& o) MergeWorkerMCLs(o.mclFilename, perWorkerData, o.workerCount, &PerWorkerData::failingMCListPath); } - if (o.diffMCLFilename != nullptr && !usageError) + if (o.diffsInfo != nullptr && !usageError) { // Concat the resulting diff .mcl files - MergeWorkerMCLs(o.diffMCLFilename, perWorkerData, o.workerCount, &PerWorkerData::diffMCListPath); + MergeWorkerCsvs(o.diffsInfo, perWorkerData, o.workerCount, &PerWorkerData::diffsInfoPath); } if (o.baseMetricsSummaryFile != nullptr && !usageError) @@ -761,9 +795,9 @@ int doParallelSuperPMI(CommandLine::Options& o) { DeleteFile(wd.failingMCListPath); } - if (wd.diffMCListPath != nullptr) + if (wd.diffsInfoPath != nullptr) { - DeleteFile(wd.diffMCListPath); + DeleteFile(wd.diffsInfoPath); } if (wd.baseMetricsSummaryPath != nullptr) { diff --git a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp index ddf595281691bd..7875d4ca111199 100644 --- a/src/coreclr/tools/superpmi/superpmi/superpmi.cpp +++ b/src/coreclr/tools/superpmi/superpmi/superpmi.cpp @@ -19,6 +19,7 @@ #include "methodstatsemitter.h" #include "spmiutil.h" #include "metricssummary.h" +#include "fileio.h" extern int doParallelSuperPMI(CommandLine::Options& o); @@ -64,54 +65,47 @@ void SetSuperPmiTargetArchitecture(const char* targetArchitecture) } } +enum class NearDifferResult +{ + SuccessWithoutDiff, + SuccessWithDiff, + Failure, +}; + // This function uses PAL_TRY, so it can't be in the a function that requires object unwinding. Extracting it out here // avoids compiler error. // -static void InvokeNearDiffer(NearDiffer* nearDiffer, - CommandLine::Options* o, - MethodContext** mc, - CompileResult** crl, - bool* hasDiff, - MethodContextReader** reader, - MCList* failingMCL, - MCList* diffMCL) +static NearDifferResult InvokeNearDiffer(NearDiffer* nearDiffer, + MethodContext** mc, + CompileResult** crl, + MethodContextReader** reader) { + struct Param : FilterSuperPMIExceptionsParam_CaptureException { NearDiffer* nearDiffer; - CommandLine::Options* o; MethodContext** mc; CompileResult** crl; - bool* hasDiff; MethodContextReader** reader; - MCList* failingMCL; - MCList* diffMCL; + NearDifferResult result; } param; param.nearDiffer = nearDiffer; - param.o = o; param.mc = mc; param.crl = crl; - param.hasDiff = hasDiff; param.reader = reader; - param.failingMCL = failingMCL; - param.diffMCL = diffMCL; - *hasDiff = false; + param.result = NearDifferResult::Failure; PAL_TRY(Param*, pParam, ¶m) { if (!pParam->nearDiffer->compare(*pParam->mc, *pParam->crl, (*pParam->mc)->cr)) { - *pParam->hasDiff = true; + pParam->result = NearDifferResult::SuccessWithDiff; LogIssue(ISSUE_ASM_DIFF, "main method %d of size %d differs", (*pParam->reader)->GetMethodContextIndex(), (*pParam->mc)->methodSize); - - // This is a difference in ASM outputs from Jit1 & Jit2 and not a playback failure - // We will add this MC to the diffMCList if one is requested - // Otherwise this will end up in failingMCList - if ((*pParam->o).diffMCLFilename != nullptr) - (*pParam->diffMCL).AddMethodToMCL((*pParam->reader)->GetMethodContextIndex()); - else if ((*pParam->o).mclFilename != nullptr) - (*pParam->failingMCL).AddMethodToMCL((*pParam->reader)->GetMethodContextIndex()); + } + else + { + pParam->result = NearDifferResult::SuccessWithoutDiff; } } PAL_EXCEPT_FILTER(FilterSuperPMIExceptions_CaptureExceptionAndStop) @@ -121,10 +115,26 @@ static void InvokeNearDiffer(NearDiffer* nearDiffer, LogError("main method %d of size %d failed to load and compile correctly.", (*reader)->GetMethodContextIndex(), (*mc)->methodSize); e.ShowAndDeleteMessage(); - if ((*o).mclFilename != nullptr) - (*failingMCL).AddMethodToMCL((*reader)->GetMethodContextIndex()); + param.result = NearDifferResult::Failure; } PAL_ENDTRY + + return param.result; +} + +static bool PrintDiffsCsvHeader(FileWriter& fw) +{ + return fw.Printf("Context,Context size,Base size,Diff size,Base instructions,Diff instructions\n"); +} + +static bool PrintDiffsCsvRow( + FileWriter& fw, + int context, + uint32_t contextSize, + long long baseSize, long long diffSize, + long long baseInstructions, long long diffInstructions) +{ + return fw.Printf("%d,%u,%lld,%lld,%lld,%lld\n", context, contextSize, baseSize, diffSize, baseInstructions, diffInstructions); } // Run superpmi. The return value is as follows: @@ -171,7 +181,8 @@ int __cdecl main(int argc, char* argv[]) #endif bool collectThroughput = false; - MCList failingToReplayMCL, diffMCL; + MCList failingToReplayMCL; + FileWriter diffCsv; CommandLine::Options o; if (!CommandLine::Parse(argc, argv, &o)) @@ -230,9 +241,13 @@ int __cdecl main(int argc, char* argv[]) { failingToReplayMCL.InitializeMCL(o.mclFilename); } - if (o.diffMCLFilename != nullptr) + if (o.diffsInfo != nullptr) { - diffMCL.InitializeMCL(o.diffMCLFilename); + if (!FileWriter::CreateNew(o.diffsInfo, &diffCsv)) + { + LogError("Could not create file %s", o.diffsInfo); + return (int)SpmiResult::GeneralFailure; + } } SetDebugDumpVariables(); @@ -266,6 +281,11 @@ int __cdecl main(int argc, char* argv[]) } } + if (o.diffsInfo != nullptr) + { + PrintDiffsCsvHeader(diffCsv); + } + MetricsSummaries totalBaseMetrics; MetricsSummaries totalDiffMetrics; @@ -552,16 +572,42 @@ int __cdecl main(int argc, char* argv[]) } else { - bool hasDiff; - InvokeNearDiffer(&nearDiffer, &o, &mc, &crl, &hasDiff, &reader, &failingToReplayMCL, &diffMCL); + NearDifferResult result = InvokeNearDiffer(&nearDiffer, &mc, &crl, &reader); - if (hasDiff) + switch (result) { - totalBaseMetrics.Overall.NumContextsWithDiffs++; - totalDiffMetrics.Overall.NumContextsWithDiffs++; + case NearDifferResult::SuccessWithDiff: + totalBaseMetrics.Overall.NumContextsWithDiffs++; + totalDiffMetrics.Overall.NumContextsWithDiffs++; + + totalBaseMetricsOpts.NumContextsWithDiffs++; + totalDiffMetricsOpts.NumContextsWithDiffs++; + + // This is a difference in ASM outputs from Jit1 & Jit2 and not a playback failure + // We will add this MC to the diffs info if there is one. + // Otherwise this will end up in failingMCList + if (o.diffsInfo != nullptr) + { + PrintDiffsCsvRow( + diffCsv, + reader->GetMethodContextIndex(), + mcb.size, + baseMetrics.NumCodeBytes, diffMetrics.NumCodeBytes, + baseMetrics.NumExecutedInstructions, diffMetrics.NumExecutedInstructions); + } + else if (o.mclFilename != nullptr) + { + failingToReplayMCL.AddMethodToMCL(reader->GetMethodContextIndex()); + } - totalBaseMetricsOpts.NumContextsWithDiffs++; - totalDiffMetricsOpts.NumContextsWithDiffs++; + break; + case NearDifferResult::SuccessWithoutDiff: + break; + case NearDifferResult::Failure: + if (o.mclFilename != nullptr) + failingToReplayMCL.AddMethodToMCL(reader->GetMethodContextIndex()); + + break; } totalBaseMetrics.Overall.NumDiffedCodeBytes += baseMetrics.NumCodeBytes; @@ -625,7 +671,7 @@ int __cdecl main(int argc, char* argv[]) if (o.breakOnError) { if (o.indexCount == -1) - LogInfo("HINT: to repro add '/c %d' to cmdline", reader->GetMethodContextIndex()); + LogInfo("HINT: to repro add '-c %d' to cmdline", reader->GetMethodContextIndex()); __debugbreak(); } } @@ -674,10 +720,6 @@ int __cdecl main(int argc, char* argv[]) { failingToReplayMCL.CloseMCL(); } - if (o.diffMCLFilename != nullptr) - { - diffMCL.CloseMCL(); - } Logger::Shutdown(); SpmiResult result = SpmiResult::Success;