From abb3e3a13df8a831f9d74de114a24434a91a4670 Mon Sep 17 00:00:00 2001 From: Tyson Smith Date: Tue, 19 Sep 2023 09:54:58 -0700 Subject: [PATCH] Include minidump-stackwalk errors in minidump logs This makes minidump reporting function more like sanitizers. --- src/ffpuppet/core.py | 13 +-- src/ffpuppet/minidump_parser.py | 154 ++++++++++++++------------- src/ffpuppet/test_ffpuppet.py | 10 +- src/ffpuppet/test_minidump_parser.py | 128 +++++++++++----------- 4 files changed, 157 insertions(+), 148 deletions(-) diff --git a/src/ffpuppet/core.py b/src/ffpuppet/core.py index d236bfb..4e5d8b1 100644 --- a/src/ffpuppet/core.py +++ b/src/ffpuppet/core.py @@ -12,6 +12,7 @@ from re import IGNORECASE from re import compile as re_compile from re import match as re_match +from shutil import copyfileobj from subprocess import Popen, check_output from sys import executable from typing import Any, Dict, Iterator, List, Optional, Pattern, Set, Tuple, Union @@ -602,12 +603,12 @@ def close(self, force_close: bool = False) -> None: if not sym_path.is_dir(): # use packaged symbols sym_path = self._bin_path / "symbols" - process_minidumps( - md_path, - sym_path, - self._logs.add_log, - working_path=self._working_path, - ) + for md_txt in process_minidumps(md_path, sym_path): + with md_txt.open("rb") as md_fp: + copyfileobj( + md_fp, self._logs.add_log(md_txt.stem) + ) # type: ignore + stderr_fp = self._logs.get_fp("stderr") if stderr_fp: stderr_fp.write(f"[ffpuppet] Exit code: {self._proc.poll()}\n".encode()) diff --git a/src/ffpuppet/minidump_parser.py b/src/ffpuppet/minidump_parser.py index dac3d78..dc9d787 100644 --- a/src/ffpuppet/minidump_parser.py +++ b/src/ffpuppet/minidump_parser.py @@ -5,10 +5,10 @@ from json import JSONDecodeError, load from logging import getLogger from pathlib import Path -from shutil import copy2, rmtree, which -from subprocess import DEVNULL, CalledProcessError, TimeoutExpired, run +from shutil import rmtree, which +from subprocess import CalledProcessError, TimeoutExpired, run from tempfile import NamedTemporaryFile, mkdtemp -from typing import IO, Any, Callable, Dict, List, Optional +from typing import IO, Any, Dict, Iterator, List, Optional, Tuple LOG = getLogger(__name__) @@ -18,26 +18,39 @@ __all__ = ("process_minidumps",) -class MinidumpStackwalkFailure(Exception): - """ - Raised when the minidump-stackwalk fails. - """ - - class MinidumpParser: """Parse minidump files via minidump-stackwalk. https://lib.rs/crates/minidump-stackwalk Attributes: - symbols_path: Path containing debug symbols. + symbols: Path containing debug symbols. """ MDSW_BIN = which("minidump-stackwalk") - __slots__ = ("_symbols_path",) + __slots__ = ("_storage", "_symbols") + + def __init__(self, symbols: Optional[Path] = None): + self._storage = Path(mkdtemp(prefix="md-parser-")) + self._symbols = symbols + + def __enter__(self) -> "MinidumpParser": + return self + + def __exit__(self, *exc: Any) -> None: + self.close() - def __init__(self, symbols_path: Optional[Path] = None): - self._symbols_path = symbols_path + def close(self) -> None: + """Remove working data. + + Args: + None + + Returns: + None + """ + if self._storage.is_dir(): + rmtree(self._storage) @staticmethod def format_output( @@ -121,37 +134,63 @@ def format_output( if limit < len(frames): out_fp.write(b"WARNING: Hit stack size output limit!\n") - def to_json(self, src: Path, dst: Path) -> Path: + def to_json(self, src: Path) -> Tuple[bool, Path]: """Convert a minidump to json a file. Args: src: Minidump file. - dst: Location to save JSON file. Returns: - A file containing JSON output. + Status and a file containing minidump-stackwalk output. """ assert self.MDSW_BIN cmd = [self.MDSW_BIN, "--no-color", "--json"] - if self._symbols_path: - cmd.extend(["--symbols-path", str(self._symbols_path)]) + if self._symbols: + cmd.extend(["--symbols-path", str(self._symbols)]) else: cmd.extend(["--symbols-url", "https://symbols.mozilla.org/"]) - # add output files - with NamedTemporaryFile(dir=dst, prefix="mdsw_out_", suffix=".json") as ofp: - out_json = Path(ofp.name) - cmd.extend(["--output-file", str(out_json)]) cmd.append(str(src)) + # get unique file name + with NamedTemporaryFile(dir=self._storage, prefix="mdsw_out_") as ofp: + out_json = Path(ofp.name) LOG.debug("running %r", " ".join(cmd)) try: - run(cmd, check=True, stdout=DEVNULL, timeout=60) - except CalledProcessError: + with out_json.open("wb") as out_fp: + run(cmd, check=True, stderr=out_fp, stdout=out_fp, timeout=60) + success = True + except (CalledProcessError, TimeoutExpired): LOG.error("Failed to process: %s", src) - raise MinidumpStackwalkFailure(b"minidump-stackwalk failed") from None - except TimeoutExpired: - LOG.error("Failed to process: %s", src) - raise MinidumpStackwalkFailure("minidump-stackwalk hung") from None - return out_json + success = False + return success, out_json + + def to_text(self, src: Path, filename: str) -> Path: + """Convert minidump output to a formatted text file. + + Args: + src: Minidump file. + filename: Name to use for text file. + + Returns: + File containing text output. + """ + dst = self._storage / filename + success, md_json = self.to_json(src) + if success: + try: + # load json from mdsw + with md_json.open("rb") as in_fp: + md_data = load(in_fp) + md_json.unlink() + # write formatted data + with dst.open("wb") as log_fp: + self.format_output(md_data, log_fp) + except (JSONDecodeError, KeyError): + LOG.error("Failed to process: %s", src) + dst.write_text("Failed to load JSON from minidump-stackwalk") + else: + # move error log + md_json.rename(dst) + return dst @classmethod def mdsw_available( @@ -202,61 +241,30 @@ def mdsw_available( return True -def process_minidumps( - path: Path, - symbols_path: Path, - cb_create_log: Callable[..., Any], - working_path: Optional[str] = None, -) -> None: - """Scan for minidump (.dmp) files in path. If files are found they - are parsed and new logs are added via the cb_create_log callback. +def process_minidumps(path: Path, symbols: Path) -> Iterator[Path]: + """Scan for minidump (.dmp) files in path and a log is yielded for each. Args: path: Path to scan for minidump files. - symbols_path: Directory containing symbols for the target binary. - cb_create_log: A callback to the add_log() of a PuppetLogger. - working_path: Used as base directory for temporary files. + symbols: Directory containing symbols for the target binary. - Returns: - None + Yields: + Formatted minidump logs. """ if not MinidumpParser.mdsw_available(): - LOG.warning( + LOG.error( "Unable to process minidump." " See README.md for details on obtaining the latest minidump-stackwalk." ) return assert path.is_dir(), f"missing minidump scan path '{path!s}'" local_symbols = True - if not symbols_path.is_dir(): - LOG.warning("Local packaged symbols not found: %r", str(symbols_path)) + if not symbols.is_dir(): + LOG.warning("Local packaged symbols not found: '%s'", symbols) local_symbols = False - # create working path - working_path = mkdtemp(prefix="minidump_", dir=working_path) - - md_parser = MinidumpParser(symbols_path=symbols_path if local_symbols else None) - # order by last modified date hopefully the oldest log is the cause of the issue - dmp_files = sorted(path.glob("*.dmp"), key=lambda x: x.stat().st_mtime) - for count, file in enumerate(dmp_files): - # filter out zero byte files and warn - if file.stat().st_size == 0: - LOG.warning("Ignored zero byte minidump: %s", file) - continue - try: - # parse minidump with minidump-stackwalk - md_json = md_parser.to_json(file, Path(working_path)) - # load json data from file to dict - with md_json.open("rb") as json_fp: - md_data = load(json_fp) - md_json.unlink() - # write formatted minidump output to log file - md_parser.format_output(md_data, cb_create_log(f"minidump_{count:02}")) - except (JSONDecodeError, KeyError, MinidumpStackwalkFailure): - # save a copy of the minidump - saved_md = copy2(file, working_path) - LOG.error("Minidump saved as '%s'", saved_md) - raise - - # if successful remove the working path - rmtree(working_path) + with MinidumpParser(symbols=symbols if local_symbols else None) as md_parser: + # order by last modified date hopefully the oldest log is the cause of the issue + dmp_files = sorted(path.glob("*.dmp"), key=lambda x: x.stat().st_mtime) + for count, dmp_file in enumerate(dmp_files): + yield md_parser.to_text(dmp_file, f"minidump_{count:02}.txt") diff --git a/src/ffpuppet/test_ffpuppet.py b/src/ffpuppet/test_ffpuppet.py index 75f6ad2..c210428 100644 --- a/src/ffpuppet/test_ffpuppet.py +++ b/src/ffpuppet/test_ffpuppet.py @@ -575,11 +575,11 @@ def test_ffpuppet_21(tmp_path): def test_ffpuppet_22(mocker, tmp_path): """test multiple minidumps""" - # pylint: disable=unused-argument - def _fake_process_minidumps(dmps, _, add_log, working_path=None): + def _fake_process_minidumps(dmps, _): for num, _ in enumerate(Path(dmps).glob("*.dmp")): - lfp = add_log(f"minidump_{num + 1:02}") - lfp.write(b"test") + md_log = tmp_path / f"minidump_{num:02}.txt" + md_log.write_text("test") + yield md_log mocker.patch("ffpuppet.core.process_minidumps", side_effect=_fake_process_minidumps) profile = tmp_path / "profile" @@ -600,9 +600,9 @@ def _fake_process_minidumps(dmps, _, add_log, working_path=None): ffp.close() logs = tmp_path / "logs" ffp.save_logs(logs) + assert any(logs.glob("log_minidump_00.txt")) assert any(logs.glob("log_minidump_01.txt")) assert any(logs.glob("log_minidump_02.txt")) - assert any(logs.glob("log_minidump_03.txt")) def test_ffpuppet_23(tmp_path): diff --git a/src/ffpuppet/test_minidump_parser.py b/src/ffpuppet/test_minidump_parser.py index 900f6d0..ea08707 100644 --- a/src/ffpuppet/test_minidump_parser.py +++ b/src/ffpuppet/test_minidump_parser.py @@ -4,42 +4,65 @@ # You can obtain one at http://mozilla.org/MPL/2.0/. """ffpuppet minidump parser tests""" -from json import JSONDecodeError from subprocess import CalledProcessError, CompletedProcess, TimeoutExpired -from pytest import mark, raises +from pytest import mark -from .minidump_parser import MinidumpParser, MinidumpStackwalkFailure, process_minidumps +from .minidump_parser import MinidumpParser, process_minidumps @mark.parametrize( - "run_return, symbols, result", + "run_return, symbols", [ # succeeded - with symbols - (None, True, True), + (None, True), # succeeded - without symbols - (None, False, True), + (None, False), # failed - parse hung - ((TimeoutExpired(["test"], 0.0),), True, False), + ((TimeoutExpired(["test"], 0.0),), True), # failed - json parse error - ((CalledProcessError(1, ["test"]),), True, False), + ((CalledProcessError(1, ["test"]),), True), ], ) -def test_minidump_parser_01(mocker, tmp_path, run_return, symbols, result): +def test_minidump_parser_01(mocker, tmp_path, run_return, symbols): """test MinidumpParser.to_json()""" - mocker.patch( - "ffpuppet.minidump_parser.MinidumpParser.MDSW_BIN", "minidump_stackwalk" - ) + mocker.patch.object(MinidumpParser, "MDSW_BIN", "mdsw") mocker.patch("ffpuppet.minidump_parser.run", autospec=True, side_effect=run_return) - parser = MinidumpParser(symbols_path=tmp_path if symbols else None) - if result: - assert parser.to_json(tmp_path, str(tmp_path)) - else: - with raises(MinidumpStackwalkFailure): - parser.to_json(tmp_path, str(tmp_path)) + with MinidumpParser(symbols=tmp_path if symbols else None) as parser: + assert parser + success, data_file = parser.to_json(tmp_path) + assert success == (run_return is None) + assert data_file -def test_minidump_parser_02(tmp_path): +@mark.parametrize( + "success, json_data", + [ + # load and format output + (True, "{}"), + # failed to generate json + (False, ""), + # failed to format data + (True, "bad,json"), + ], +) +def test_minidump_parser_02(mocker, tmp_path, success, json_data): + """test MinidumpParser.to_txt()""" + json_file = tmp_path / "foo.json" + json_file.write_text(json_data) + mocker.patch.object(MinidumpParser, "to_json", return_value=(success, json_file)) + mocker.patch.object(MinidumpParser, "format_output") + with MinidumpParser() as parser: + # pylint: disable=protected-access + assert parser._storage.is_dir() + output = parser.to_text(tmp_path / "foo.dmp", "minidump_00.txt") + assert output + assert output.name == "minidump_00.txt" + assert output.is_file() + assert not output.is_file() + + +def test_minidump_parser_03(tmp_path): """test MinidumpParser.format_output() - un-symbolized""" data = { "crash_info": { @@ -81,7 +104,7 @@ def test_minidump_parser_02(tmp_path): assert formatted[4] == "0|0|xul.dll||||" -def test_minidump_parser_03(tmp_path): +def test_minidump_parser_04(tmp_path): """test MinidumpParser.format_output() - symbolized""" data = { "crash_info": { @@ -185,56 +208,33 @@ def test_minidump_parser_03(tmp_path): (None, None, False), ], ) -def test_minidump_parser_04(mocker, call_result, mdsw_bin, result): +def test_minidump_parser_05(mocker, call_result, mdsw_bin, result): """test MinidumpParser.mdsw_available()""" - mocker.patch("ffpuppet.minidump_parser.MinidumpParser.MDSW_BIN", mdsw_bin) mocker.patch("ffpuppet.minidump_parser.run", side_effect=call_result) + mocker.patch.object(MinidumpParser, "MDSW_BIN", mdsw_bin) assert ( MinidumpParser.mdsw_available(force_check=True, min_version="0.15.2") == result ) -@mark.parametrize( - "mdsw, syms, md_json_data, raised", - [ - # loading minidump files - success - (True, True, ["{}"], False), - # loading minidump files - JSONDecodeError - (True, True, ["bad,json"], True), - # loading minidump files - zero byte minidumps - (True, True, ["", "{}", ""], False), - # symbols_path does not exist - (True, False, ["{}"], False), - # test minidump-stackwalk not available - (False, False, [], False), - ], -) -def test_process_minidumps_01(mocker, tmp_path, mdsw, syms, md_json_data, raised): +def test_process_minidumps_01(mocker, tmp_path): """test process_minidumps()""" fake_mdp = mocker.patch("ffpuppet.minidump_parser.MinidumpParser", autospec=True) - fake_mdp.mdsw_available.return_value = mdsw - - to_json_results = [] - for count, md_data in enumerate(md_json_data): - md_file = tmp_path / f"minidump{count:02d}.dmp" - md_file.write_text(md_data) - if md_data: - to_json_results.append(md_file) - fake_mdp.return_value.to_json.side_effect = to_json_results - - try: - process_minidumps( - tmp_path, - tmp_path if syms else tmp_path / "missing", - mocker.Mock(), - working_path=str(tmp_path), - ) - except JSONDecodeError: - assert raised - else: - assert not raised - - assert fake_mdp.mdsw_available.call_count == 1 - if mdsw: - assert fake_mdp.return_value.to_json.call_count == 1 - assert fake_mdp.return_value.format_output.call_count == (0 if raised else 1) + fake_mdp.mdsw_available.return_value = False + assert not any(process_minidumps(tmp_path, tmp_path)) + + +def test_process_minidumps_02(mocker, tmp_path): + """test process_minidumps()""" + mocker.patch( + "ffpuppet.minidump_parser.MinidumpParser.mdsw_available", autospec=True + ) + mocker.patch( + "ffpuppet.minidump_parser.MinidumpParser.to_text", + autospec=True, + return_value=tmp_path / "minidump_00.txt", + ) + (tmp_path / "foo.dmp").touch() + logs = list(process_minidumps(tmp_path, tmp_path / "syms")) + assert logs + assert logs[0].name == "minidump_00.txt"