Include minidump-stackwalk errors in minidump logs

This makes minidump reporting function more like sanitizers.
MozillaSecurity · Sep 19, 2023 · abb3e3a · abb3e3a
1 parent be10ce0
commit abb3e3a
Show file tree

Hide file tree

Showing 4 changed files with 157 additions and 148 deletions.
diff --git a/src/ffpuppet/core.py b/src/ffpuppet/core.py
@@ -12,6 +12,7 @@
 from re import IGNORECASE
 from re import compile as re_compile
 from re import match as re_match
+from shutil import copyfileobj
 from subprocess import Popen, check_output
 from sys import executable
 from typing import Any, Dict, Iterator, List, Optional, Pattern, Set, Tuple, Union
@@ -602,12 +603,12 @@ def close(self, force_close: bool = False) -> None:
                 if not sym_path.is_dir():
                     # use packaged symbols
                     sym_path = self._bin_path / "symbols"
-                process_minidumps(
-                    md_path,
-                    sym_path,
-                    self._logs.add_log,
-                    working_path=self._working_path,
-                )
+                for md_txt in process_minidumps(md_path, sym_path):
+                    with md_txt.open("rb") as md_fp:
+                        copyfileobj(
+                            md_fp, self._logs.add_log(md_txt.stem)
+                        )  # type: ignore
+
             stderr_fp = self._logs.get_fp("stderr")
             if stderr_fp:
                 stderr_fp.write(f"[ffpuppet] Exit code: {self._proc.poll()}\n".encode())

diff --git a/src/ffpuppet/minidump_parser.py b/src/ffpuppet/minidump_parser.py
@@ -5,10 +5,10 @@
 from json import JSONDecodeError, load
 from logging import getLogger
 from pathlib import Path
-from shutil import copy2, rmtree, which
-from subprocess import DEVNULL, CalledProcessError, TimeoutExpired, run
+from shutil import rmtree, which
+from subprocess import CalledProcessError, TimeoutExpired, run
 from tempfile import NamedTemporaryFile, mkdtemp
-from typing import IO, Any, Callable, Dict, List, Optional
+from typing import IO, Any, Dict, Iterator, List, Optional, Tuple
 
 LOG = getLogger(__name__)
 
@@ -18,26 +18,39 @@
 __all__ = ("process_minidumps",)
 
 
-class MinidumpStackwalkFailure(Exception):
-    """
-    Raised when the minidump-stackwalk fails.
-    """
-
-
 class MinidumpParser:
     """Parse minidump files via minidump-stackwalk.
     https://lib.rs/crates/minidump-stackwalk
 
     Attributes:
-        symbols_path: Path containing debug symbols.
+        symbols: Path containing debug symbols.
     """
 
     MDSW_BIN = which("minidump-stackwalk")
 
-    __slots__ = ("_symbols_path",)
+    __slots__ = ("_storage", "_symbols")
+
+    def __init__(self, symbols: Optional[Path] = None):
+        self._storage = Path(mkdtemp(prefix="md-parser-"))
+        self._symbols = symbols
+
+    def __enter__(self) -> "MinidumpParser":
+        return self
+
+    def __exit__(self, *exc: Any) -> None:
+        self.close()
 
-    def __init__(self, symbols_path: Optional[Path] = None):
-        self._symbols_path = symbols_path
+    def close(self) -> None:
+        """Remove working data.
+
+        Args:
+            None
+
+        Returns:
+            None
+        """
+        if self._storage.is_dir():
+            rmtree(self._storage)
 
     @staticmethod
     def format_output(
@@ -121,37 +134,63 @@ def format_output(
         if limit < len(frames):
             out_fp.write(b"WARNING: Hit stack size output limit!\n")
 
-    def to_json(self, src: Path, dst: Path) -> Path:
+    def to_json(self, src: Path) -> Tuple[bool, Path]:
         """Convert a minidump to json a file.
 
         Args:
             src: Minidump file.
-            dst: Location to save JSON file.
 
         Returns:
-            A file containing JSON output.
+            Status and a file containing minidump-stackwalk output.
         """
         assert self.MDSW_BIN
         cmd = [self.MDSW_BIN, "--no-color", "--json"]
-        if self._symbols_path:
-            cmd.extend(["--symbols-path", str(self._symbols_path)])
+        if self._symbols:
+            cmd.extend(["--symbols-path", str(self._symbols)])
         else:
             cmd.extend(["--symbols-url", "https://symbols.mozilla.org/"])
-        # add output files
-        with NamedTemporaryFile(dir=dst, prefix="mdsw_out_", suffix=".json") as ofp:
-            out_json = Path(ofp.name)
-        cmd.extend(["--output-file", str(out_json)])
         cmd.append(str(src))
+        # get unique file name
+        with NamedTemporaryFile(dir=self._storage, prefix="mdsw_out_") as ofp:
+            out_json = Path(ofp.name)
         LOG.debug("running %r", " ".join(cmd))
         try:
-            run(cmd, check=True, stdout=DEVNULL, timeout=60)
-        except CalledProcessError:
+            with out_json.open("wb") as out_fp:
+                run(cmd, check=True, stderr=out_fp, stdout=out_fp, timeout=60)
+            success = True
+        except (CalledProcessError, TimeoutExpired):
             LOG.error("Failed to process: %s", src)
-            raise MinidumpStackwalkFailure(b"minidump-stackwalk failed") from None
-        except TimeoutExpired:
-            LOG.error("Failed to process: %s", src)
-            raise MinidumpStackwalkFailure("minidump-stackwalk hung") from None
-        return out_json
+            success = False
+        return success, out_json
+
+    def to_text(self, src: Path, filename: str) -> Path:
+        """Convert minidump output to a formatted text file.
+
+        Args:
+            src: Minidump file.
+            filename: Name to use for text file.
+
+        Returns:
+            File containing text output.
+        """
+        dst = self._storage / filename
+        success, md_json = self.to_json(src)
+        if success:
+            try:
+                # load json from mdsw
+                with md_json.open("rb") as in_fp:
+                    md_data = load(in_fp)
+                md_json.unlink()
+                # write formatted data
+                with dst.open("wb") as log_fp:
+                    self.format_output(md_data, log_fp)
+            except (JSONDecodeError, KeyError):
+                LOG.error("Failed to process: %s", src)
+                dst.write_text("Failed to load JSON from minidump-stackwalk")
+        else:
+            # move error log
+            md_json.rename(dst)
+        return dst
 
     @classmethod
     def mdsw_available(
@@ -202,61 +241,30 @@ def mdsw_available(
         return True
 
 
-def process_minidumps(
-    path: Path,
-    symbols_path: Path,
-    cb_create_log: Callable[..., Any],
-    working_path: Optional[str] = None,
-) -> None:
-    """Scan for minidump (.dmp) files in path. If files are found they
-    are parsed and new logs are added via the cb_create_log callback.
+def process_minidumps(path: Path, symbols: Path) -> Iterator[Path]:
+    """Scan for minidump (.dmp) files in path and a log is yielded for each.
 
     Args:
         path: Path to scan for minidump files.
-        symbols_path: Directory containing symbols for the target binary.
-        cb_create_log: A callback to the add_log() of a PuppetLogger.
-        working_path: Used as base directory for temporary files.
+        symbols: Directory containing symbols for the target binary.
 
-    Returns:
-        None
+    Yields:
+        Formatted minidump logs.
     """
     if not MinidumpParser.mdsw_available():
-        LOG.warning(
+        LOG.error(
             "Unable to process minidump."
             " See README.md for details on obtaining the latest minidump-stackwalk."
         )
         return
     assert path.is_dir(), f"missing minidump scan path '{path!s}'"
     local_symbols = True
-    if not symbols_path.is_dir():
-        LOG.warning("Local packaged symbols not found: %r", str(symbols_path))
+    if not symbols.is_dir():
+        LOG.warning("Local packaged symbols not found: '%s'", symbols)
         local_symbols = False
 
-    # create working path
-    working_path = mkdtemp(prefix="minidump_", dir=working_path)
-
-    md_parser = MinidumpParser(symbols_path=symbols_path if local_symbols else None)
-    # order by last modified date hopefully the oldest log is the cause of the issue
-    dmp_files = sorted(path.glob("*.dmp"), key=lambda x: x.stat().st_mtime)
-    for count, file in enumerate(dmp_files):
-        # filter out zero byte files and warn
-        if file.stat().st_size == 0:
-            LOG.warning("Ignored zero byte minidump: %s", file)
-            continue
-        try:
-            # parse minidump with minidump-stackwalk
-            md_json = md_parser.to_json(file, Path(working_path))
-            # load json data from file to dict
-            with md_json.open("rb") as json_fp:
-                md_data = load(json_fp)
-            md_json.unlink()
-            # write formatted minidump output to log file
-            md_parser.format_output(md_data, cb_create_log(f"minidump_{count:02}"))
-        except (JSONDecodeError, KeyError, MinidumpStackwalkFailure):
-            # save a copy of the minidump
-            saved_md = copy2(file, working_path)
-            LOG.error("Minidump saved as '%s'", saved_md)
-            raise
-
-    # if successful remove the working path
-    rmtree(working_path)
+    with MinidumpParser(symbols=symbols if local_symbols else None) as md_parser:
+        # order by last modified date hopefully the oldest log is the cause of the issue
+        dmp_files = sorted(path.glob("*.dmp"), key=lambda x: x.stat().st_mtime)
+        for count, dmp_file in enumerate(dmp_files):
+            yield md_parser.to_text(dmp_file, f"minidump_{count:02}.txt")
diff --git a/src/ffpuppet/test_ffpuppet.py b/src/ffpuppet/test_ffpuppet.py
@@ -575,11 +575,11 @@ def test_ffpuppet_21(tmp_path):
 def test_ffpuppet_22(mocker, tmp_path):
     """test multiple minidumps"""
 
-    # pylint: disable=unused-argument
-    def _fake_process_minidumps(dmps, _, add_log, working_path=None):
+    def _fake_process_minidumps(dmps, _):
         for num, _ in enumerate(Path(dmps).glob("*.dmp")):
-            lfp = add_log(f"minidump_{num + 1:02}")
-            lfp.write(b"test")
+            md_log = tmp_path / f"minidump_{num:02}.txt"
+            md_log.write_text("test")
+            yield md_log
 
     mocker.patch("ffpuppet.core.process_minidumps", side_effect=_fake_process_minidumps)
     profile = tmp_path / "profile"
@@ -600,9 +600,9 @@ def _fake_process_minidumps(dmps, _, add_log, working_path=None):
         ffp.close()
         logs = tmp_path / "logs"
         ffp.save_logs(logs)
+        assert any(logs.glob("log_minidump_00.txt"))
         assert any(logs.glob("log_minidump_01.txt"))
         assert any(logs.glob("log_minidump_02.txt"))
-        assert any(logs.glob("log_minidump_03.txt"))
 
 
 def test_ffpuppet_23(tmp_path):