pymatviz/utils.py add df_to_pdf() and normalize_and_crop_pdf()

janosh · Oct 9, 2023 · 9a09444 · 9a09444
1 parent 6e10a05
commit 9a09444
Showing 1 changed file with 93 additions and 0 deletions.
diff --git a/pymatviz/utils.py b/pymatviz/utils.py
@@ -21,8 +21,11 @@
 
 
 if TYPE_CHECKING:
+    from pathlib import Path
+
     from matplotlib.gridspec import GridSpec
     from numpy.typing import ArrayLike
+    from pandas.io.formats.style import Styler
 
 ROOT = dirname(dirname(__file__))
 
@@ -575,3 +578,93 @@ def patch_dict(
     patched.update(updates)
 
     yield patched
+
+
+def df_to_pdf(
+    styler: Styler,
+    file_path: str | Path,
+    crop: bool = True,
+    size: str = "landscape",
+    style: str = "",
+    **kwargs: Any,
+) -> None:
+    """Export a pandas Styler to PDF with WeasyPrint.
+
+    Args:
+        styler (Styler): Styler object to export.
+        file_path (str): Path to save the PDF to. Requires WeasyPrint.
+        crop (bool): Whether to crop the PDF margins. Requires pdfCropMargins.
+            Defaults to True.
+        size (str): Page size. Defaults to "landscape". See
+            https://developer.mozilla.org/@page for options.
+        style (str): CSS style string to be inserted into the HTML file.
+            Defaults to "". Example: "body { margin: 0; padding: 1em; }".
+        **kwargs: Keyword arguments passed to Styler.to_html().
+    """
+    try:
+        from weasyprint import HTML
+    except ImportError as exc:
+        msg = "weasyprint not installed\nrun pip install weasyprint"
+        raise ImportError(msg) from exc
+
+    html_str = styler.to_html(**kwargs)
+
+    # CSS to adjust layout and margins
+    html_str = f"""
+    <style>
+        @page {{ size: {size}; }}
+        {style}
+    </style>
+    {html_str}
+    """
+
+    html = HTML(string=html_str)
+
+    html.write_pdf(file_path)
+
+    if crop:
+        normalize_and_crop_pdf(file_path)
+
+
+def normalize_and_crop_pdf(file_path: str | Path) -> None:
+    """Normalize a PDF using Ghostscript and then crop it.
+    Without gs normalization, pdfCropMargins sometimes corrupts the PDF.
+
+    Args:
+        file_path (str | Path): Path to the PDF file.
+    """
+    try:
+        normalized_file_path = f"{file_path}_normalized.pdf"
+        from pdfCropMargins import crop
+
+        # Normalize the PDF with Ghostscript
+        subprocess.run(
+            [
+                *"gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4".split(),
+                *"-dPDFSETTINGS=/default -dNOPAUSE -dQUIET -dBATCH".split(),
+                f"-sOutputFile={normalized_file_path}",
+                str(file_path),
+            ],
+            check=True,
+        )
+
+        # Crop the normalized PDF
+        cropped_file_path, exit_code, stdout, stderr = crop(
+            ["--percentRetain", "0", normalized_file_path]
+        )
+
+        if stderr:
+            print(f"pdfCropMargins {stderr=}")
+            # something went wrong, remove the cropped PDF
+            os.remove(cropped_file_path)
+        else:
+            # replace the original PDF with the cropped one
+            os.replace(cropped_file_path, str(file_path))
+
+        os.remove(normalized_file_path)
+
+    except ImportError as exc:
+        msg = "pdfCropMargins not installed\nrun pip install pdfCropMargins"
+        raise ImportError(msg) from exc
+    except Exception as exc:
+        raise RuntimeError("Error cropping PDF margins") from exc