Skip to content

Commit

Permalink
pymatviz/utils.py add df_to_pdf() and normalize_and_crop_pdf()
Browse files Browse the repository at this point in the history
  • Loading branch information
janosh committed Oct 9, 2023
1 parent 6e10a05 commit 9a09444
Showing 1 changed file with 93 additions and 0 deletions.
93 changes: 93 additions & 0 deletions pymatviz/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,11 @@


if TYPE_CHECKING:
from pathlib import Path

from matplotlib.gridspec import GridSpec
from numpy.typing import ArrayLike
from pandas.io.formats.style import Styler

ROOT = dirname(dirname(__file__))

Expand Down Expand Up @@ -575,3 +578,93 @@ def patch_dict(
patched.update(updates)

yield patched


def df_to_pdf(
styler: Styler,
file_path: str | Path,
crop: bool = True,
size: str = "landscape",
style: str = "",
**kwargs: Any,
) -> None:
"""Export a pandas Styler to PDF with WeasyPrint.
Args:
styler (Styler): Styler object to export.
file_path (str): Path to save the PDF to. Requires WeasyPrint.
crop (bool): Whether to crop the PDF margins. Requires pdfCropMargins.
Defaults to True.
size (str): Page size. Defaults to "landscape". See
https://developer.mozilla.org/@page for options.
style (str): CSS style string to be inserted into the HTML file.
Defaults to "". Example: "body { margin: 0; padding: 1em; }".
**kwargs: Keyword arguments passed to Styler.to_html().
"""
try:
from weasyprint import HTML
except ImportError as exc:
msg = "weasyprint not installed\nrun pip install weasyprint"
raise ImportError(msg) from exc

html_str = styler.to_html(**kwargs)

# CSS to adjust layout and margins
html_str = f"""
<style>
@page {{ size: {size}; }}
{style}
</style>
{html_str}
"""

html = HTML(string=html_str)

html.write_pdf(file_path)

if crop:
normalize_and_crop_pdf(file_path)


def normalize_and_crop_pdf(file_path: str | Path) -> None:
"""Normalize a PDF using Ghostscript and then crop it.
Without gs normalization, pdfCropMargins sometimes corrupts the PDF.
Args:
file_path (str | Path): Path to the PDF file.
"""
try:
normalized_file_path = f"{file_path}_normalized.pdf"
from pdfCropMargins import crop

# Normalize the PDF with Ghostscript
subprocess.run(
[
*"gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4".split(),
*"-dPDFSETTINGS=/default -dNOPAUSE -dQUIET -dBATCH".split(),
f"-sOutputFile={normalized_file_path}",
str(file_path),
],
check=True,
)

# Crop the normalized PDF
cropped_file_path, exit_code, stdout, stderr = crop(
["--percentRetain", "0", normalized_file_path]
)

if stderr:
print(f"pdfCropMargins {stderr=}")
# something went wrong, remove the cropped PDF
os.remove(cropped_file_path)
else:
# replace the original PDF with the cropped one
os.replace(cropped_file_path, str(file_path))

os.remove(normalized_file_path)

except ImportError as exc:
msg = "pdfCropMargins not installed\nrun pip install pdfCropMargins"
raise ImportError(msg) from exc
except Exception as exc:
raise RuntimeError("Error cropping PDF margins") from exc

0 comments on commit 9a09444

Please sign in to comment.