#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2024 James R. Barlow
# SPDX-License-Identifier: MIT
#
# Origin: commit a1b8113d56e3a7841f8dc73846aebee8286b6a89 ("Add bisect script",
# James R. Barlow, Fri, 8 Nov 2024 11:09:13 -0800), file misc/bisect_pdf.py.

"""Helper script for bisecting PDFs to find a page with an issue.

Usage: bisect_pdf.py <input.pdf>

The script repeatedly writes a candidate page range of the input to
``bisect-issue-<first>-<last>.pdf`` (1-based, inclusive page numbers) in the
current directory and asks whether that file is "good" or "bad".  Once a
single page has been isolated it writes:

* ``bisect-issue-bad-<page>.pdf``  -- only the offending page
* ``bisect-issue-good-<page>.pdf`` -- every page except the offending one

The search assumes exactly one page triggers the issue.
"""

import sys
from typing import Callable, Optional


def find_first_bad_page(
    num_pages: int, range_is_bad: Callable[[int, int], bool]
) -> Optional[int]:
    """Binary-search for the first page that makes a page range "bad".

    Args:
        num_pages: Total number of pages in the document.
        range_is_bad: Callback receiving 0-based inclusive ``(first, last)``
            page indices; returns True if that range exhibits the issue.

    Returns:
        The 0-based index of the isolated page, or None when no range was
        ever reported bad (including the ``num_pages == 0`` case).
    """
    low = 0
    high = num_pages - 1
    while low <= high:
        mid = (low + high) // 2
        # Every page before ``low`` has already been cleared, so a bad
        # verdict on [low, mid] means the culprit is at or before ``mid``.
        if range_is_bad(low, mid):
            high = mid - 1
        else:
            low = mid + 1
    # ``low`` runs off the end only if every queried range was good.
    return low if low < num_pages else None


def _ask_is_bad(filename: str) -> bool:
    """Prompt until the user answers 'good' or 'bad'; return True for 'bad'."""
    # Flush explicitly: the prompt has no trailing newline, so it could
    # otherwise sit in the stdout buffer while input() is already waiting.
    print(f"Is {filename} good or bad?", end=" ", flush=True)
    while True:
        # strip() tolerates stray whitespace around the answer.
        response = input().strip().lower()
        if response in ("good", "bad"):
            return response == "bad"
        print("Please respond with 'good' or 'bad'.")


def main() -> int:
    """Run the interactive bisection; return the process exit status."""
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <input.pdf>")
        return 1

    # Imported here rather than at module level so the pure search helper
    # above stays importable without pikepdf installed.
    import pikepdf

    with pikepdf.open(sys.argv[1]) as pdf:

        def save_pages(pages, filename):
            """Write the given iterable of pages to a new PDF file."""
            with pikepdf.new() as new_pdf:
                new_pdf.pages.extend(pages)
                new_pdf.save(filename)

        def range_is_bad(first, last):
            """Export pages first..last and ask the user for a verdict."""
            filename = f"bisect-issue-{first + 1}-{last + 1}.pdf"
            save_pages(pdf.pages[first : last + 1], filename)
            return _ask_is_bad(filename)

        bad = find_first_bad_page(len(pdf.pages), range_is_bad)
        if bad is None:
            # Previously this fell through to pdf.pages[num_pages] and
            # crashed with IndexError.
            print("No page range was reported bad; nothing to isolate.")
            return 1

        print(f"The issue is on page {bad + 1} of the original PDF.")
        # extend() takes an iterable of pages, so the single page is wrapped
        # in a list instead of being passed bare.
        save_pages([pdf.pages[bad]], f"bisect-issue-bad-{bad + 1}.pdf")
        save_pages(
            [*pdf.pages[:bad], *pdf.pages[bad + 1 :]],
            f"bisect-issue-good-{bad + 1}.pdf",
        )
    return 0


if __name__ == "__main__":
    sys.exit(main())