From 950d8c4683cb5f4c1cd7f81d7098aedd0c4e28e1 Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 23:40:30 +0800 Subject: [PATCH] chore: import from pymupdf instead of fitz --- src/monopoly/pdf.py | 10 +++++----- tests/conftest.py | 4 ++-- tests/unit/test_safety_check.py | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/monopoly/pdf.py b/src/monopoly/pdf.py index b39ab9f8..8f5be3db 100644 --- a/src/monopoly/pdf.py +++ b/src/monopoly/pdf.py @@ -5,12 +5,12 @@ from pathlib import Path from typing import Optional -import fitz import pdftotext from ocrmypdf import Verbosity, configure_logging, ocr from ocrmypdf.exceptions import PriorOcrFoundError, TaggedPDFError from pydantic import SecretStr from pydantic_settings import BaseSettings, SettingsConfigDict +from pymupdf import TEXTFLAGS_TEXT, Document, Page from monopoly.banks import BankBase @@ -61,7 +61,7 @@ class BadPasswordFormatError(Exception): """Exception raised passwords are not provided in a proper format""" -class PdfDocument(fitz.Document): +class PdfDocument(Document): """Handles logic related to the opening, unlocking, and storage of a PDF document.""" def __init__( @@ -181,7 +181,7 @@ def get_pages(self) -> list[PdfPage]: raise RuntimeError("Unable to retrieve pages") @staticmethod - def _remove_vertical_text(page: fitz.Page): + def _remove_vertical_text(page: Page): """Helper function to remove vertical text, based on writing direction (wdir). This helps avoid situations where the PDF is oddly parsed, due to vertical text @@ -199,12 +199,12 @@ def _remove_vertical_text(page: fitz.Page): If line["dir"] != (1, 0), the text of its spans is rotated. """ - for block in page.get_text("dict", flags=fitz.TEXTFLAGS_TEXT)["blocks"]: + for block in page.get_text("dict", flags=TEXTFLAGS_TEXT)["blocks"]: for line in block["lines"]: writing_direction = line["dir"] if writing_direction != (1, 0): page.add_redact_annot(line["bbox"]) - page.apply_redactions(images=fitz.PDF_REDACT_IMAGE_NONE) + page.apply_redactions(images=0) return page @staticmethod diff --git a/tests/conftest.py b/tests/conftest.py index 9803db2d..5334fa06 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,8 @@ import os from unittest.mock import MagicMock, Mock, patch -import fitz import pytest +from pymupdf import Document from monopoly.banks.detector import BankDetector from monopoly.config import DateOrder, PdfConfig, StatementConfig @@ -65,7 +65,7 @@ def setup_statement_fixture( mock_page = Mock(spec=PdfPage) mock_page.lines = ["foo", "bar"] mock_page.raw_text = ["foo\nbar"] - document = MagicMock(spec=fitz.Document) + document = MagicMock(spec=Document) document.name = "mock_document.pdf" statement = statement_cls(pages=[mock_page], config=statement_config, header="foo") yield statement diff --git a/tests/unit/test_safety_check.py b/tests/unit/test_safety_check.py index 5e817a23..7e1add21 100644 --- a/tests/unit/test_safety_check.py +++ b/tests/unit/test_safety_check.py @@ -1,5 +1,5 @@ -import fitz import pytest +from pymupdf import Document from monopoly.banks import BankBase from monopoly.statements import CreditStatement, DebitStatement @@ -15,7 +15,7 @@ class MockProcessor(BankBase): def test_credit_safety_check(credit_statement: CreditStatement): - document = fitz.Document() + document = Document() page = document.new_page() text = "Page 1\n3\nfoo\n02 May\n2.27\n27 Apr\n2.67\ntotal amount 31.50" page.lines = text.split("\n") @@ -35,7 +35,7 @@ def test_credit_safety_check(credit_statement: CreditStatement): def test_debit_safety_check(debit_statement: DebitStatement): - document = fitz.Document() + document = Document() page = document.new_page() text = ( "Page 1\n3\nfoo\n02 May\n-2.5\n27 Apr\n2.67\ntotal credit 30.0 total debit 2.5" @@ -64,7 +64,7 @@ def test_debit_safety_check(debit_statement: DebitStatement): def test_debit_safety_check_failure(debit_statement: DebitStatement): - document = fitz.Document() + document = Document() page = document.new_page() text = "Page 1\n3\nfoo\n02 May\n-999\n27 Apr\n456\nrandom transaction 123" page.lines = text.split("\n")