Skip to content

Commit

Permalink
chore: linting for ocr changes
Browse files (browse the repository at this point in the history)
  • Loading branch information
benjamin-awd committed Sep 4, 2024
1 parent c1da910 commit d65641c
Show file tree
Hide file tree
Showing 6 changed files with 11 additions and 13 deletions.
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,9 @@ disable_error_code = [

[[tool.mypy.overrides]]
module = [
"fitz",
"pymupdf",
"ocrmypdf",
"ocrmypdf.exceptions",
"pdftotext",
"pdf2john",
]
Expand Down
4 changes: 2 additions & 2 deletions src/monopoly/banks/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from typing import Any

from monopoly.config import PdfConfig, StatementConfig
from monopoly.identifiers import Identifier

logger = logging.getLogger(__name__)

Expand All @@ -16,7 +16,7 @@ class BankBase:

statement_configs: list[StatementConfig]
pdf_config: PdfConfig = PdfConfig()
- identifiers: list[Identifier]
+ identifiers: list[list[Any]]

def __init_subclass__(cls, **kwargs) -> None:
if not hasattr(cls, "statement_configs"):
Expand Down
4 changes: 1 addition & 3 deletions src/monopoly/banks/detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@ def metadata_items(self) -> list[Any]:

return identifiers

- def detect_bank(
- self, banks: list[Type["BankBase"]] = None
- ) -> Type["BankBase"] | None:
+ def detect_bank(self, banks: list[Type["BankBase"]]) -> Type["BankBase"] | None:
"""
Reads the encryption metadata or actual metadata (if the PDF is not encrypted),
and checks for a bank based on unique identifiers.
Expand Down
4 changes: 1 addition & 3 deletions src/monopoly/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,4 @@ class PdfConfig:

page_range: tuple[Optional[int], Optional[int]] = (None, None)
page_bbox: Optional[tuple[float, float, float, float]] = None
- ocr_identifiers: list[Optional[MetadataIdentifier]] = field(
- default_factory=list[None]
- )
+ ocr_identifiers: Optional[list[MetadataIdentifier]] = None
6 changes: 3 additions & 3 deletions src/monopoly/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from functools import cached_property, lru_cache
from io import BytesIO
from pathlib import Path
- from typing import TYPE_CHECKING, Optional
+ from typing import TYPE_CHECKING, Optional, Type

import pdftotext
from pydantic import SecretStr
Expand Down Expand Up @@ -119,7 +119,7 @@ def raw_text(self) -> str:
class PdfParser:
def __init__(
self,
- bank: "BankBase",
+ bank: Type["BankBase"],
document: PdfDocument,
):
"""
Expand All @@ -146,7 +146,7 @@ def page_bbox(self):

@cached_property
def ocr_identifiers(self):
- return self.pdf_config.ocr_identifiers
+ return self.pdf_config.ocr_identifiers or []

@lru_cache
def get_pages(self) -> list[PdfPage]:
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_bank_identifier/test_auto_detect_bank.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def test_detect_bank_with_not_matching_text_identifier(
mock_banks_list = [MockBankTwo, MockBankWithMultipleTextIdentifier]
monkeypatch.setattr("monopoly.banks.banks", mock_banks_list)

- assert not metadata_analyzer.detect_bank()
+ assert not metadata_analyzer.detect_bank(mock_banks_list)


@patch.object(PdfDocument, "raw_text", new_callable=PropertyMock)
Expand Down

0 comments on commit d65641c

Please sign in to comment.