Skip to content

Commit

Permalink
feat(ci): add mypy to workflow
Browse files (browse the repository at this point in the history)
  • Loading branch information
benjamin-awd committed Oct 29, 2023
1 parent 5322af1 commit 1d180ae
Show file tree
Hide file tree
Showing 13 changed files with 111 additions and 30 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ jobs:
poetry-version: "1.6.1"

- name: Run CI
run: poetry run task format && poetry run task lint
run: poetry run task format && poetry run task lint && poetry run task mypy
12 changes: 6 additions & 6 deletions monopoly/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Annotated
from typing import Annotated, Optional

from pydantic import ConfigDict, StringConstraints
from pydantic.dataclasses import dataclass
Expand Down Expand Up @@ -51,9 +51,9 @@ class PdfConfig:
PDF artifacts that may affect parsing.
"""

password: str = None
page_range: tuple = (None, None)
page_bbox: tuple = None
password: Optional[str] = None
page_range: tuple[Optional[int], Optional[int]] = (None, None)
page_bbox: Optional[tuple[float, float, float, float]] = None


@dataclass
Expand All @@ -79,8 +79,8 @@ class BruteForceConfig:
(the brute force mask).
"""

static_string: str = None
mask: str = None
static_string: Optional[str] = None
mask: Optional[str] = None


settings = Settings()
Expand Down
2 changes: 1 addition & 1 deletion monopoly/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class AutoEnum(StrEnum):
e.g. CITIBANK -> citibank
"""

def _generate_next_value_(name: str, *args):
def _generate_next_value_(name: str, *args): # type: ignore
return name.lower()


Expand Down
10 changes: 5 additions & 5 deletions monopoly/gmail/gmail.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(self, gmail_service: GmailResource = None):
if not gmail_service:
self.gmail_service = get_gmail_service()

def get_emails(self, query="is:unread", latest=False) -> Message:
def get_emails(self, query="is:unread", latest=False) -> list[Message]:
emails: list = (
self.gmail_service.users()
.messages()
Expand Down Expand Up @@ -72,8 +72,8 @@ def get_attachment_byte_string(self, message_id, attachment_id) -> dict:

class Message(Gmail):
def __init__(self, data: dict, gmail_service: GmailResource):
self.message_id: str = data.get("id")
self.payload: dict = data.get("payload")
self.message_id: str | None = data.get("id")
self.payload: dict | None = data.get("payload")
self.gmail_service = gmail_service
self.trusted_user_emails = settings.trusted_user_emails
super().__init__(gmail_service)
Expand Down Expand Up @@ -138,7 +138,7 @@ def subject(self) -> str:
for item in self.payload.get("headers"):
if item["name"] == "Subject":
return item["value"]
return None
raise RuntimeError("Subject could not be found")

@property
def parts(self) -> list[MessagePart]:
Expand Down Expand Up @@ -171,7 +171,7 @@ def from_trusted_user(self) -> bool:
class MessagePart:
def __init__(self, data: dict):
self._data = data
self.part_id = data.get("partId")
self.part_id: str = data.get("partId")
self.filename: str = data.get("filename")
self.body: dict = data.get("body")

Expand Down
2 changes: 1 addition & 1 deletion monopoly/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def process_bank_statement(message: Message, banks: dict):
if re.search(bank_regex_pattern, subject):
attachment = message.get_attachment()

with message.save(attachment) as file:
with message.save(attachment) as file: # type: ignore
processor: StatementProcessor = bank_class(file_path=file)
statement = processor.extract()
transformed_df = processor.transform(statement)
Expand Down
13 changes: 8 additions & 5 deletions monopoly/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import subprocess
from dataclasses import dataclass
from io import BytesIO
from typing import Optional

import fitz
import pdftotext
Expand All @@ -25,8 +26,8 @@ class PdfParser:
def __init__(
self,
file_path: str,
brute_force_config: BruteForceConfig = None,
pdf_config: PdfConfig = None,
brute_force_config: Optional[BruteForceConfig] = None,
pdf_config: Optional[PdfConfig] = None,
):
"""Class responsible for parsing PDFs and returning raw text
Expand All @@ -40,10 +41,10 @@ def __init__(

self.password = pdf_config.password
self.page_range = slice(*pdf_config.page_range)
self.page_bbox: tuple = pdf_config.page_bbox
self.page_bbox = pdf_config.page_bbox
self.brute_force_config = brute_force_config

def open(self, brute_force_config: BruteForceConfig = None):
def open(self, brute_force_config: Optional[BruteForceConfig] = None):
"""
Opens and decrypts a PDF document
"""
Expand Down Expand Up @@ -136,7 +137,9 @@ def _remove_vertical_text(page: fitz.Page):
return page

@staticmethod
def unlock_pdf(pdf_file_path: str, static_string: str, mask: str):
def unlock_pdf(
pdf_file_path: str, static_string: Optional[str], mask: Optional[str]
):
hash_extractor = PdfHashExtractor(pdf_file_path)
pdf_hash = hash_extractor.parse()

Expand Down
7 changes: 4 additions & 3 deletions monopoly/processor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from datetime import datetime
from typing import Optional

from pandas import DataFrame

Expand All @@ -24,7 +25,7 @@ def __init__(
super().__init__(file_path, brute_force_config, pdf_config)

def extract(self) -> Statement:
pages = self.get_pages()
pages = self.get_pages(self.brute_force_config)
statement = Statement(pages, self.statement_config)

if not statement.transactions:
Expand Down Expand Up @@ -70,7 +71,7 @@ def load(
self,
df: DataFrame,
statement: Statement,
csv_file_path: str = None,
csv_file_path: Optional[str] = None,
upload_to_cloud: bool = False,
):
csv_file_path = write_to_csv(
Expand All @@ -80,6 +81,6 @@ def load(
if upload_to_cloud:
upload_to_cloud_storage(
statement=statement,
source_filename=csv_file_path,
source_filename=csv_file_path, # type: ignore
bucket_name=settings.gcs_bucket,
)
10 changes: 6 additions & 4 deletions monopoly/statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def remove_extra_whitespace(cls, value: str) -> str:
return " ".join(value.split())

@field_validator("amount", mode="before")
def adjust_number_format(cls, value: str) -> float:
def adjust_number_format(cls, value: str) -> str:
if isinstance(value, str):
return value.replace(",", "")
return value
Expand All @@ -38,7 +38,7 @@ class Statement:
config: StatementConfig

@cached_property
def transactions(self) -> list[dict]:
def transactions(self) -> list[Transaction]:
transactions = []
for page in self.pages:
lines = self.process_lines(page)
Expand All @@ -53,9 +53,11 @@ def transactions(self) -> list[dict]:
def process_lines(page: PdfPage) -> list:
return [line.lstrip() for line in page.lines]

def _process_line(self, line: str, lines: list[str], idx: int) -> dict:
def _process_line(
self, line: str, lines: list[str], idx: int
) -> Transaction | None:
if match := re.search(self.config.transaction_pattern, line):
transaction = Transaction(**match.groupdict())
transaction = Transaction(**match.groupdict()) # type: ignore

if self.config.multiline_transactions and idx < len(lines) - 1:
next_line = lines[idx + 1]
Expand Down
5 changes: 3 additions & 2 deletions monopoly/storage/storage.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import logging
import os
from datetime import datetime
from typing import Optional
from uuid import uuid4

from google.cloud import storage
from google.cloud import storage # type: ignore
from pandas import DataFrame

from monopoly.config import StatementConfig
Expand Down Expand Up @@ -53,7 +54,7 @@ def upload_to_cloud_storage(
logger.info("Uploaded to %s", blob_name)


def write_to_csv(df: DataFrame, csv_file_path: str, statement: Statement):
def write_to_csv(df: DataFrame, csv_file_path: Optional[str], statement: Statement):
logger = logging.getLogger(__name__)

if not csv_file_path:
Expand Down
47 changes: 46 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,14 @@ pysnooper = "^1.2.0"
google-api-python-client-stubs = "^1.17.0"
pylint-pydantic = "^0.3.0"
pillow = "^10.1.0"
mypy = "^1.6.1"


[tool.taskipy.tasks]
format = "isort . && black ."
lint = "flake8 monopoly && pylint monopoly"
full_test = "pytest -n auto"
mypy = "mypy monopoly"
ci = "poetry run task format && poetry run task lint && poetry run task test"

[tool.pylint]
Expand All @@ -63,6 +65,25 @@ filterwarnings = [
"ignore::DeprecationWarning:google.rpc",
]

# mypy configuration, used by the `mypy` taskipy task (`mypy monopoly`).
[tool.mypy]
# Silence the "annotation-unchecked" note mypy emits for annotations that sit
# inside function bodies it does not type-check.
disable_error_code = [
"annotation-unchecked",
]
# `exclude` entries are regular expressions matched against file paths;
# this one skips any path containing "gmail.py".
exclude = [
"gmail\\.py"
]

# Per-module override: the modules listed here do not fail the run when mypy
# cannot find type information for them.
# NOTE(review): presumably these packages ship no stubs or py.typed marker;
# confirm before removing an entry.
[[tool.mypy.overrides]]
module = [
"fitz",
"pdftotext",
"pdf2john",
"pandas",
"google.cloud",
"google.oauth2.credentials"
]
ignore_missing_imports = true

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def attachment():
return MessageAttachment(filename="test.pdf", file_byte_string=b"Test data")


@pytest.fixture(scope="session")
@pytest.fixture(scope="function")
def parser():
parser = PdfParser(file_path=None)
yield parser
Expand Down
8 changes: 8 additions & 0 deletions tests/integration/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ def test_can_brute_force_open_protected(parser: PdfParser):
parser.open(brute_force_config)


def test_get_pages_with_brute_force_config(parser: PdfParser):
    """Check that ``get_pages`` accepts a brute-force config directly.

    Points the shared ``parser`` fixture at the ``protected.pdf`` fixture
    file, passes a ``BruteForceConfig`` to ``get_pages`` and expects
    exactly one page back.
    """
    # Positional arguments; presumably (static_string, mask), matching the
    # field order of BruteForceConfig -- TODO confirm.
    # NOTE(review): "?d?d?d" looks like a three-digit brute-force mask.
    brute_force_config = BruteForceConfig("foobar", "?d?d?d")
    # Mutates the fixture instance in place rather than building a new parser.
    parser.file_path = fixture_directory / "protected.pdf"

    pages = parser.get_pages(brute_force_config)
    assert len(pages) == 1


def test_wrong_password_raises_error(parser: PdfParser):
parser.file_path = fixture_directory / "protected.pdf"
parser.password = "wrong_pw"
Expand Down

0 comments on commit 1d180ae

Please sign in to comment.