Skip to content

Commit

Permalink
feat(cli): add progress bar
Browse files Browse the repository at this point in the history
  • Loading branch information
benjamin-awd committed Nov 26, 2023
1 parent 27edaae commit 8b2695f
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 15 deletions.
33 changes: 32 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pydantic = "^2.4.2"
pdf2john = "^0.1.8"
pdftotext = "^2.2.2"
click = "^8.1.7"
tqdm = "4.65"


[tool.poetry.group.dev.dependencies]
Expand All @@ -36,6 +37,7 @@ pytest = "^7.4.1"
pysnooper = "^1.2.0"
pylint-pydantic = "^0.3.0"
mypy = "^1.6.1"
types-tqdm = "^4.66.0.5"


[tool.taskipy.tasks]
Expand Down
35 changes: 30 additions & 5 deletions src/monopoly/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,33 @@
from typing import Collection, Iterable, Optional

import click
from tqdm import tqdm

from monopoly.banks import auto_detect_bank


def run(input_files: Collection[Path], output_directory: Optional[Path] = None):
    """
    Processes every statement with the bank that is auto-detected for it,
    showing a progress bar while working, then prints a summary that maps
    each input file name to the name of the file produced for it

    Args:
        input_files: paths of the statements to process
        output_directory: directory that results are written to. When it is
            not provided, each result is written next to its own statement
    """
    progress = tqdm(
        input_files,
        desc="Processing statements",
        ncols=80,
        bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt}",
    )
    results = []
    for file in progress:
        bank = auto_detect_bank(file)
        statement = bank.extract()
        transformed_df = bank.transform(statement)

        # saves processed statements to same directory by default.
        # The destination is resolved per file rather than by overwriting
        # `output_directory`, otherwise the parent of the first statement
        # would be reused for every statement that follows, even those
        # living in a different directory
        destination = output_directory or file.parent

        output_file = bank.load(transformed_df, statement, destination)
        results.append((file.name, output_file.name))

    # count what was actually processed instead of relying on the progress
    # bar wrapper supporting len()
    click.echo(click.style(f"{len(results)} statement(s) processed", bold=True))
    for raw_statement, processed_statement in sorted(results):
        click.echo(f"{raw_statement} -> {processed_statement}")


def get_statement_paths(files: Iterable[Path]) -> set[Path]:
Expand All @@ -40,16 +55,26 @@ def get_statement_paths(files: Iterable[Path]) -> set[Path]:
type=click.Path(exists=True, allow_dash=True, resolve_path=True, path_type=Path),
help="Specify output folder",
)
def monopoly(files: list[Path], output: Path):
@click.pass_context
def monopoly(ctx: click.Context, files: list[Path], output: Path):
"""
Monopoly helps convert your bank statements from PDF to CSV.
A file or directory can be passed in via the FILES argument
"""
if files:
matched_files = get_statement_paths(files)
run(matched_files, output)

if matched_files:
run(matched_files, output)
ctx.exit(0)

else:
click.echo(
click.style("Could not find .pdf files", fg="yellow", bold=True),
err=True,
)
ctx.exit(1)
else:
show_welcome_message()

Expand Down Expand Up @@ -78,7 +103,7 @@ def show_welcome_message():
),
(
"monopoly . --output <dir>",
"saves all results to specific directory",
"saves all results to a specific directory",
),
(
"monopoly --help",
Expand Down
4 changes: 2 additions & 2 deletions src/monopoly/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def open(self, brute_force_config: Optional[BruteForceConfig] = None):
"""
Opens and decrypts a PDF document
"""
logger.info("Opening pdf from path %s", self.file_path)
logger.debug("Opening pdf from path %s", self.file_path)
document = self.document

if not document.is_encrypted:
Expand All @@ -74,7 +74,7 @@ def open(self, brute_force_config: Optional[BruteForceConfig] = None):
# This attempts to unlock statements based on a common password,
# followed by the last few digits of a card
if brute_force_config:
logger.info("Unlocking PDF using a string prefix with mask")
logger.debug("Unlocking PDF using a string prefix with mask")
password = self.unlock_pdf(
static_string=brute_force_config.static_string,
mask=brute_force_config.mask,
Expand Down
5 changes: 2 additions & 3 deletions src/monopoly/processor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logging
import os
from datetime import datetime
from pathlib import Path
from typing import Optional
Expand Down Expand Up @@ -125,8 +124,8 @@ def load(self, df: DataFrame, statement: Statement, output_directory: Path):
"file", statement.statement_config, statement.statement_date
)

output_path = os.path.join(output_directory, filename)
logger.info("Writing CSV to file path: %s", output_path)
output_path = output_directory / filename
logger.debug("Writing CSV to file path: %s", output_path)
df.to_csv(output_path, index=False)

return output_path
38 changes: 36 additions & 2 deletions tests/unit/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import subprocess
from pathlib import Path
from unittest.mock import DEFAULT, MagicMock, patch
Expand Down Expand Up @@ -44,13 +45,13 @@ def load(self):

def test_run(monkeypatch):
def mock_auto_detect_bank(file_path: Path):
assert "input.pdf" in str(file_path)
assert "statement.pdf" in str(file_path)
return MockBank()

monkeypatch.setattr("monopoly.cli.auto_detect_bank", mock_auto_detect_bank)

# Mock paths
files = [Path("tests/integration/banks/example/input.pdf").resolve()]
files = [Path("path/to/statement.pdf").resolve()]

with patch.multiple(MockBank, extract=DEFAULT, transform=DEFAULT, load=DEFAULT):
run(files)
Expand All @@ -65,6 +66,39 @@ def mock_auto_detect_bank(file_path: Path):
MockBank.load.assert_called_once()


def test_monopoly_output():
    """
    Invokes the CLI on a copy of the Citibank sample statement placed in an
    isolated filesystem, and checks the exit code and the printed summary
    """
    sample_pdf = Path("tests/integration/banks/citibank/input.pdf").read_bytes()
    runner = CliRunner()

    with runner.isolated_filesystem() as tmp_dir:
        # the statement is copied so that the run never touches the fixture
        Path(f"{tmp_dir}/input.pdf").write_bytes(sample_pdf)

        result_dir = "results"
        os.mkdir(result_dir)
        cli_args = [".", "--output", f"{tmp_dir}/{result_dir}"]
        result = runner.invoke(monopoly, cli_args)

    assert result.exit_code == 0
    printed = result.output
    assert "1 statement(s) processed" in printed
    assert "input.pdf -> citibank-credit-2022-11.csv" in printed


def test_monopoly_no_pdf():
    """
    Invokes the CLI on a single non-PDF file and checks that it exits with
    status 1 and reports that no .pdf files could be found
    """
    runner = CliRunner()

    with runner.isolated_filesystem():
        Path("file.txt").write_text("not a pdf file")
        result = runner.invoke(monopoly, ["file.txt"])

    assert result.exit_code == 1
    assert "Could not find .pdf files" in result.output


def test_get_statement_paths(test_directory: Path) -> None:
path = test_directory
expected = {
Expand Down
5 changes: 3 additions & 2 deletions tests/unit/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,6 @@ def test_load(statement, mock_generate_name, mock_to_csv):
mock_generate_name.assert_called_once_with(
"file", statement.statement_config, datetime(2023, 1, 1)
)
mock_to_csv.assert_called_once_with("/output_directory/test_file.csv", index=False)
assert output_path == "/output_directory/test_file.csv"
expected = Path("/output_directory/test_file.csv")
mock_to_csv.assert_called_once_with(expected, index=False)
assert output_path == expected

0 comments on commit 8b2695f

Please sign in to comment.