Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(banks): use enums and add transaction class #12

Merged
merged 7 commits on Oct 6, 2023
33 changes: 17 additions & 16 deletions monopoly/bank.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pandas import DataFrame

from monopoly.config import settings
from monopoly.helpers.constants import AMOUNT, DATE, ROOT_DIR
from monopoly.helpers.constants import ROOT_DIR, BankStatement
from monopoly.helpers.generate_name import generate_name
from monopoly.pdf import PdfConfig, PdfParser
from monopoly.statement import Statement, StatementConfig
Expand All @@ -20,7 +20,6 @@ class Bank:
pdf_config: PdfConfig
statement_config: StatementConfig
file_path: str
date_parser: callable = None
transform_dates: bool = True

def extract(self) -> Statement:
Expand All @@ -41,7 +40,9 @@ def transform(self, statement: Statement) -> DataFrame:
df = statement.df
statement_date = statement.statement_date

df[AMOUNT] = df[AMOUNT].str.replace(",", "").astype(float)
df[BankStatement.AMOUNT] = (
df[BankStatement.AMOUNT].str.replace(",", "").astype(float)
)

if self.transform_dates:
df = self._transform_date_to_iso(df, statement_date)
Expand All @@ -52,7 +53,9 @@ def _transform_date_to_iso(
self, df: DataFrame, statement_date: datetime
) -> DataFrame:
logger.info("Transforming dates to ISO 8601")
df[DATE] = df.apply(self._convert_date, statement_date=statement_date, axis=1)
df[BankStatement.DATE] = df.apply(
self._convert_date, statement_date=statement_date, axis=1
)
return df

def parse_date(self, date_str):
Expand All @@ -61,7 +64,7 @@ def parse_date(self, date_str):
return parsed_date.day, parsed_date.month

def _convert_date(self, row, statement_date: datetime):
row_day, row_month = self.parse_date(row[DATE])
row_day, row_month = self.parse_date(row[BankStatement.DATE])

# Deal with mixed years from Jan/Dec
if statement_date.month == 1 and row_month == 12:
Expand All @@ -71,23 +74,21 @@ def _convert_date(self, row, statement_date: datetime):

return f"{row_year}-{row_month:02d}-{row_day:02d}"

def _write_to_csv(self, df: DataFrame, statement_date: datetime):
filename = generate_name("file", self.statement_config, statement_date)

file_path = os.path.join(ROOT_DIR, "output", filename)
logger.info("Writing CSV to file path: %s", file_path)
df.to_csv(file_path, index=False)

return file_path

def load(
self,
transformed_df: DataFrame,
df: DataFrame,
statement: Statement,
csv_file_path: str = None,
upload_to_cloud: bool = False,
):
statement_date = statement.statement_date
csv_file_path = self._write_to_csv(transformed_df, statement_date)

if not csv_file_path:
filename = generate_name("file", self.statement_config, statement_date)
csv_file_path = os.path.join(ROOT_DIR, "output", filename)
logger.info("Writing CSV to file path: %s", csv_file_path)

df.to_csv(csv_file_path, index=False)

if upload_to_cloud:
blob_name = generate_name("blob", self.statement_config, statement_date)
Expand Down
5 changes: 3 additions & 2 deletions monopoly/banks/citibank.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@

from monopoly.bank import BankBase, StatementConfig
from monopoly.config import settings
from monopoly.helpers.constants import AccountType, BankNames
from monopoly.pdf import PdfConfig

logger = logging.getLogger(__name__)


class Citibank(BankBase):
statement_config = StatementConfig(
bank_name="Citibank",
account_type="Credit",
bank_name=BankNames.CITIBANK,
account_type=AccountType.CREDIT,
transaction_pattern=(
r"(?P<date>\b\d{2}\s\w{3}\b)\s*(?P<description>.*?)\s*(?P<amount>[\d.,]+)$"
),
Expand Down
5 changes: 3 additions & 2 deletions monopoly/banks/hsbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@

from monopoly.bank import BankBase, StatementConfig
from monopoly.config import settings
from monopoly.helpers.constants import AccountType, BankNames
from monopoly.pdf import PdfConfig

logger = logging.getLogger(__name__)


class Hsbc(BankBase):
statement_config = StatementConfig(
bank_name="HSBC",
account_type="Credit",
bank_name=BankNames.HSBC,
account_type=AccountType.CREDIT,
transaction_pattern=(
r"\d{2}\s\w{3}\s*"
r"(?P<date>\d{2}\s\w{3})\s.*?"
Expand Down
5 changes: 3 additions & 2 deletions monopoly/banks/ocbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@

from monopoly.bank import BankBase, StatementConfig
from monopoly.config import settings
from monopoly.helpers.constants import AccountType, BankNames
from monopoly.pdf import PdfConfig

logger = logging.getLogger(__name__)


class Ocbc(BankBase):
statement_config = StatementConfig(
bank_name="OCBC",
account_type="Credit",
bank_name=BankNames.OCBC,
account_type=AccountType.CREDIT,
transaction_pattern=(
r"(?P<date>\d+/\d+)\s*(?P<description>.*?)\s*(?P<amount>[\d.,]+)$"
),
Expand Down
29 changes: 22 additions & 7 deletions monopoly/helpers/constants.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
import os
from enum import Enum

from monopoly.helpers.enums import BankStatement, EmailSubjectRegex

DATE = BankStatement.DATE.value
DESCRIPTION = BankStatement.DESCRIPTION.value
AMOUNT = BankStatement.AMOUNT.value
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
OCBC = EmailSubjectRegex.OCBC.value
HSBC = EmailSubjectRegex.HSBC.value


# Account type of a statement; passed as StatementConfig.account_type.
# Plain Enum (no str mixin): members are not strings, so the text must be
# read through `.value`.
class AccountType(Enum):
CREDIT = "credit"


# Banks that have a statement configuration; passed as
# StatementConfig.bank_name. Plain Enum (no str mixin), so the lowercase
# name is only available through `.value`.
class BankNames(Enum):
CITIBANK = "citibank"
HSBC = "hsbc"
OCBC = "ocbc"


# Column names of the statement DataFrame. The str mixin makes every member
# a real string, which is why members are used directly as DataFrame column
# keys elsewhere in this change (e.g. df[BankStatement.AMOUNT]).
# Statement.columns is built from the member values, in definition order.
class BankStatement(str, Enum):
DATE = "date"
DESCRIPTION = "description"
AMOUNT = "amount"


# Per-bank regular expressions, used as the keys of the `banks` mapping in
# main() to select the bank class for a message.
# NOTE(review): presumably matched against the email subject line; the
# matching code is not part of this view, so confirm against
# process_bank_statement.
class EmailSubjectRegex(str, Enum):
OCBC = r"OCBC Bank: Your Credit Card e-Statement"
HSBC = r"Your.HSBC.*eStatement"
12 changes: 0 additions & 12 deletions monopoly/helpers/enums.py

This file was deleted.

4 changes: 2 additions & 2 deletions monopoly/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from monopoly.banks.hsbc import Hsbc
from monopoly.banks.ocbc import Ocbc
from monopoly.gmail import Gmail, Message
from monopoly.helpers.constants import HSBC, OCBC
from monopoly.helpers.constants import EmailSubjectRegex

logger = logging.getLogger(__name__)

Expand All @@ -19,7 +19,7 @@ def main():

messages: list[Message] = Gmail().get_emails()

banks = {OCBC: Ocbc, HSBC: Hsbc}
banks = {EmailSubjectRegex.OCBC: Ocbc, EmailSubjectRegex.HSBC: Hsbc}

for message in messages:
process_bank_statement(message, banks)
Expand Down
29 changes: 21 additions & 8 deletions monopoly/statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,51 @@

from pandas import DataFrame

from monopoly.helpers.constants import AMOUNT, DATE, DESCRIPTION
from monopoly.helpers.constants import AccountType, BankNames, BankStatement
from monopoly.pdf import PdfPage

logger = logging.getLogger(__name__)


# Per-bank settings describing how a statement is parsed.
# bank_name and account_type are supplied as enum members, but
# __post_init__ below overwrites both with the member's `.value`, so after
# construction they hold plain strings despite the enum annotations.
@dataclass
class StatementConfig:
bank_name: str
account_type: str
bank_name: BankNames
account_type: AccountType
statement_date_format: str
# Regex applied to each statement line with re.findall in
# Statement._process_line.
transaction_pattern: str
transaction_date_format: str
date_pattern: str
multiline_transactions: bool = False

# Replace the enum members with their string values right after the
# dataclass-generated __init__ runs.
# NOTE(review): a caller passing a plain string instead of an enum member
# would fail here with AttributeError, since str has no `.value`.
def __post_init__(self):
self.bank_name = self.bank_name.value
self.account_type = self.account_type.value


# One transaction row of a statement. The field names equal the
# BankStatement column values ("date", "description", "amount");
# Statement._process_line turns an instance into a dict with vars().
# NOTE(review): `amount` is annotated float, but dataclasses do not enforce
# annotations and the transform tests in this change build instances with
# string amounts such as "31.45" for the pre-transform frame.
@dataclass
class Transaction:
date: str
description: str
amount: float


@dataclass
class Statement:
pages: list[PdfPage]
columns = [DATE, DESCRIPTION, AMOUNT]
columns = [enum.value for enum in BankStatement]
config: StatementConfig

@cached_property
def transactions(self) -> list[dict]:
transactions = []
for page in self.pages:
for i, line in enumerate(page.lines):
item = self._process_line(line, page.lines, idx=i)
transactions.append(item)
transaction = self._process_line(line, page.lines, idx=i)
if transaction:
transactions.append(transaction)

return list(filter(None, transactions))
return transactions

def _process_line(self, line: str, page: list[str], idx: int) -> dict:
if match := re.findall(self.config.transaction_pattern, line):
Expand All @@ -50,7 +63,7 @@ def _process_line(self, line: str, page: list[str], idx: int) -> dict:
except IndexError as err:
logger.debug(err)

return {DATE: date, DESCRIPTION: description, AMOUNT: amount}
return vars(Transaction(date, description, amount))
return None

@cached_property
Expand Down
5 changes: 3 additions & 2 deletions tests/citibank/test_citibank_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
from pandas.testing import assert_frame_equal

from monopoly.banks.citibank import Citibank
from monopoly.helpers.constants import BankStatement


# Extracts the Citibank statement through the `citibank` fixture and checks
# the raw DataFrame against tests/fixtures/citibank/expected.csv, then
# checks the sum of the amount column.
def test_citibank_extract_unprotected_pdf(citibank: Citibank):
raw_df = citibank.extract().df
# dtype=object keeps every CSV value as read, without numeric conversion,
# for the frame comparison below.
expected_df = pd.read_csv("tests/fixtures/citibank/expected.csv", dtype=object)

assert_frame_equal(raw_df, expected_df)
# Cast amounts to float only after the frame comparison, so they can be
# summed.
raw_df["amount"] = raw_df["amount"].astype("float")
raw_df[BankStatement.AMOUNT] = raw_df[BankStatement.AMOUNT].astype("float")

# total excluding $20 cashback
assert round(raw_df["amount"].sum(), 2) == 1434.07
assert round(raw_df[BankStatement.AMOUNT].sum(), 2) == 1434.07
50 changes: 9 additions & 41 deletions tests/citibank/test_citibank_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,14 @@

from monopoly.bank import Statement
from monopoly.banks.citibank import Citibank
from monopoly.helpers.constants import AMOUNT, DATE, DESCRIPTION
from monopoly.statement import Transaction


def test_citibank_transform_cross_year(citibank: Citibank, statement: Statement):
raw_df = pd.DataFrame(
[
{
DATE: "09 JAN",
DESCRIPTION: "Shopee Singapore SINGAPORE SG",
AMOUNT: "31.45",
},
{
DATE: "12 DEC",
DESCRIPTION: "UNIQLO SINGAPORE PTE. SINGAPORE SG",
AMOUNT: "29.80",
},
Transaction("09 JAN", "Shopee Singapore", "31.45"),
Transaction("12 DEC", "UNIQLO SINGAPORE", "29.80"),
]
)
statement.statement_date = datetime(2024, 1, 1)
Expand All @@ -29,16 +21,8 @@ def test_citibank_transform_cross_year(citibank: Citibank, statement: Statement)

expected_data = pd.DataFrame(
[
{
DATE: "2024-01-09",
DESCRIPTION: "Shopee Singapore SINGAPORE SG",
AMOUNT: 31.45,
},
{
DATE: "2023-12-12",
DESCRIPTION: "UNIQLO SINGAPORE PTE. SINGAPORE SG",
AMOUNT: 29.80,
},
Transaction("2024-01-09", "Shopee Singapore", 31.45),
Transaction("2023-12-12", "UNIQLO SINGAPORE", 29.80),
]
)

Expand All @@ -48,16 +32,8 @@ def test_citibank_transform_cross_year(citibank: Citibank, statement: Statement)
def test_citibank_transform_within_year(citibank: Citibank, statement: Statement):
raw_df = pd.DataFrame(
[
{
DATE: "09 JUN",
DESCRIPTION: "Shopee Singapore SINGAPORE SG",
AMOUNT: "31.45",
},
{
DATE: "12 JUN",
DESCRIPTION: "UNIQLO SINGAPORE PTE. SINGAPORE SG",
AMOUNT: "29.80",
},
Transaction("09 JUN", "Shopee Singapore", "31.45"),
Transaction("12 JUN", "UNIQLO SINGAPORE", "29.80"),
]
)

Expand All @@ -68,16 +44,8 @@ def test_citibank_transform_within_year(citibank: Citibank, statement: Statement

expected_data = pd.DataFrame(
[
{
DATE: "2023-06-09",
DESCRIPTION: "Shopee Singapore SINGAPORE SG",
AMOUNT: 31.45,
},
{
DATE: "2023-06-12",
DESCRIPTION: "UNIQLO SINGAPORE PTE. SINGAPORE SG",
AMOUNT: 29.80,
},
Transaction("2023-06-09", "Shopee Singapore", 31.45),
Transaction("2023-06-12", "UNIQLO SINGAPORE", 29.80),
]
)

Expand Down
5 changes: 3 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from monopoly.banks.hsbc import Hsbc
from monopoly.banks.ocbc import Ocbc
from monopoly.gmail import Message, MessageAttachment
from monopoly.helpers.constants import AccountType, BankNames
from monopoly.pdf import PdfConfig, PdfParser


Expand Down Expand Up @@ -67,8 +68,8 @@ def statement(monkeypatch, statement_config):
@pytest.fixture(scope="session")
def statement_config():
statement_config = StatementConfig(
account_type="Savings",
bank_name="Example Bank",
account_type=AccountType.CREDIT,
bank_name=BankNames.OCBC,
statement_date_format=None,
transaction_pattern=None,
transaction_date_format=None,
Expand Down
Loading