Skip to content

Commit

Permalink
Merge pull request #13 from PyCampES/handle_pipes
Browse files Browse the repository at this point in the history
add remove_pipes
  • Loading branch information
gilgamezh authored Mar 31, 2024
2 parents cfa562f + 2226e22 commit d2315c0
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/ficamp/classifier/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,15 @@ def remove_digits(s: str) -> str:
return " ".join(clean)


def remove_pipes(s: str) -> str:
    """Replace every pipe character in *s* with a single space.

    Each ``|`` becomes exactly one space, so consecutive pipes produce
    consecutive spaces. A string without pipes is returned unchanged.
    """
    # str.replace performs the substitution in one pass and avoids the
    # intermediate list that a split/join round-trip would build.
    return s.replace("|", " ")


def preprocess(s: str) -> str:
"Clean up transaction description"
steps = (
lambda s: s.lower(),
remove_pipes,
remove_digits,
)
out = s
Expand Down
13 changes: 13 additions & 0 deletions tests/test_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from ficamp.classifier.preprocessing import (
preprocess,
remove_digits,
remove_pipes,
)


Expand All @@ -23,6 +24,17 @@ def test_remove_digits(inp, exp):
assert remove_digits(inp) == exp


@pytest.mark.parametrize(
    "inp,exp",
    [
        ("SEPA 1231AMSTERDAM", "SEPA 1231AMSTERDAM"),  # no pipe: unchanged
        ("SEPA 1231|AMSTERDAM", "SEPA 1231 AMSTERDAM"),  # pipe becomes a space
    ],
)
def test_remove_pipes(inp, exp):
    """Check remove_pipes against each (input, expected) pair."""
    result = remove_pipes(inp)
    assert result == exp


@pytest.mark.parametrize(
("inp,exp"),
(
Expand All @@ -34,6 +46,7 @@ def test_remove_digits(inp, exp):
("SEPA 123", "sepa"),
("SEPA 12312321 bic", "sepa bic"),
("SEPA 12312321 123BIC", "sepa"),
("SEPA 1231|AMSTERDAM 123BIC", "sepa amsterdam"),
),
)
def test_preprocess(inp, exp):
Expand Down

0 comments on commit d2315c0

Please sign in to comment.