diff --git a/README.md b/README.md index 43ca8c7..6be4d88 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![CI status](https://github.com/artefactory/NLPretext/actions/workflows/ci.yml/badge.svg?branch%3Amain&event%3Apush)](https://github.com/artefactory/NLPretext/actions/workflows/ci.yml?query=branch%3Amain) [![CD status](https://github.com/artefactory/NLPretext/actions/workflows/cd.yml/badge.svg?event%3Arelease)](https://github.com/artefactory/NLPretext/actions/workflows/cd.yml?query=event%3Arelease) -[![Python Version](https://img.shields.io/badge/Python-3.7-informational.svg)](#supported-python-versions) +[![Python Version](https://img.shields.io/badge/Python-3.8-informational.svg)](#supported-python-versions) [![Dependencies Status](https://img.shields.io/badge/dependabots-active-informational.svg)](https://github.com/artefactory/NLPretext}/pulls?utf8=%E2%9C%93&q=is%3Apr%20author%3Aapp%2Fdependabot) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) @@ -54,7 +54,7 @@ Cannot find what you were looking for? Feel free to open an [issue]((https://git ### Supported Python Versions - Main version supported : `3.8` -- Other supported versions : `3.9` +- Other supported versions : `3.9`, `3.10` We strongly advise you to do the remaining steps in a virtual environnement. diff --git a/nlpretext/preprocessor.py b/nlpretext/preprocessor.py index e92c501..5d5acd1 100644 --- a/nlpretext/preprocessor.py +++ b/nlpretext/preprocessor.py @@ -83,5 +83,5 @@ def run(self, text: str) -> str: {"operation": operation, "args": None} for operation in operations_to_pipe ] self.pipeline = self.build_pipeline(operations) - text = self.pipeline.fit_transform(text) + text = self.pipeline.transform(text) return text diff --git a/tests/test_preprocessor.py b/tests/test_preprocessor.py index b179ae9..ca0f334 100644 --- a/tests/test_preprocessor.py +++ b/tests/test_preprocessor.py @@ -501,27 +501,23 @@ def test_custom_preprocess(): assert expected_result == result -def test_apply_preprocessor(): +@pytest.mark.parametrize( + "input_str, expected_str", + [ + ( + "Some text with @mentions and whitespaces and #hashtags", + "Some text with and whitespaces and", + ), + ("@twitteruser ✊", ""), + ("", ""), + ], +) +def test_apply_preprocessor(input_str, expected_str): # Given - text = "Some text with @mentions and whitespaces and #hashtags" - operations: List[Callable[[Any], Any]] = [ - remove_html_tags, - remove_mentions, - remove_emoji, - remove_hashtag, - remove_eol_characters, - fix_bad_unicode, - normalize_whitespace, - ] - preprocessor = Preprocessor() - expected_result = text - for function in operations: - expected_result = function(expected_result) - # When - result = preprocessor.run(text) + result = preprocessor.run(input_str) # Then - assert expected_result == result + assert expected_str == result