-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #150 from artefactory/fix/credits
Fix/credits
- Loading branch information
Showing 5 changed files with 100 additions and 39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
1.0.2 | ||
1.0.3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -190,7 +190,7 @@ def test_get_stopwords(): | |
@pytest.mark.parametrize( | ||
"input_tokens, lang, expected_output", | ||
[ | ||
(['I', 'like', 'when', 'you', 'move', 'your', 'body', '!'], "en", ['I', 'move', 'body', '!']) | ||
(['I', 'like', 'this', 'song', 'very', 'much', '!'], "en", ['I', 'song', '!']) | ||
], | ||
) | ||
def test_remove_stopwords_tokens(input_tokens, lang, expected_output): | ||
|
@@ -201,7 +201,7 @@ def test_remove_stopwords_tokens(input_tokens, lang, expected_output): | |
@pytest.mark.parametrize( | ||
"input_text, lang, expected_output", | ||
[ | ||
('I like when you move your body !', 'en', 'I move body !'), | ||
('I like this song very much !', 'en', 'I song !'), | ||
('Can I get a beer?', 'en', 'Can I beer ?'), | ||
('Je vous recommande ce film !', 'fr', 'Je recommande film !'), | ||
('je vous recommande ce film !', 'fr', 'recommande film !'), | ||
|
@@ -216,7 +216,7 @@ def test_remove_stopwords_text(input_text, lang, expected_output): | |
@pytest.mark.parametrize( | ||
"input_text, lang, custom_stopwords, expected_output", | ||
[ | ||
('I like when you move your body !', 'en', ['body'], 'I move !'), | ||
('I like this song very much !', 'en', ['song'], 'I !'), | ||
('Je vous recommande ce film la scène de fin est géniale !', 'fr', | ||
['film', 'scène'], 'Je recommande fin géniale !'), | ||
], | ||
|
@@ -249,7 +249,6 @@ def test_remove_accents(): | |
('proportienelle', 'proportienelle'), | ||
('Pour plus de démocratie participative', 'Pour plus de démocratie participative'), | ||
('Transparence de la vie public', 'Transparence de la vie public'), | ||
('18 mois de trop....ca suffit macron', '18 mois de trop....ca suffit macron'), | ||
('Egalité devant les infractions routières', 'Egalité devant les infractions routières')],) | ||
def test_fix_bad_unicode(input_str, expected_str): | ||
result = fix_bad_unicode(input_str) | ||
|
@@ -287,14 +286,13 @@ def test_unpack_english_contractions(input_str, expected_str): | |
@pytest.mark.parametrize( | ||
"input_str, expected_str", | ||
[( | ||
"Wan't to contribute to Nautilus? read https://github.com/artefactory/nautilus-nlp/blob/docs/CONTRIBUTING.md"\ | ||
"Wan't to contribute to NLPretext? read https://github.com/artefactory/NLPretext/blob/master/CONTRIBUTING.md"\ | ||
" first", | ||
"Wan't to contribute to Nautilus? read *URL* first"), | ||
("The ip address of my VM is http://34.76.182.5:8888", "The ip address of my VM is *URL*"), | ||
"Wan't to contribute to NLPretext? read *URL* first"), | ||
("If you go to http://internet.org, you will find a website hosted by FB.", | ||
"If you go to *URL*, you will find a website hosted by FB."), | ||
("Ishttps://waaaou.com/ available?", 'Is*URL* available?'), | ||
("mailto:hugo.vasselin@artefact.com", '*URL*')]) | ||
("Ishttps://internet.org/ available?", 'Is*URL* available?'), | ||
("mailto:john.doe@artefact.com", '*URL*')]) | ||
def test_replace_urls(input_str, expected_str): | ||
result = replace_urls(input_str) | ||
np.testing.assert_equal(result, expected_str) | ||
|
@@ -303,10 +301,9 @@ def test_replace_urls(input_str, expected_str): | |
@pytest.mark.parametrize( | ||
"input_str, expected_str", | ||
[ | ||
("my email:hugo.vasselin@artefact.com", "my email:*EMAIL*"), | ||
("my email:john.doe@artefact.com", "my email:*EMAIL*"), | ||
("[email protected] is a temporary email", "*EMAIL* is a temporary email"), | ||
("our emails used to be [email protected]", "our emails used to be *EMAIL*"), | ||
("[email protected],C ton email bb?", '*EMAIL*,C ton email bb?') | ||
("our emails used to be [email protected]", "our emails used to be *EMAIL*") | ||
] | ||
) | ||
def test_replace_emails(input_str, expected_str): | ||
|
@@ -317,17 +314,17 @@ def test_replace_emails(input_str, expected_str): | |
@pytest.mark.parametrize( | ||
"input_str, expected_str", | ||
[ | ||
("mon 06 bb: 0625093267", "mon 06 bb: *PHONE*"), | ||
("mon 06 bb: 06.25.09.32.67", "mon 06 bb: *PHONE*"), | ||
("call me at +33625093267", "call me at *PHONE*"), | ||
("call me at +33 6 25 09 32 67", "call me at *PHONE*"), | ||
("call me at +33 625 093 267", "call me at *PHONE*"), | ||
("if this unit test doesn't work, call 3615 and says 'ROBIN'", | ||
"if this unit test doesn't work, call *PHONE* and says 'ROBIN'"), | ||
('(541) 754-3010 is a US. Phone', '*PHONE* is a US. Phone'), | ||
('+1-541-754-3010 is an international Phone', '*PHONE* is an international Phone'), | ||
('+1-541-754-3010 Dialed in the US', '*PHONE* Dialed in the US'), | ||
('+1-541-754-3010 Dialed from Germany', '*PHONE* Dialed from Germany') | ||
("mon 06: 0601020304", "mon 06: *PHONE*"), | ||
("mon 06: 06.01.02.03.04", "mon 06: *PHONE*"), | ||
("call me at +33601020304", "call me at *PHONE*"), | ||
("call me at +33 6 01 02 03 04", "call me at *PHONE*"), | ||
("call me at +33 601 020 304", "call me at *PHONE*"), | ||
("if this unit test doesn't work, call 3615 and says 'HELP'", | ||
"if this unit test doesn't work, call *PHONE* and says 'HELP'"), | ||
('(541) 754-0000 is a US. Phone', '*PHONE* is a US. Phone'), | ||
('+1-541-754-0000 is an international Phone', '*PHONE* is an international Phone'), | ||
('+1-541-754-0000 Dialed in the US', '*PHONE* Dialed in the US'), | ||
('+1-541-754-0000 Dialed from Germany', '*PHONE* Dialed from Germany') | ||
] | ||
) | ||
def test_replace_phone_numbers(input_str, expected_str): | ||
|
@@ -343,9 +340,8 @@ def test_replace_phone_numbers(input_str, expected_str): | |
"input_str, expected_str", | ||
[ | ||
("123, 3 petits chats", "*NUMBER*, *NUMBER* petits chats"), | ||
("l0ve 2 twa <3", "l0ve *NUMBER* twa <*NUMBER*"), | ||
("Give me 45bucks!", "Give me *NUMBER*bucks!"), | ||
("call me at +33625093267", "call me at *NUMBER*") | ||
("call me at +33601020304", "call me at *NUMBER*") | ||
] | ||
) | ||
def test_replace_numbers(input_str, expected_str): | ||
|
@@ -384,9 +380,9 @@ def test_replace_currency_symbols(input_str, param, expected_str): | |
("Seriously.,.", '.,;', "Seriously "), | ||
("Seriously...", '.,;', "Seriously "), | ||
("Seriously.!.", '.,;', "Seriously ! "), | ||
("hugo.vasselin@artefact.com", '.,;', "hugo vasselin@artefact com"), | ||
("hugo.vasselin@artefact.com", None, "hugo vasselin artefact com"), | ||
("hugo-vasselin@artefact.com", None, "hugo vasselin artefact com") | ||
("john.doe@artefact.com", '.,;', "john doe@artefact com"), | ||
("john.doe@artefact.com", None, "john doe artefact com"), | ||
("john-doe@artefact.com", None, "john doe artefact com") | ||
] | ||
) | ||
def test_remove_punct(input_str, param, expected_str): | ||
|
@@ -397,27 +393,26 @@ def test_remove_punct(input_str, param, expected_str): | |
@pytest.mark.parametrize( | ||
"input_str, expected_str", | ||
[ | ||
("👉👌", ""), | ||
("⚽👌", ""), | ||
("🎅🏿⌚", ""), | ||
("🥖✊💦", ""), | ||
("🥖🍷🇫🇷", ""), | ||
("✊", ""), | ||
("J'espère que les 🚓 vont pas lire ce test", | ||
"J'espère que les vont pas lire ce test"), | ||
("J'espère que les vont pas lire ce test🚓", | ||
"J'espère que les vont pas lire ce test") | ||
("Save 🐼 and 🐟", | ||
"Save and "), | ||
] | ||
) | ||
def test_remove_emoji(input_str, expected_str): | ||
result = remove_emoji(input_str) | ||
np.testing.assert_equal(result, expected_str) | ||
assert len(result) == len(expected_str) | ||
assert result == expected_str | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"input_str, expected_str", | ||
[ | ||
("👉👌", ":backhand_index_pointing_right::OK_hand:"), | ||
("⚽️👌", ":soccer_ball::OK_hand:"), | ||
("🎅🏿⌚", ":Santa_Claus_dark_skin_tone::watch:"), | ||
("🥖✊💦", ":baguette_bread::raised_fist::sweat_droplets:"), | ||
("🥖🍷🇫🇷", ":baguette_bread::wine_glass::France:"), | ||
("✊", ":raised_fist:") | ||
] | ||
) | ||
|