Feature/cd #142

Merged · 16 commits · Feb 16, 2021
Changes from 12 commits
@@ -38,7 +38,9 @@ jobs:
      - name: Install requirements
        run: |
          python -m pip install --upgrade pip
-         pip install -r requirements.txt
+         pip install -r requirements_dev.txt
+         pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz
+         pip install https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-2.3.0/fr_core_news_sm-2.3.0.tar.gz
      - name: Run pylint
        run: |
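The spaCy model tarballs now install in the workflow step itself rather than through requirements.txt. A minimal sketch to confirm both pinned models resolve after this step — standard spaCy loading API; the printed versions are simply the expected pins:

```python
import spacy

# Both models are pinned for spaCy 2.3.x; spacy.load raises OSError if either is missing.
for model in ("en_core_web_sm", "fr_core_news_sm"):
    nlp = spacy.load(model)
    print(model, nlp.meta["version"])  # expect 2.3.1 and 2.3.0
```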
2 changes: 1 addition & 1 deletion README.md
@@ -88,7 +88,7 @@ print(text)
# "dinner life recommend"
```

-Take a look at all the functions that are available [here](https://github.com/artefactory/NLPretext/tree/feature/readme/nlpretext) in the ```preprocess.py``` scripts in the different folders: basic, social, token.
+Take a look at all the functions that are available [here](https://github.com/artefactory/NLPretext/tree/master) in the ```preprocess.py``` scripts in the different folders: basic, social, token.
Contributor review comment: Just change the link so it points to the nlpretext folder rather than the repo root: https://github.com/artefactory/NLPretext/tree/master/nlpretext/
# Individual Functions
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-0.9.1
+1.0.0
File renamed without changes.
2 changes: 1 addition & 1 deletion nlpretext/preprocessor.py
@@ -5,7 +5,7 @@

from nlpretext.social.preprocess import (
    remove_html_tags, remove_mentions, remove_emoji, remove_hashtag)
-from nlpretext.classic.preprocess import normalize_whitespace, remove_eol_characters, fix_bad_unicode
+from nlpretext.basic.preprocess import normalize_whitespace, remove_eol_characters, fix_bad_unicode


class Preprocessor():
2 changes: 1 addition & 1 deletion nlpretext/social/preprocess.py
@@ -21,7 +21,7 @@

import emoji as _emoji
from nlpretext._config import constants
-from nlpretext.classic.preprocess import normalize_whitespace
+from nlpretext.basic.preprocess import normalize_whitespace


def remove_mentions(text) -> str:
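Both modules now import from nlpretext.basic.preprocess instead of nlpretext.classic.preprocess. A minimal sketch of the renamed import path in use, combining the two functions visible in these hunks — the sample string and exact output are illustrative:

```python
from nlpretext.basic.preprocess import normalize_whitespace
from nlpretext.social.preprocess import remove_mentions

raw = "@bob   thanks for the   tip"
text = remove_mentions(raw)        # drop the @-handle
text = normalize_whitespace(text)  # collapse leftover runs of spaces
print(text)                        # e.g. "thanks for the tip"
```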
13 changes: 11 additions & 2 deletions nlpretext/token/preprocess.py
@@ -20,16 +20,22 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import re
+from nlpretext._utils.stopwords import get_stopwords


-def remove_stopwords(tokens, stopwords: list) -> str:
+def remove_stopwords(tokens: list, lang: str, custom_stopwords: list = None) -> str:
    """
    Remove stopwords from a text.
    eg. 'I like when you move your body !' -> 'I move body !'

    Parameters
    ----------
-    stopwords : list of stopwords to remove
+    tokens: list(str)
+        list of tokens
+    lang: str
+        language iso code (e.g : "en")
+    custom_stopwords : list(str)|None
+        list of custom stopwords to add. None by default

    Returns
    -------
@@ -41,6 +47,9 @@ def remove_stopwords(tokens, stopwords: list) -> str:
    ValueError
        When inputs is not a list
    """
+    stopwords = get_stopwords(lang)
+    if custom_stopwords:
+        stopwords += custom_stopwords
    tokens = [word for word in tokens if word not in stopwords]
    return tokens

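The reworked remove_stopwords resolves the stopword list from a language code and optionally extends it. A short usage sketch built from the docstring's own example; the custom-stopword call and its exact output are illustrative:

```python
from nlpretext.token.preprocess import remove_stopwords

tokens = ['I', 'like', 'when', 'you', 'move', 'your', 'body', '!']

# Built-in English stopwords only, as in the docstring example.
print(remove_stopwords(tokens, lang="en"))
# ['I', 'move', 'body', '!']

# Hypothetical custom list stacked on top of the built-in one.
print(remove_stopwords(tokens, lang="en", custom_stopwords=["body"]))
# e.g. ['I', 'move', '!']
```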
15 changes: 0 additions & 15 deletions requirements.txt
@@ -1,15 +1,3 @@
-# local package
-#-e .
-
-# external requirements
-coverage
-pillow
-pytest==6.1.1
-pytest-cov==2.10.1
-python-dotenv>=0.5.1
-Sphinx
-sphinx_rtd_theme
-
#library requirements
chardet==3.0.4
emoji>=0.5.2
@@ -24,8 +12,5 @@ pylint==2.4.4
regex==2019.8.19
sacremoses==0.0.13
scikit_learn==0.23.2
-setuptools==40.8.0
spacy==2.3.4
-https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz
-https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-2.3.0/fr_core_news_sm-2.3.0.tar.gz
stop_words==2018.7.23
8 changes: 8 additions & 0 deletions requirements_dev.txt
@@ -0,0 +1,8 @@
+coverage==5.3
+pytest==6.1.1
+pytest-cov==2.10.1
+python-dotenv>=0.5.1
+Sphinx==3.2.1
+sphinx_rtd_theme==0.5.0
+setuptools==40.8.0
+-r requirements.txt
29 changes: 10 additions & 19 deletions setup.py
@@ -15,38 +15,29 @@
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from setuptools import find_packages, setup
import setuptools
import setuptools.command.install
from pathlib import Path

class PostInstallCommand(setuptools.command.install.install):
    """Post-installation command."""
    def run(self):
        setuptools.command.install.install.run(self)
        try:
            import spacy
            spacy.cli.validate()
        except ModuleNotFoundError:
            pass


with open(Path(__file__).resolve().parent.joinpath('VERSION'), 'r') as fh:
    version = fh.read()
setup(

with open("requirements.txt", "r") as fr:
    requirements = [req for req in fr.read().splitlines() if not req.startswith("#")]

setuptools.setup(
    name='nlpretext',
    packages=find_packages(),
    packages=setuptools.find_packages(),
    scripts=["VERSION", "requirements.txt"],
    version=version,
    description='All the goto functions you need to handle NLP use-cases',
    author='Artefact',
    license='MIT',
    url='https://github.com/artefactory/nautilus-nlp',
    url='https://github.com/artefactory/NLPretext',
    install_requires=requirements,
    classifiers=[
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    cmdclass={
        'install': PostInstallCommand,
    },
)
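install_requires is now derived from requirements.txt at build time with a one-line filter. A self-contained sketch of what that comprehension keeps and drops — the sample input string is made up for illustration:

```python
# Mirrors the comprehension in setup.py: keep every line that is not a comment.
sample = "#library requirements\nchardet==3.0.4\nemoji>=0.5.2\nspacy==2.3.4"
requirements = [req for req in sample.splitlines() if not req.startswith("#")]
print(requirements)  # ['chardet==3.0.4', 'emoji>=0.5.2', 'spacy==2.3.4']
```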
13 changes: 6 additions & 7 deletions tests/test_preprocessor.py
@@ -17,14 +17,14 @@
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import pytest
import numpy as np
-from nlpretext.classic.preprocess import (
+from nlpretext.basic.preprocess import (
    normalize_whitespace, remove_eol_characters, fix_bad_unicode,
    unpack_english_contractions, replace_urls, replace_emails,
    replace_phone_numbers, replace_numbers, replace_currency_symbols,
    remove_punct, remove_accents, remove_multiple_spaces_and_strip_text,
    filter_non_latin_characters
)
-from nlpretext.classic.preprocess import (
+from nlpretext.basic.preprocess import (
    remove_stopwords as remove_stopwords_text
)
from nlpretext.social.preprocess import (
@@ -188,14 +188,13 @@ def test_get_stopwords():


@pytest.mark.parametrize(
"input_tokens, expected_output",
"input_tokens, lang, expected_output",
[
(['I', 'like', 'when', 'you', 'move', 'your', 'body', '!'], ['I', 'move', 'body', '!'])
(['I', 'like', 'when', 'you', 'move', 'your', 'body', '!'], "en", ['I', 'move', 'body', '!'])
],
)
def test_remove_stopwords_tokens(input_tokens, expected_output):
stopwords = get_stopwords('en')
result = remove_stopwords_token(input_tokens, stopwords)
def test_remove_stopwords_tokens(input_tokens, lang, expected_output):
result = remove_stopwords_token(input_tokens, lang)
np.testing.assert_array_equal(result, expected_output)


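Because the parametrization now carries the language code, extra languages can be covered without precomputing stopword lists in the test body. A hypothetical sketch of such an extension — the import alias matches the test file's naming, and the French case assumes get_stopwords("fr") covers 'je' and 'une':

```python
import numpy as np
import pytest
from nlpretext.token.preprocess import remove_stopwords as remove_stopwords_token

@pytest.mark.parametrize(
    "input_tokens, lang, expected_output",
    [
        (['I', 'like', 'when', 'you', 'move', 'your', 'body', '!'], "en", ['I', 'move', 'body', '!']),
        # Hypothetical French case; exact coverage depends on the fr stopword list.
        (['je', 'mange', 'une', 'pomme'], "fr", ['mange', 'pomme']),
    ],
)
def test_remove_stopwords_tokens(input_tokens, lang, expected_output):
    result = remove_stopwords_token(input_tokens, lang)
    np.testing.assert_array_equal(result, expected_output)
```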