From 2ced84d40e28c76d0df58cceba4c4b8656535446 Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Mon, 15 Feb 2021 16:16:37 +0100 Subject: [PATCH 01/16] rename ci_actions.yml > ci.yml --- .github/workflows/{ci_actions.yml => ci.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{ci_actions.yml => ci.yml} (100%) diff --git a/.github/workflows/ci_actions.yml b/.github/workflows/ci.yml similarity index 100% rename from .github/workflows/ci_actions.yml rename to .github/workflows/ci.yml From a1a7b4e7609c6ad1a618036a0fdd2adedd697c02 Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Mon, 15 Feb 2021 16:19:17 +0100 Subject: [PATCH 02/16] update setup.py --- setup.py | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/setup.py b/setup.py index 8b6eaf3..4e17403 100644 --- a/setup.py +++ b/setup.py @@ -15,38 +15,29 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from setuptools import find_packages, setup import setuptools -import setuptools.command.install from pathlib import Path -class PostInstallCommand(setuptools.command.install.install): - """Post-installation command.""" - def run(self): - setuptools.command.install.install.run(self) - try: - import spacy - spacy.cli.validate() - except ModuleNotFoundError: - pass - with open(Path(__file__).resolve().parent.joinpath('VERSION'), 'r') as fh: version = fh.read() -setup( + +with open("requirements.txt", "r") as fr: + requirements = [req for req in fr.read().splitlines() if not req.startswith("#")] + +setuptools.setup( name='nlpretext', - packages=find_packages(), + packages=setuptools.find_packages(), + scripts=["VERSION", "requirements.txt"], version=version, description='All the goto functions you need to handle NLP use-cases', author='Artefact', license='MIT', - url='https://github.com/artefactory/nautilus-nlp', + url='https://github.com/artefactory/NLPretext', + install_requires=requirements, classifiers=[ - 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', ], - cmdclass={ - 'install': PostInstallCommand, - }, ) From bcb3e61c4d13d7f4d6529b787273280f88a6fe1e Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Mon, 15 Feb 2021 16:35:59 +0100 Subject: [PATCH 03/16] adding requirements_dev --- .github/workflows/ci.yml | 2 +- requirements.txt | 15 --------------- requirements_dev.txt | 8 ++++++++ 3 files changed, 9 insertions(+), 16 deletions(-) create mode 100644 requirements_dev.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d5031d8..ab7d269 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,7 +38,7 @@ jobs: - name: Install requirements run: | python -m pip install --upgrade pip - pip install -r requirements.txt + pip install -r requirements_dev.txt - name: Run pylint run: | diff --git a/requirements.txt b/requirements.txt index f2cb8e3..e06f872 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,3 @@ -# local package -#-e . - -# external requirements -coverage -pillow -pytest==6.1.1 -pytest-cov==2.10.1 -python-dotenv>=0.5.1 -Sphinx -sphinx_rtd_theme - #library requirements chardet==3.0.4 emoji>=0.5.2 @@ -24,8 +12,5 @@ pylint==2.4.4 regex==2019.8.19 sacremoses==0.0.13 scikit_learn==0.23.2 -setuptools==40.8.0 spacy==2.3.4 -https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz -https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-2.3.0/fr_core_news_sm-2.3.0.tar.gz stop_words==2018.7.23 \ No newline at end of file diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000..b16d405 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,8 @@ +coverage==5.3 +pytest==6.1.1 +pytest-cov==2.10.1 +python-dotenv>=0.5.1 +Sphinx==3.2.1 +sphinx_rtd_theme==0.5.0 +setuptools==40.8.0 +-r requirements.txt \ No newline at end of file From e072f011eacb9f0ff54d24f9d5af41898e16b3e7 Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Mon, 15 Feb 2021 16:37:00 +0100 Subject: [PATCH 04/16] adding CD to publish package to pypi --- .github/workflows/cd.yml | 52 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 .github/workflows/cd.yml diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml new file mode 100644 index 0000000..987e708 --- /dev/null +++ b/.github/workflows/cd.yml @@ -0,0 +1,52 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +name: CD + +on: [push, pull_request] + +jobs: + CI: + name: Launching CD + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.7] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install requirements + run: | + python -m pip install --upgrade pip + pip install setuptools wheel + + - name: Building package distribution + run: | + python setup.py sdist bdist_wheel + ls + + - name: Publish new package version on PyPI + uses: pypa/gh-action-pypi-publish@master + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} From f86f36afed5f0f86bbb8c39f84009a8d5344f4d2 Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Mon, 15 Feb 2021 16:42:17 +0100 Subject: [PATCH 05/16] adding spacy model download in CI --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ab7d269..07567dd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,6 +39,8 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements_dev.txt + pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz + pip install https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-2.3.0/fr_core_news_sm-2.3.0.tar.gz - name: Run pylint run: | From 040a62e24bd0af3067161e5835aad617c4af69e8 Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Mon, 15 Feb 2021 16:51:37 +0100 Subject: [PATCH 06/16] adding pypi publication only when master is merged --- .github/workflows/cd.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 987e708..acbdcd1 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -17,10 +17,14 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. name: CD -on: [push, pull_request] +on: + pull_request: + branches: + - master + types: [closed] jobs: - CI: + CD: name: Launching CD runs-on: ubuntu-latest strategy: @@ -43,7 +47,6 @@ jobs: - name: Building package distribution run: | python setup.py sdist bdist_wheel - ls - name: Publish new package version on PyPI uses: pypa/gh-action-pypi-publish@master From 04ae61ee6ddbabc0d18e4fa1a472b333a4a434e4 Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Mon, 15 Feb 2021 16:51:59 +0100 Subject: [PATCH 07/16] renaming classic > basic --- nlpretext/{classic => basic}/preprocess.py | 0 nlpretext/preprocessor.py | 2 +- tests/test_preprocessor.py | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) rename nlpretext/{classic => basic}/preprocess.py (100%) diff --git a/nlpretext/classic/preprocess.py b/nlpretext/basic/preprocess.py similarity index 100% rename from nlpretext/classic/preprocess.py rename to nlpretext/basic/preprocess.py diff --git a/nlpretext/preprocessor.py b/nlpretext/preprocessor.py index c453aad..e05f4be 100644 --- a/nlpretext/preprocessor.py +++ b/nlpretext/preprocessor.py @@ -5,7 +5,7 @@ from nlpretext.social.preprocess import ( remove_html_tags, remove_mentions, remove_emoji, remove_hashtag) -from nlpretext.classic.preprocess import normalize_whitespace, remove_eol_characters, fix_bad_unicode +from nlpretext.basic.preprocess import normalize_whitespace, remove_eol_characters, fix_bad_unicode class Preprocessor(): diff --git a/tests/test_preprocessor.py b/tests/test_preprocessor.py index 5212015..66bc04d 100644 --- a/tests/test_preprocessor.py +++ b/tests/test_preprocessor.py @@ -17,14 +17,14 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. import pytest import numpy as np -from nlpretext.classic.preprocess import ( +from nlpretext.basic.preprocess import ( normalize_whitespace, remove_eol_characters, fix_bad_unicode, unpack_english_contractions, replace_urls, replace_emails, replace_phone_numbers, replace_numbers, replace_currency_symbols, remove_punct, remove_accents, remove_multiple_spaces_and_strip_text, filter_non_latin_characters ) -from nlpretext.classic.preprocess import ( +from nlpretext.basic.preprocess import ( remove_stopwords as remove_stopwords_text ) from nlpretext.social.preprocess import ( From ec1ad6392fe9b1c9a6e7b5c3d17a2d01013a03a9 Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Mon, 15 Feb 2021 18:32:18 +0100 Subject: [PATCH 08/16] nlpretext/social/preprocess.py --- nlpretext/social/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nlpretext/social/preprocess.py b/nlpretext/social/preprocess.py index 1f30891..ebbdb8a 100644 --- a/nlpretext/social/preprocess.py +++ b/nlpretext/social/preprocess.py @@ -21,7 +21,7 @@ import emoji as _emoji from nlpretext._config import constants -from nlpretext.classic.preprocess import normalize_whitespace +from nlpretext.basic.preprocess import normalize_whitespace def remove_mentions(text) -> str: From da35f1339d5580d8ed8d81097f7f53125d767019 Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Tue, 16 Feb 2021 09:46:33 +0100 Subject: [PATCH 09/16] removing CD --- .github/workflows/cd.yml | 55 ---------------------------------------- 1 file changed, 55 deletions(-) delete mode 100644 .github/workflows/cd.yml diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml deleted file mode 100644 index acbdcd1..0000000 --- a/.github/workflows/cd.yml +++ /dev/null @@ -1,55 +0,0 @@ -# GNU Lesser General Public License v3.0 only -# Copyright (C) 2020 Artefact -# licence-information@artefact.com -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 3 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -name: CD - -on: - pull_request: - branches: - - master - types: [closed] - -jobs: - CD: - name: Launching CD - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [3.7] - - steps: - - uses: actions/checkout@v2 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Install requirements - run: | - python -m pip install --upgrade pip - pip install setuptools wheel - - - name: Building package distribution - run: | - python setup.py sdist bdist_wheel - - - name: Publish new package version on PyPI - uses: pypa/gh-action-pypi-publish@master - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} From 0772036c6ae6b6520a56959a8e9a05de29419353 Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Tue, 16 Feb 2021 09:52:51 +0100 Subject: [PATCH 10/16] update link readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9cde61d..4a31283 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ print(text) # "dinner life recommend" ``` -Take a look at all the functions that are available [here](https://github.com/artefactory/NLPretext/tree/feature/readme/nlpretext) in the ```preprocess.py``` scripts in the different folders: basic, social, token. +Take a look at all the functions that are available [here](https://github.com/artefactory/NLPretext/tree/master) in the ```preprocess.py``` scripts in the different folders: basic, social, token. # Individual Functions From b9a7006c535efe37bacbbe29e476ebe61e42ff13 Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Tue, 16 Feb 2021 10:03:42 +0100 Subject: [PATCH 11/16] update remove_stopwords token with arg lang instead of list --- nlpretext/token/preprocess.py | 13 +++++++++++-- tests/test_preprocessor.py | 9 ++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/nlpretext/token/preprocess.py b/nlpretext/token/preprocess.py index 989bd9a..058dd8e 100644 --- a/nlpretext/token/preprocess.py +++ b/nlpretext/token/preprocess.py @@ -20,16 +20,22 @@ from __future__ import absolute_import, division, print_function, unicode_literals import re +from nlpretext._utils.stopwords import get_stopwords -def remove_stopwords(tokens, stopwords: list) -> str: +def remove_stopwords(tokens: list, lang: str, custom_stopwords: list = None) -> str: """ Remove stopwords from a text. eg. 'I like when you move your body !' -> 'I move body !' Parameters ---------- - stopwords : list of stopwords to remove + tokens: list(str) + list of tokens + lang: str + language iso code (e.g : "en") + custom_stopwords : list(str)|None + list of custom stopwords to add. None by default Returns ------- @@ -41,6 +47,9 @@ def remove_stopwords(tokens, stopwords: list) -> str: ValueError When inputs is not a list """ + stopwords = get_stopwords(lang) + if custom_stopwords: + stopwords += custom_stopwords tokens = [word for word in tokens if word not in stopwords] return tokens diff --git a/tests/test_preprocessor.py b/tests/test_preprocessor.py index 66bc04d..a11ba16 100644 --- a/tests/test_preprocessor.py +++ b/tests/test_preprocessor.py @@ -188,14 +188,13 @@ def test_get_stopwords(): @pytest.mark.parametrize( - "input_tokens, expected_output", + "input_tokens, lang, expected_output", [ - (['I', 'like', 'when', 'you', 'move', 'your', 'body', '!'], ['I', 'move', 'body', '!']) + (['I', 'like', 'when', 'you', 'move', 'your', 'body', '!'], "en", ['I', 'move', 'body', '!']) ], ) -def test_remove_stopwords_tokens(input_tokens, expected_output): - stopwords = get_stopwords('en') - result = remove_stopwords_token(input_tokens, stopwords) +def test_remove_stopwords_tokens(input_tokens, lang, expected_output): + result = remove_stopwords_token(input_tokens, lang) np.testing.assert_array_equal(result, expected_output) From adf57391bf0db7ddad2733c299d1d6b74f5d6d59 Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Tue, 16 Feb 2021 10:12:44 +0100 Subject: [PATCH 12/16] update version 1.0.0 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index f514a2f..afaf360 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.9.1 \ No newline at end of file +1.0.0 \ No newline at end of file From 09db3b48876d18e3ff2c5af81f88a89ed1e5aeec Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Tue, 16 Feb 2021 10:21:16 +0100 Subject: [PATCH 13/16] add mosestekonizer version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e06f872..a4774f8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ chardet==3.0.4 emoji>=0.5.2 flashtext==2.7 ftfy<5.0.0,>=4.2.0 -mosestokenizer +mosestokenizer==1.1.0 nlpaug==1.0.1 nltk>=3.4.5 numpy>1.15.4 From 633af6149419a04814c72f8695c4774b1beba0b3 Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Tue, 16 Feb 2021 10:28:07 +0100 Subject: [PATCH 14/16] typo README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4a31283..b26156d 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ print(text) # "dinner life recommend" ``` -Take a look at all the functions that are available [here](https://github.com/artefactory/NLPretext/tree/master) in the ```preprocess.py``` scripts in the different folders: basic, social, token. +Take a look at all the functions that are available [here](https://github.com/artefactory/NLPretext/tree/master/nlpretext) in the ```preprocess.py``` scripts in the different folders: basic, social, token. # Individual Functions From 2a95b1dd1c3bbc91296fff075bfd6f14103f1d95 Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Tue, 16 Feb 2021 10:57:56 +0100 Subject: [PATCH 15/16] update project documentation --- docs/conf.py | 21 ++--- docs/index.rst | 52 ++++++------ docs/modules.rst | 6 +- docs/nautilus_nlp.config.rst | 22 ------ docs/nautilus_nlp.data.rst | 22 ------ docs/nautilus_nlp.features.rst | 22 ------ docs/nautilus_nlp.models.rst | 70 ----------------- docs/nautilus_nlp.rst | 42 ---------- docs/nautilus_nlp.utils.rst | 118 ---------------------------- docs/nautilus_nlp.visualization.rst | 22 ------ docs/nlpretext.augmentation.rst | 7 ++ docs/nlpretext.basic.rst | 7 ++ docs/nlpretext.rst | 8 ++ docs/nlpretext.social.rst | 7 ++ docs/nlpretext.token.rst | 7 ++ 15 files changed, 74 insertions(+), 359 deletions(-) delete mode 100644 docs/nautilus_nlp.config.rst delete mode 100644 docs/nautilus_nlp.data.rst delete mode 100644 docs/nautilus_nlp.features.rst delete mode 100644 docs/nautilus_nlp.models.rst delete mode 100644 docs/nautilus_nlp.rst delete mode 100644 docs/nautilus_nlp.utils.rst delete mode 100644 docs/nautilus_nlp.visualization.rst create mode 100644 docs/nlpretext.augmentation.rst create mode 100644 docs/nlpretext.basic.rst create mode 100644 docs/nlpretext.rst create mode 100644 docs/nlpretext.social.rst create mode 100644 docs/nlpretext.token.rst diff --git a/docs/conf.py b/docs/conf.py index fffe634..a6769f8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -31,17 +31,20 @@ # import os import sys +from pathlib import Path + sys.path.insert(0, os.path.abspath('../')) # -- Project information ----------------------------------------------------- -project = 'Nautilus_nlp' +project = 'NLPretext' copyright = '2020, Artefact' author = 'Artefact' -# The short X.Y version -version = '0.1.0' +with open(Path(__file__).resolve().parent.joinpath('../VERSION'), 'r') as fh: + version = fh.read() + # The full version, including alpha/beta/rc tags release = '' @@ -129,7 +132,7 @@ # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. -htmlhelp_basename = 'Nautilus_nlpdoc' +htmlhelp_basename = 'NLPretextdoc' # -- Options for LaTeX output ------------------------------------------------ @@ -156,8 +159,8 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'Nautilus_nlp.tex', 'Nautilus\\_nlp Documentation', - 'Robin Doumerc', 'manual'), + (master_doc, 'NLPretext.tex', 'Nautilus\\_nlp Documentation', + 'Artefact', 'manual'), ] @@ -166,7 +169,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - (master_doc, 'nautilus_nlp', 'Nautilus_nlp Documentation', + (master_doc, 'NLPretext', 'NLPretext Documentation', [author], 1) ] @@ -177,8 +180,8 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'Nautilus_nlp', 'Nautilus_nlp Documentation', - author, 'Nautilus_nlp', 'One line description of project.', + (master_doc, 'NLPretext', 'NLPretext Documentation', + author, 'NLPretext', 'One line description of project.', 'Miscellaneous'), ] diff --git a/docs/index.rst b/docs/index.rst index 91dd094..eb204fd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,63 +1,57 @@ -Welcome to Nautilus_nlp's documentation! +Welcome to NLPretext's documentation! ======================================== -The Nautilus NLP library aimed to be a meta-library to be used to help you get started on handling your NLP use-case. +The NLPretext library aimed to be a meta-library to be used to help you get started on handling your NLP use-case preprocessing. -This library can help you with: - 1. Cleaning text data - 2. Normalizing your dataset - 3. Training automatically multiclass, multilabel classifier - 4. Help you discover topics and cluster your data +# Installation +Beware, this package has been tested on Python **3.6** & **3.7** & **3.8**, and will probably not be working under python **2.7** as **Python2.7** EOL is scheduled for December 2019. -# Feature Request +To install this library you should first clone the repository: -As an Artefact user, you might be working on a NLP use case, and wish to use Nautilus. +pip install nlpretext - However, if you think Nautilus is lacking features that can be useful not only to your use case but also others, feel free to to fill up an issue with the label "Feature-request". +This library uses Spacy as tokenizer. Current models supported are `en_core_web_sm` and `fr_core_news_sm`. If not installed, run the following commands: - We will try to put it in the roadmap and implement it as soon as possible. +pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz -# Installation +pip install https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-2.3.0/fr_core_news_sm-2.3.0.tar.gz -Beware, this package has been tested on Python **3.6** & **3.7**, and will probably not be working under python **2.7** as **Python2.7** EOL is scheduled for December 2019. -To install this library you should first clone the repository: -`git clone https://github.com/artefactory/nautilus_nlp/ && cd nautilus_nlp` - -**If you don't use the docker container, we strongly advise you to do these steps in a virtual environnement** - -First you need to install the required files: +.. toctree:: + :maxdepth: 2 + :caption: Text Preprocessing Functions: -`pip install -r requirements.txt` + modules -then you can install it via pip: +.. toctree:: + :maxdepth: 2 + :caption: Basic preprocessing: -`pip install -e .` + nlpretext.basic .. toctree:: :maxdepth: 2 - :caption: Preprocessing and utility functions: + :caption: Social preprocessing: - modules + nlpretext.social .. toctree:: :maxdepth: 2 - :caption: Preprocessing and utility functions: - - nautilus_nlp.utils + :caption: Token preprocessing: + nlpretext.token .. toctree:: :maxdepth: 2 - :caption: Machine learning: + :caption: Text Augmentation: - nautilus_nlp.models + nlpretext.augmentation Indices and tables ================== diff --git a/docs/modules.rst b/docs/modules.rst index 26ca8ee..f7251b8 100644 --- a/docs/modules.rst +++ b/docs/modules.rst @@ -1,7 +1,7 @@ -nautilus_nlp -============ +nlpretext +========= .. toctree:: :maxdepth: 4 - nautilus_nlp + nlpretext diff --git a/docs/nautilus_nlp.config.rst b/docs/nautilus_nlp.config.rst deleted file mode 100644 index 8864121..0000000 --- a/docs/nautilus_nlp.config.rst +++ /dev/null @@ -1,22 +0,0 @@ -nautilus\_nlp.config package -============================ - -Submodules ----------- - -nautilus\_nlp.config.config module ----------------------------------- - -.. automodule:: nautilus_nlp.config.config - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: nautilus_nlp.config - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/nautilus_nlp.data.rst b/docs/nautilus_nlp.data.rst deleted file mode 100644 index 4b12cc2..0000000 --- a/docs/nautilus_nlp.data.rst +++ /dev/null @@ -1,22 +0,0 @@ -nautilus\_nlp.data package -========================== - -Submodules ----------- - -nautilus\_nlp.data.make\_dataset module ---------------------------------------- - -.. automodule:: nautilus_nlp.data.make_dataset - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: nautilus_nlp.data - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/nautilus_nlp.features.rst b/docs/nautilus_nlp.features.rst deleted file mode 100644 index c4b764c..0000000 --- a/docs/nautilus_nlp.features.rst +++ /dev/null @@ -1,22 +0,0 @@ -nautilus\_nlp.features package -============================== - -Submodules ----------- - -nautilus\_nlp.features.build\_features module ---------------------------------------------- - -.. automodule:: nautilus_nlp.features.build_features - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: nautilus_nlp.features - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/nautilus_nlp.models.rst b/docs/nautilus_nlp.models.rst deleted file mode 100644 index ee38f75..0000000 --- a/docs/nautilus_nlp.models.rst +++ /dev/null @@ -1,70 +0,0 @@ -nautilus\_nlp.models package -============================ - -Submodules ----------- - -nautilus\_nlp.models.Fasttext\_classifier module ------------------------------------------------- - -.. automodule:: nautilus_nlp.models.Fasttext_classifier - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.models.Fasttext\_embedding module ------------------------------------------------ - -.. automodule:: nautilus_nlp.models.Fasttext_embedding - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.models.Language\_detector module ----------------------------------------------- - -.. automodule:: nautilus_nlp.models.Language_detector - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.models.Sentiment\_detector module ------------------------------------------------ - -.. automodule:: nautilus_nlp.models.Sentiment_detector - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.models.Spacy\_model module ----------------------------------------- - -.. automodule:: nautilus_nlp.models.Spacy_model - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.models.sentiment module -------------------------------------- - -.. automodule:: nautilus_nlp.models.sentiment - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.models.sk\_vectorizer module ------------------------------------------- - -.. automodule:: nautilus_nlp.models.sk_vectorizer - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: nautilus_nlp.models - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/nautilus_nlp.rst b/docs/nautilus_nlp.rst deleted file mode 100644 index e240841..0000000 --- a/docs/nautilus_nlp.rst +++ /dev/null @@ -1,42 +0,0 @@ -nautilus\_nlp package -===================== - -Subpackages ------------ - -.. toctree:: - - nautilus_nlp.config - nautilus_nlp.data - nautilus_nlp.features - nautilus_nlp.models - nautilus_nlp.utils - nautilus_nlp.visualization - -Submodules ----------- - -nautilus\_nlp.corpus module ---------------------------- - -.. automodule:: nautilus_nlp.corpus - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.doc module ------------------------- - -.. automodule:: nautilus_nlp.doc - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: nautilus_nlp - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/nautilus_nlp.utils.rst b/docs/nautilus_nlp.utils.rst deleted file mode 100644 index 1b9e43a..0000000 --- a/docs/nautilus_nlp.utils.rst +++ /dev/null @@ -1,118 +0,0 @@ -nautilus\_nlp.utils package -=========================== - -Submodules ----------- - -nautilus\_nlp.utils.Text\_processor module ------------------------------------------- - -.. automodule:: nautilus_nlp.utils.Text_processor - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.utils.compat module ---------------------------------- - -.. automodule:: nautilus_nlp.utils.compat - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.utils.constants module ------------------------------------- - -.. automodule:: nautilus_nlp.utils.constants - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.utils.emoji module --------------------------------- - -.. automodule:: nautilus_nlp.utils.emoji - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.utils.encoding module ------------------------------------ - -.. automodule:: nautilus_nlp.utils.encoding - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.utils.export module ---------------------------------- - -.. automodule:: nautilus_nlp.utils.export - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.utils.file\_loader module ---------------------------------------- - -.. automodule:: nautilus_nlp.utils.file_loader - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.utils.lemmatizer module -------------------------------------- - -.. automodule:: nautilus_nlp.utils.lemmatizer - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.utils.preprocess module -------------------------------------- - -.. automodule:: nautilus_nlp.utils.preprocess - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.utils.stemmer module ----------------------------------- - -.. automodule:: nautilus_nlp.utils.stemmer - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.utils.text\_vectorizer module -------------------------------------------- - -.. automodule:: nautilus_nlp.utils.text_vectorizer - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.utils.tokenizer module ------------------------------------- - -.. automodule:: nautilus_nlp.utils.tokenizer - :members: - :undoc-members: - :show-inheritance: - -nautilus\_nlp.utils.vector\_similarity module ---------------------------------------------- - -.. automodule:: nautilus_nlp.utils.vector_similarity - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: nautilus_nlp.utils - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/nautilus_nlp.visualization.rst b/docs/nautilus_nlp.visualization.rst deleted file mode 100644 index 3f312ce..0000000 --- a/docs/nautilus_nlp.visualization.rst +++ /dev/null @@ -1,22 +0,0 @@ -nautilus\_nlp.visualization package -=================================== - -Submodules ----------- - -nautilus\_nlp.visualization.visualize module --------------------------------------------- - -.. automodule:: nautilus_nlp.visualization.visualize - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: nautilus_nlp.visualization - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/nlpretext.augmentation.rst b/docs/nlpretext.augmentation.rst new file mode 100644 index 0000000..11afca2 --- /dev/null +++ b/docs/nlpretext.augmentation.rst @@ -0,0 +1,7 @@ +nlpretext.augmentation module +----------------------------- + +.. automodule:: nlpretext.augmentation.preprocess + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/nlpretext.basic.rst b/docs/nlpretext.basic.rst new file mode 100644 index 0000000..ca40843 --- /dev/null +++ b/docs/nlpretext.basic.rst @@ -0,0 +1,7 @@ +nlpretext.basic module +----------------------------- + +.. automodule:: nlpretext.basic.preprocess + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/nlpretext.rst b/docs/nlpretext.rst new file mode 100644 index 0000000..7fa285f --- /dev/null +++ b/docs/nlpretext.rst @@ -0,0 +1,8 @@ +nlpretext.preprocessor module +----------------------------- + +.. automodule:: nlpretext.preprocessor + :members: + :undoc-members: + :show-inheritance: + diff --git a/docs/nlpretext.social.rst b/docs/nlpretext.social.rst new file mode 100644 index 0000000..06a10a0 --- /dev/null +++ b/docs/nlpretext.social.rst @@ -0,0 +1,7 @@ +nlpretext.social module +----------------------------- + +.. automodule:: nlpretext.social.preprocess + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/nlpretext.token.rst b/docs/nlpretext.token.rst new file mode 100644 index 0000000..28adcb7 --- /dev/null +++ b/docs/nlpretext.token.rst @@ -0,0 +1,7 @@ +nlpretext.token module +----------------------------- + +.. automodule:: nlpretext.token.preprocess + :members: + :undoc-members: + :show-inheritance: From 7112402bd1443be4a447235b92aa2fd63cf639bb Mon Sep 17 00:00:00 2001 From: Amale EL HAMRI Date: Tue, 16 Feb 2021 11:01:55 +0100 Subject: [PATCH 16/16] fix Nautilus > NLPretext --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index a6769f8..ef88628 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -159,7 +159,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'NLPretext.tex', 'Nautilus\\_nlp Documentation', + (master_doc, 'NLPretext.tex', 'NLPretext Documentation', 'Artefact', 'manual'), ]