some smaller items - documentation, repo name update (#135)

* update links after repo rename
* reference for seqeval in notebook
* spacy provide custom config
* include link for evaluate
* fix code smell
* fix path in notebook
* correct path for data
ssciwr · Aug 18, 2023 · 442f36d · 442f36d
1 parent aad5906
commit 442f36d
Show file tree

Hide file tree

Showing 5 changed files with 42 additions and 16 deletions.
diff --git a/moralization/spacy_model_manager.py b/moralization/spacy_model_manager.py
@@ -152,6 +152,15 @@ def __init__(
         existing_model = (
             self.model_path.is_dir() and (self.model_path / "config.cfg").is_file()
         )
+        # if another config was provided, check that it exists
+        if base_config_file:
+            config_file = Path(base_config_file)
+            if not config_file.is_file():
+                raise ValueError(
+                    "The config file that you provided does not exist. Please check your input. {}".format(
+                        base_config_file
+                    )
+                )
         if base_config_file or overwrite_existing_files or not existing_model:
             _create_model(
                 self.model_path,

diff --git a/moralization/tests/test_spacy_model_manager.py b/moralization/tests/test_spacy_model_manager.py
@@ -28,6 +28,16 @@ def test_spacy_model_manager_train_new_model(tmp_path, data_dir):
     # evaluate trained model
     evaluation = model.evaluate(data_manager)
     assert "Moralisierung" in evaluation["spans_task1_per_type"]
+    # create instance with pre-existing config
+    path_to_config = model_path / "config.cfg"
+    # save model to other path
+    model_path = tmp_path / "idontexist2"
+    _ = SpacyModelManager(model_path, base_config_file=path_to_config.as_posix())
+    # try with config not found
+    with pytest.raises(ValueError):
+        SpacyModelManager(
+            model_path, base_config_file="./config", overwrite_existing_files=True
+        )
 
 
 def test_spacy_model_manager_train_new_model_task(tmp_path, data_dir):

diff --git a/notebooks/DemoNotebook_interactive_plots.ipynb b/notebooks/DemoNotebook_interactive_plots.ipynb
@@ -28,11 +28,13 @@
     "    # first install pinned version of setuptools (latest version doesn't seem to work with this package on colab)\n",
     "    %pip install setuptools==61 -qqq\n",
     "    # install the moralization package\n",
-    "    %pip install git+https://github.com/ssciwr/moralization.git -qqq\n",
+    "    %pip install git+https://github.com/ssciwr/moralization-analyzer.git -qqq\n",
     "      # download test data sets\n",
-    "    !wget https://github.com/ssciwr/moralization/archive/refs/heads/test_data.zip -q\n",
-    "    !mkdir -p data && unzip -qq test_data.zip && mv -f moralization-test_data/*_Data ./data/. && rm -rf moralization-test_data test_data.zip\n",
-    "    !spacy download de_core_news_sm"
+    "    !wget https://github.com/ssciwr/moralization-analyzer/archive/refs/heads/test_data.zip -q\n",
+    "    !mkdir -p data && unzip -qq test_data.zip && mv -f moralization-analyzer-test_data/*_Data ./data/. && rm -rf moralization-analyzer-test_data test_data.zip\n",
+    "    !spacy download de_core_news_sm\n",
+    "    from google.colab import drive\n",
+    "    drive.mount('/content/drive')"
    ]
   },
   {
@@ -512,7 +514,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.13"
+   "version": "3.9.17"
   },
   "vscode": {
    "interpreter": {

diff --git a/notebooks/DemoNotebook_spacy_model.ipynb b/notebooks/DemoNotebook_spacy_model.ipynb
@@ -29,11 +29,11 @@
     "    # first install pinned version of setuptools (latest version doesn't seem to work with this package on colab)\n",
     "    %pip install setuptools==61 -qqq\n",
     "    # install the moralization package\n",
-    "    %pip install git+https://github.com/ssciwr/moralization.git -qqq\n",
+    "    %pip install git+https://github.com/ssciwr/moralization-analyzer.git -qqq\n",
     "\n",
     "    # download test data sets\n",
-    "    !wget https://github.com/ssciwr/moralization/archive/refs/heads/test_data.zip -q\n",
-    "    !mkdir -p data && unzip -qq test_data.zip && mv -f moralization-test_data/*_Data ./data/. && rm -rf moralization-test_data test_data.zip\n",
+    "    !wget https://github.com/ssciwr/moralization-analyzer/archive/refs/heads/test_data.zip -q\n",
+    "    !mkdir -p data && unzip -qq test_data.zip && mv -f moralization-analyzer-test_data/*_Data ./data/. && rm -rf moralization-analyzer-test_data test_data.zip\n",
     "    !spacy download de_core_news_sm\n",
     "    from google.colab import drive\n",
     "    drive.mount('/content/drive')"
@@ -123,7 +123,9 @@
    "source": [
     "my_model = SpacyModelManager(\"my_model\", overwrite_existing_files=True)\n",
     "# select a different language and task\n",
-    "my_model = SpacyModelManager(\"my_model\", overwrite_existing_files=True, language=\"en\", task=\"task2\")"
+    "my_model = SpacyModelManager(\"my_model\", overwrite_existing_files=True, language=\"en\", task=\"task2\")\n",
+    "# provide a different config file than the base config file\n",
+    "# my_model = SpacyModelManager(\"my_model\", overwrite_existing_files=True, language=\"en\", task=\"task2\", base_config_file=\"./config.cfg\")"
    ]
   },
   {
@@ -202,7 +204,7 @@
     "### Train the model\n",
     "\n",
     "- `data_manager`: the data to be used for training the model\n",
-    "- `overrides`: an optional dictionary of model config settings to override"
+    "- `overrides`: an optional dictionary of model config settings to override; otherwise all settings are given in the `config.cfg`"
    ]
   },
   {
@@ -224,7 +226,8 @@
    "source": [
     "### Evaluate the model\n",
     "\n",
-    "- `data_manager`: the test data from this data_manager will be used to evaluate the model"
+    "- `data_manager`: the test data from this data_manager will be used to evaluate the model\n",
+    "- the evaluation is carried out using spaCy's [evaluate](https://spacy.io/api/cli#benchmark-accuracy) command via the [evaluate cli](https://github.com/explosion/spaCy/blob/master/spacy/cli/evaluate.py). "
    ]
   },
   {
@@ -333,7 +336,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.0"
+   "version": "3.9.17"
   }
  },
  "nbformat": 4,

diff --git a/notebooks/DemoNotebook_transformers_model.ipynb b/notebooks/DemoNotebook_transformers_model.ipynb
@@ -26,11 +26,11 @@
     "    # first install pinned version of setuptools (latest version doesn't seem to work with this package on colab)\n",
     "    %pip install setuptools==61 -qqq\n",
     "    # install the moralization package\n",
-    "    %pip install git+https://github.com/ssciwr/moralization.git -qqq\n",
+    "    %pip install git+https://github.com/ssciwr/moralization-analyzer.git -qqq\n",
     "\n",
     "    # download test data sets\n",
-    "    !wget https://github.com/ssciwr/moralization/archive/refs/heads/test_data.zip -q\n",
-    "    !mkdir -p data && unzip -qq test_data.zip && mv -f moralization-test_data/*_Data ./data/. && rm -rf moralization-test_data test_data.zip\n",
+    "    !wget https://github.com/ssciwr/moralization-analyzer/archive/refs/heads/test_data.zip -q\n",
+    "    !mkdir -p data && unzip -qq test_data.zip && mv -f moralization-analyzer-test_data/*_Data ./data/. && rm -rf moralization-analyzer-test_data test_data.zip\n",
     "    !spacy download de_core_news_sm\n",
     "    from google.colab import drive\n",
     "    drive.mount('/content/drive')"
@@ -345,7 +345,9 @@
    "metadata": {},
    "source": [
     "To train, simply call the `train` method with the above `data_manager`. The token and column names are passed using the `token_column_name` and `label_column_name` keywords. If the data has been prepared by the `DataManager` and was not a dataset you pulled from the Hugging Face Hub, these are set to `Sentences` and `Labels`. The number of training epochs is set by the keyword `num_train_epochs`.\n",
-    "As optimizer we currently use AdamW. The learning rate can be adjusted directly using the `learning_rate` keyword."
+    "As optimizer we currently use AdamW. The learning rate can be adjusted directly using the `learning_rate` keyword.\n",
+    "\n",
+    "The metric that is used for the training and evaluation is [`seqeval`](https://huggingface.co/spaces/evaluate-metric/seqeval)."
    ]
   },
   {