Skip to content

Commit

Permalink
some smaller items - documentation, repo name update (#135)
Browse files Browse the repository at this point in the history
* update links after repo rename

* reference for seqeval in notebook

* spacy provide custom config

* include link for evaluate

* fix code smell

* fix path in notebook

* correct path for data
  • Loading branch information
iulusoy authored Aug 18, 2023
1 parent aad5906 commit 442f36d
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 16 deletions.
9 changes: 9 additions & 0 deletions moralization/spacy_model_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,15 @@ def __init__(
existing_model = (
self.model_path.is_dir() and (self.model_path / "config.cfg").is_file()
)
# if another config was provided, check that it exists
if base_config_file:
config_file = Path(base_config_file)
if not config_file.is_file():
raise ValueError(
"The config file that you provided does not exist. Please check your input. {}".format(
base_config_file
)
)
if base_config_file or overwrite_existing_files or not existing_model:
_create_model(
self.model_path,
Expand Down
10 changes: 10 additions & 0 deletions moralization/tests/test_spacy_model_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ def test_spacy_model_manager_train_new_model(tmp_path, data_dir):
# evaluate trained model
evaluation = model.evaluate(data_manager)
assert "Moralisierung" in evaluation["spans_task1_per_type"]
# create instance with pre-existing config
path_to_config = model_path / "config.cfg"
# save model to other path
model_path = tmp_path / "idontexist2"
_ = SpacyModelManager(model_path, base_config_file=path_to_config.as_posix())
# try with config not found
with pytest.raises(ValueError):
SpacyModelManager(
model_path, base_config_file="./config", overwrite_existing_files=True
)


def test_spacy_model_manager_train_new_model_task(tmp_path, data_dir):
Expand Down
12 changes: 7 additions & 5 deletions notebooks/DemoNotebook_interactive_plots.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@
" # first install pinned version of setuptools (latest version doesn't seem to work with this package on colab)\n",
" %pip install setuptools==61 -qqq\n",
" # install the moralization package\n",
" %pip install git+https://github.com/ssciwr/moralization.git -qqq\n",
" %pip install git+https://github.com/ssciwr/moralization-analyzer.git -qqq\n",
" # download test data sets\n",
" !wget https://github.com/ssciwr/moralization/archive/refs/heads/test_data.zip -q\n",
" !mkdir -p data && unzip -qq test_data.zip && mv -f moralization-test_data/*_Data ./data/. && rm -rf moralization-test_data test_data.zip\n",
" !spacy download de_core_news_sm"
" !wget https://github.com/ssciwr/moralization-analyzer/archive/refs/heads/test_data.zip -q\n",
" !mkdir -p data && unzip -qq test_data.zip && mv -f moralization-analyzer-test_data/*_Data ./data/. && rm -rf moralization-analyzer-test_data test_data.zip\n",
" !spacy download de_core_news_sm\n",
" from google.colab import drive\n",
" drive.mount('/content/drive')"
]
},
{
Expand Down Expand Up @@ -512,7 +514,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
"version": "3.9.17"
},
"vscode": {
"interpreter": {
Expand Down
17 changes: 10 additions & 7 deletions notebooks/DemoNotebook_spacy_model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@
" # first install pinned version of setuptools (latest version doesn't seem to work with this package on colab)\n",
" %pip install setuptools==61 -qqq\n",
" # install the moralization package\n",
" %pip install git+https://github.com/ssciwr/moralization.git -qqq\n",
" %pip install git+https://github.com/ssciwr/moralization-analyzer.git -qqq\n",
"\n",
" # download test data sets\n",
" !wget https://github.com/ssciwr/moralization/archive/refs/heads/test_data.zip -q\n",
" !mkdir -p data && unzip -qq test_data.zip && mv -f moralization-test_data/*_Data ./data/. && rm -rf moralization-test_data test_data.zip\n",
" !wget https://github.com/ssciwr/moralization-analyzer/archive/refs/heads/test_data.zip -q\n",
" !mkdir -p data && unzip -qq test_data.zip && mv -f moralization-analyzer-test_data/*_Data ./data/. && rm -rf moralization-analyzer-test_data test_data.zip\n",
" !spacy download de_core_news_sm\n",
" from google.colab import drive\n",
" drive.mount('/content/drive')"
Expand Down Expand Up @@ -123,7 +123,9 @@
"source": [
"my_model = SpacyModelManager(\"my_model\", overwrite_existing_files=True)\n",
"# select a different language and task\n",
"my_model = SpacyModelManager(\"my_model\", overwrite_existing_files=True, language=\"en\", task=\"task2\")"
"my_model = SpacyModelManager(\"my_model\", overwrite_existing_files=True, language=\"en\", task=\"task2\")\n",
"# provide a different config file than the base config file\n",
"# my_model = SpacyModelManager(\"my_model\", overwrite_existing_files=True, language=\"en\", task=\"task2\", base_config_file=\"./config.cfg\")"
]
},
{
Expand Down Expand Up @@ -202,7 +204,7 @@
"### Train the model\n",
"\n",
"- `data_manager`: the data to be used for training the model\n",
"- `overrides`: an optional dictionary of model config settings to override"
"- `overrides`: an optional dictionary of model config settings to override; otherwise all settings are given in the `config.cfg`"
]
},
{
Expand All @@ -224,7 +226,8 @@
"source": [
"### Evaluate the model\n",
"\n",
"- `data_manager`: the test data from this data_manager will be used to evaluate the model"
"- `data_manager`: the test data from this data_manager will be used to evaluate the model\n",
"- the evaluation is carried out using spaCy's [evaluate](https://spacy.io/api/cli#benchmark-accuracy) command via the [evaluate cli](https://github.com/explosion/spaCy/blob/master/spacy/cli/evaluate.py). "
]
},
{
Expand Down Expand Up @@ -333,7 +336,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
"version": "3.9.17"
}
},
"nbformat": 4,
Expand Down
10 changes: 6 additions & 4 deletions notebooks/DemoNotebook_transformers_model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
" # first install pinned version of setuptools (latest version doesn't seem to work with this package on colab)\n",
" %pip install setuptools==61 -qqq\n",
" # install the moralization package\n",
" %pip install git+https://github.com/ssciwr/moralization.git -qqq\n",
" %pip install git+https://github.com/ssciwr/moralization-analyzer.git -qqq\n",
"\n",
" # download test data sets\n",
" !wget https://github.com/ssciwr/moralization/archive/refs/heads/test_data.zip -q\n",
" !mkdir -p data && unzip -qq test_data.zip && mv -f moralization-test_data/*_Data ./data/. && rm -rf moralization-test_data test_data.zip\n",
" !wget https://github.com/ssciwr/moralization-analyzer/archive/refs/heads/test_data.zip -q\n",
" !mkdir -p data && unzip -qq test_data.zip && mv -f moralization-analyzer-test_data/*_Data ./data/. && rm -rf moralization-analyzer-test_data test_data.zip\n",
" !spacy download de_core_news_sm\n",
" from google.colab import drive\n",
" drive.mount('/content/drive')"
Expand Down Expand Up @@ -345,7 +345,9 @@
"metadata": {},
"source": [
"To train, simply call the `train` method with the above `data_manager`. The token and label column names are passed using the `token_column_name` and `label_column_name` keywords. If the data has been prepared by the `DataManager` and was not a dataset you pulled from the Hugging Face Hub, these are set to `Sentences` and `Labels`. The number of training epochs is set by the keyword `num_train_epochs`.\n",
"As optimizer we currently use AdamW. The learning rate can be adjusted directly using the `learning_rate` keyword."
"As the optimizer, we currently use AdamW. The learning rate can be adjusted directly using the `learning_rate` keyword.\n",
"\n",
"The metric that is used for the training and evaluation is [`seqeval`](https://huggingface.co/spaces/evaluate-metric/seqeval)."
]
},
{
Expand Down

0 comments on commit 442f36d

Please sign in to comment.