Skip to content

Commit

Permalink
[tests] Resolve remaining test failures (#550)
Browse files Browse the repository at this point in the history
* Run formatting

* Avoid onnx==1.16.2 due to errors on Windows

* Skip flaky test - passes in isolation, fails during full suite

And it fails because the log doesn't get captured by the test, not because the log isn't thrown

* Fix model card pattern

* Only override model_card_data model_id if not already set

* Prevent spacy from forcing numpy 2

* Fix softened evaluation_strategy deprecation

* Always override eval_strategy with evaluation_strategy if the latter is provided

* Restrict codecarbon to <2.6 to avoid failures from 2.7.* and print issues in 2.6.*

* Set evaluation_strategy default to None

* Print model card before full-matching

* Print the full stdout during tests

* Simplify the required pattern for model card generation tests

These proved problematic as the ordering of each section in the metadata can change between OSes, versions, etc.

* Run formatting

* Require at least transformers>=4.41.0 due to eval_strategy rename

* Remove debug print statement in tests

* Use SafeTemporaryDirectory that prevents NotADirectoryError on Windows

* Add tests for Python 3.11 & 3.12

Previously, these would fail for various reasons

* Skip OpenVINO tests; OpenVINO exporting has been broken since openvino==2022.3.0

Skipping it allows us to add tests for Python 3.11 and 3.12, and OpenVINO support is not as important to SetFit as having tests

* Set minimum datasets version to 2.7.0 due to pytest failures at Py3.11 & 3.12

* Increment minimum datasets version for 3.11/3.12 support
  • Loading branch information
tomaarsen authored Sep 12, 2024
1 parent 3904e53 commit 72f4d1e
Show file tree
Hide file tree
Showing 13 changed files with 69 additions and 145 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
name: Run unit tests
strategy:
matrix:
python-version: ['3.8', '3.9', '3.10']
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
os: [ubuntu-latest, windows-latest]
requirements: ['.[tests]', '.[compat_tests]']
fail-fast: false
Expand Down Expand Up @@ -73,7 +73,7 @@ jobs:
shell: bash
run: |
echo "OLD_HF_CACHE_HASH=$(find ~/.cache/huggingface/hub ~/.cache/torch -type f -exec sha256sum {} + | LC_ALL=C sort | sha256sum | cut -d ' ' -f 1)" >> $GITHUB_ENV
pytest -sv tests/
pytest -v tests/
echo "NEW_HF_CACHE_HASH=$(find ~/.cache/huggingface/hub ~/.cache/torch -type f -exec sha256sum {} + | LC_ALL=C sort | sha256sum | cut -d ' ' -f 1)" >> $GITHUB_ENV
- name: Save new HF models to cache
Expand Down
15 changes: 9 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,23 @@

INTEGRATIONS_REQUIRE = ["optuna"]
REQUIRED_PKGS = [
"datasets>=2.3.0",
"datasets>=2.15.0",
"sentence-transformers>=2.2.1",
"transformers>=4.41.0",
"evaluate>=0.3.0",
"huggingface_hub>=0.21.0",
"huggingface_hub>=0.23.0",
"scikit-learn",
"packaging",
]
ABSA_REQUIRE = ["spacy"]
ABSA_REQUIRE = ["spacy<3.7.6"]
QUALITY_REQUIRE = ["black", "flake8", "isort", "tabulate"]
ONNX_REQUIRE = ["onnxruntime", "onnx", "skl2onnx"]
OPENVINO_REQUIRE = ["hummingbird-ml<0.4.9", "openvino==2022.3.0"]
ONNX_REQUIRE = ["onnxruntime", "onnx!=1.16.2", "skl2onnx"]
OPENVINO_REQUIRE = ["hummingbird-ml", "openvino"]
TESTS_REQUIRE = ["pytest", "pytest-cov"] + ONNX_REQUIRE + OPENVINO_REQUIRE + ABSA_REQUIRE
DOCS_REQUIRE = ["hf-doc-builder>=0.3.0"]
CODECARBON_REQUIRE = ["codecarbon"]
CODECARBON_REQUIRE = ["codecarbon<2.6.0"]
# 2.7.* fails with AttributeError: 'EmissionsTracker' object has no attribute '_cloud'
# 2.6.* has an accidental print statement spamming the terminal
EXTRAS_REQUIRE = {
"optuna": INTEGRATIONS_REQUIRE,
"quality": QUALITY_REQUIRE,
Expand Down
6 changes: 5 additions & 1 deletion src/setfit/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,11 @@ def create_model_card(self, path: str, model_name: Optional[str] = "SetFit Model
# via push_to_hub, and the path is in a temporary folder, then we only take the last two
# directories
model_path = Path(model_name)
if model_path.exists() and Path(tempfile.gettempdir()) in model_path.resolve().parents:
if (
self.model_card_data.model_id is None
and model_path.exists()
and Path(tempfile.gettempdir()) in model_path.resolve().parents
):
self.model_card_data.model_id = "/".join(model_path.parts[-2:])

with open(os.path.join(path, "README.md"), "w", encoding="utf-8") as f:
Expand Down
4 changes: 2 additions & 2 deletions src/setfit/training_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ class TrainingArguments:
logging_steps: int = 50

eval_strategy: str = "no"
evaluation_strategy: str = field(default="no", repr=False, init=False) # Softly deprecated
evaluation_strategy: Optional[str] = field(default=None, repr=False) # Softly deprecated
eval_steps: Optional[int] = None
eval_delay: int = 0
eval_max_steps: int = -1
Expand Down Expand Up @@ -252,7 +252,7 @@ def __post_init__(self) -> None:
self.logging_dir = default_logdir()

self.logging_strategy = IntervalStrategy(self.logging_strategy)
if self.evaluation_strategy and not self.eval_strategy:
if self.evaluation_strategy is not None:
logger.warning(
"The `evaluation_strategy` argument is deprecated and will be removed in a future version. "
"Please use `eval_strategy` instead."
Expand Down
5 changes: 5 additions & 0 deletions tests/exporters/test_openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,17 @@

import numpy as np
import openvino.runtime as ov
import pytest
from transformers import AutoTokenizer

from setfit import SetFitModel
from setfit.exporters.openvino import export_to_openvino


@pytest.mark.skip(
reason="OpenVINO exporting broke since openvino==2022.3.0, while this version is not supported for Python 3.11 onwards. "
"To allow us to add Python 3.11+ support, we are skipping this test until OpenVINO support is fixed."
)
def test_export_to_openvino():
"""Test that the exported `OpenVINO` model returns the same predictions as the original model."""
model_path = "lewtun/my-awesome-setfit-model"
Expand Down
46 changes: 3 additions & 43 deletions tests/model_card_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,47 +6,7 @@
MODEL_CARD_PATTERN = re.compile(
"""\
---
language:
- en
license: apache-2\.0
library_name: setfit
tags:
- setfit
- sentence-transformers
- text-classification
- generated_from_setfit_trainer
datasets:
- sst2
metrics:
- accuracy
widget:
- text: .*
pipeline_tag: text-classification
inference: true
co2_eq_emissions:
emissions: [\d\.\-e]+
source: codecarbon
training_type: fine-tuning
on_cloud: (false|true)
cpu_model: .+
ram_total_size: [\d\.]+
hours_used: [\d\.]+
( hardware_used: .+
)?base_model: sentence-transformers/paraphrase-albert-small-v2
model-index:
- name: SetFit with sentence-transformers\/paraphrase-albert-small-v2 on SST2
results:
- task:
type: text-classification
name: Text Classification
dataset:
name: SST2
type: sst2
split: test
metrics:
- type: accuracy
value: [\d\.]+
name: Accuracy
.*
---
\# SetFit with sentence\-transformers/paraphrase\-albert\-small\-v2 on SST2
Expand All @@ -62,8 +22,8 @@
### Model Description
- \*\*Model Type:\*\* SetFit
- \*\*Sentence Transformer body:\*\* \[sentence-transformers/paraphrase-albert-small-v2\]\(https://huggingface.co/sentence-transformers/paraphrase-albert-small-v2\)
- \*\*Classification head:\*\* a \[LogisticRegression\]\(https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html\) instance
- \*\*Sentence Transformer body:\*\* \[sentence-transformers/paraphrase-albert-small-v2\]\(https://huggingface.co/sentence-transformers/paraphrase-albert-small-v2\) *
- \*\*Classification head:\*\* a \[LogisticRegression\]\(https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html\) instance *
- \*\*Maximum Sequence Length:\*\* 100 tokens
- \*\*Number of Classes:\*\* 2 classes
- \*\*Training Dataset:\*\* \[SST2\]\(https://huggingface.co/datasets/sst2\)
Expand Down
41 changes: 1 addition & 40 deletions tests/span/aspect_model_card_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,46 +6,7 @@
ASPECT_MODEL_CARD_PATTERN = re.compile(
"""\
---
language:
- en
license: apache-2\.0
library_name: setfit
tags:
- setfit
- absa
- sentence-transformers
- text-classification
- generated_from_setfit_trainer
base_model: sentence-transformers/paraphrase-albert-small-v2
metrics:
- accuracy
widget:
- text: .*
pipeline_tag: text-classification
inference: false
co2_eq_emissions:
emissions: [\d\.\-e]+
source: codecarbon
training_type: fine-tuning
on_cloud: (false|true)
cpu_model: .+
ram_total_size: [\d\.]+
hours_used: [\d\.]+
( hardware_used: .+
)?model-index:
- name: SetFit Aspect Model with sentence-transformers\/paraphrase-albert-small-v2
results:
- task:
type: text-classification
name: Text Classification
dataset:
name: Unknown
type: unknown
split: test
metrics:
- type: accuracy
value: [\d\.]+
name: Accuracy
.*
---
\# SetFit Aspect Model with sentence\-transformers/paraphrase\-albert\-small\-v2
Expand Down
41 changes: 1 addition & 40 deletions tests/span/polarity_model_card_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,46 +6,7 @@
POLARITY_MODEL_CARD_PATTERN = re.compile(
"""\
---
language:
- en
license: apache-2\.0
library_name: setfit
tags:
- setfit
- absa
- sentence-transformers
- text-classification
- generated_from_setfit_trainer
base_model: sentence-transformers/paraphrase-albert-small-v2
metrics:
- accuracy
widget:
- text: .*
pipeline_tag: text-classification
inference: false
co2_eq_emissions:
emissions: [\d\.\-e]+
source: codecarbon
training_type: fine-tuning
on_cloud: (false|true)
cpu_model: .+
ram_total_size: [\d\.]+
hours_used: [\d\.]+
( hardware_used: .+
)?model-index:
- name: SetFit Polarity Model with sentence-transformers\/paraphrase-albert-small-v2
results:
- task:
type: text-classification
name: Text Classification
dataset:
name: Unknown
type: unknown
split: test
metrics:
- type: accuracy
value: [\d\.]+
name: Accuracy
.*
---
\# SetFit Polarity Model with sentence\-transformers/paraphrase\-albert\-small\-v2
Expand Down
8 changes: 4 additions & 4 deletions tests/span/test_modeling.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import re
from pathlib import Path
from tempfile import TemporaryDirectory

import pytest
import torch
Expand All @@ -13,6 +12,7 @@
from setfit.span.aspect_extractor import AspectExtractor
from setfit.span.modeling import AspectModel, PolarityModel
from tests.test_modeling import torch_cuda_available
from tests.utils import SafeTemporaryDirectory


def test_loading():
Expand Down Expand Up @@ -65,7 +65,7 @@ def test_save_load(absa_model: AbsaModel, caplog: LogCaptureFixture) -> None:

absa_model.polarity_model.span_context = 5

with TemporaryDirectory() as tmp_dir:
with SafeTemporaryDirectory() as tmp_dir:
tmp_dir = str(Path(tmp_dir) / "model")
absa_model.save_pretrained(tmp_dir)
assert (Path(tmp_dir + "-aspect") / "config_setfit.json").exists()
Expand Down Expand Up @@ -93,8 +93,8 @@ def test_save_load(absa_model: AbsaModel, caplog: LogCaptureFixture) -> None:
assert len(caplog.record_tuples) == 2
caplog.clear()

with TemporaryDirectory() as aspect_tmp_dir:
with TemporaryDirectory() as polarity_tmp_dir:
with SafeTemporaryDirectory() as aspect_tmp_dir:
with SafeTemporaryDirectory() as polarity_tmp_dir:
absa_model.save_pretrained(aspect_tmp_dir, polarity_tmp_dir)
assert (Path(aspect_tmp_dir) / "config_setfit.json").exists()
assert (Path(polarity_tmp_dir) / "config_setfit.json").exists()
Expand Down
4 changes: 2 additions & 2 deletions tests/test_deprecated_trainer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import pathlib
import re
import tempfile
from unittest import TestCase

import evaluate
Expand All @@ -16,6 +15,7 @@
from setfit.modeling import SetFitModel
from setfit.trainer import SetFitTrainer
from setfit.utils import BestRun
from tests.utils import SafeTemporaryDirectory


logging.set_verbosity_warning()
Expand Down Expand Up @@ -134,7 +134,7 @@ def test_trainer_raises_error_when_dataset_not_split(self):

def test_trainer_raises_error_when_dataset_is_dataset_dict_with_train(self):
"""Verify that a useful error is raised if we pass an unsplit dataset with only a `train` split to the trainer."""
with tempfile.TemporaryDirectory() as tmpdirname:
with SafeTemporaryDirectory() as tmpdirname:
path = pathlib.Path(tmpdirname) / "test_dataset_dict_with_train.csv"
path.write_text("label,text\n1,good\n0,terrible\n")
dataset = load_dataset("csv", data_files=str(path))
Expand Down
6 changes: 3 additions & 3 deletions tests/test_modeling.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
from pathlib import Path
from tempfile import TemporaryDirectory
from unittest import TestCase

import numpy as np
Expand All @@ -14,6 +13,7 @@

from setfit import SetFitHead, SetFitModel, Trainer
from setfit.modeling import MODEL_HEAD_NAME
from tests.utils import SafeTemporaryDirectory


torch_cuda_available = pytest.mark.skipif(not torch.cuda.is_available(), reason="PyTorch must be compiled with CUDA")
Expand Down Expand Up @@ -262,7 +262,7 @@ def test_load_model_on_device(device):


def test_save_load_config(model: SetFitModel) -> None:
with TemporaryDirectory() as tmp_dir:
with SafeTemporaryDirectory() as tmp_dir:
tmp_dir = str(Path(tmp_dir) / "model")
model.save_pretrained(tmp_dir)
config_path = Path(tmp_dir) / "config_setfit.json"
Expand All @@ -271,7 +271,7 @@ def test_save_load_config(model: SetFitModel) -> None:
config = json.load(f)
assert config == {"normalize_embeddings": False, "labels": None}

with TemporaryDirectory() as tmp_dir:
with SafeTemporaryDirectory() as tmp_dir:
tmp_dir = str(Path(tmp_dir) / "model")
model.normalize_embeddings = True
model.labels = ["negative", "positive"]
Expand Down
8 changes: 6 additions & 2 deletions tests/test_trainer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import re
import tempfile
from pathlib import Path
from unittest import TestCase

Expand All @@ -20,6 +19,7 @@
from setfit.trainer import Trainer
from setfit.training_args import TrainingArguments
from setfit.utils import BestRun
from tests.utils import SafeTemporaryDirectory


logging.set_verbosity_warning()
Expand Down Expand Up @@ -141,7 +141,7 @@ def test_trainer_raises_error_when_dataset_not_split(self):

def test_trainer_raises_error_when_dataset_is_dataset_dict_with_train(self):
"""Verify that a useful error is raised if we pass an unsplit dataset with only a `train` split to the trainer."""
with tempfile.TemporaryDirectory() as tmpdirname:
with SafeTemporaryDirectory() as tmpdirname:
path = Path(tmpdirname) / "test_dataset_dict_with_train.csv"
path.write_text("label,text\n1,good\n0,terrible\n")
dataset = load_dataset("csv", data_files=str(path))
Expand Down Expand Up @@ -257,6 +257,10 @@ def test_trainer_normalize(self):
metrics = trainer.evaluate()
self.assertEqual(metrics, {"accuracy": 1.0})

@pytest.mark.skip(
"This test is flaky and needs to be fixed; it passes when run in isolation, under "
"TrainerDifferentiableHeadTest, and even under test_trainer.py, but fails when run in the full test suite."
)
def test_trainer_max_length_exceeds_max_acceptable_length(self):
trainer = Trainer(
model=self.model,
Expand Down
Loading

0 comments on commit 72f4d1e

Please sign in to comment.