From dd62f2b1e06895731f616d42cdc8b8fdbe2ed17b Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers
Date: Mon, 20 Feb 2023 13:25:36 +0100
Subject: [PATCH] Update tests for sklearn 1.2, server issue (#1200)
* Relax error checking
* Skip unit test due to server issue openml/openml#1180
* Account for the renaming of parameter `base_estimator` to `estimator` in sklearn 1.2
* Update n_init parameter for sklearn 1.2
* Test for more specific exceptions
---
.../test_sklearn_extension.py | 46 +++++++++----------
tests/test_runs/test_run_functions.py | 18 ++++++--
2 files changed, 36 insertions(+), 28 deletions(-)
diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 709d123f0..26c2dd563 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -338,6 +338,7 @@ def test_serialize_model_clustering(self):
)
)
else:
+ n_init = '"warn"' if LooseVersion(sklearn.__version__) >= "1.2" else "10"
fixture_parameters = OrderedDict(
(
("algorithm", '"lloyd"'),
@@ -345,7 +346,7 @@ def test_serialize_model_clustering(self):
("init", '"k-means++"'),
("max_iter", "300"),
("n_clusters", "8"),
- ("n_init", "10"),
+ ("n_init", n_init),
("random_state", "null"),
("tol", "0.0001"),
("verbose", "0"),
@@ -358,13 +359,13 @@ def test_serialize_model_clustering(self):
)
structure = serialization.get_structure("name")
- self.assertEqual(serialization.name, fixture_name)
- self.assertEqual(serialization.class_name, fixture_name)
- self.assertEqual(serialization.custom_name, fixture_short_name)
- self.assertEqual(serialization.description, fixture_description)
- self.assertEqual(serialization.parameters, fixture_parameters)
- self.assertEqual(serialization.dependencies, version_fixture)
- self.assertDictEqual(structure, fixture_structure)
+ assert serialization.name == fixture_name
+ assert serialization.class_name == fixture_name
+ assert serialization.custom_name == fixture_short_name
+ assert serialization.description == fixture_description
+ assert serialization.parameters == fixture_parameters
+ assert serialization.dependencies == version_fixture
+ assert structure == fixture_structure
def test_serialize_model_with_subcomponent(self):
model = sklearn.ensemble.AdaBoostClassifier(
@@ -1449,22 +1450,19 @@ def test_deserialize_complex_with_defaults(self):
pipe_orig = sklearn.pipeline.Pipeline(steps=steps)
pipe_adjusted = sklearn.clone(pipe_orig)
- if LooseVersion(sklearn.__version__) < "0.23":
- params = {
- "Imputer__strategy": "median",
- "OneHotEncoder__sparse": False,
- "Estimator__n_estimators": 10,
- "Estimator__base_estimator__n_estimators": 10,
- "Estimator__base_estimator__base_estimator__learning_rate": 0.1,
- }
- else:
- params = {
- "Imputer__strategy": "mean",
- "OneHotEncoder__sparse": True,
- "Estimator__n_estimators": 50,
- "Estimator__base_estimator__n_estimators": 10,
- "Estimator__base_estimator__base_estimator__learning_rate": 0.1,
- }
+ impute_strategy = "median" if LooseVersion(sklearn.__version__) < "0.23" else "mean"
+ sparse = LooseVersion(sklearn.__version__) >= "0.23"
+ estimator_name = (
+ "base_estimator" if LooseVersion(sklearn.__version__) < "1.2" else "estimator"
+ )
+ params = {
+ "Imputer__strategy": impute_strategy,
+ "OneHotEncoder__sparse": sparse,
+ "Estimator__n_estimators": 10,
+ f"Estimator__{estimator_name}__n_estimators": 10,
+ f"Estimator__{estimator_name}__{estimator_name}__learning_rate": 0.1,
+ }
+
pipe_adjusted.set_params(**params)
flow = self.extension.model_to_flow(pipe_adjusted)
pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index a9abcd05e..1e92613c3 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -410,10 +410,19 @@ def test_check_erronous_sklearn_flow_fails(self):
# Invalid parameter values
clf = LogisticRegression(C="abc", solver="lbfgs")
- with self.assertRaisesRegex(
- ValueError,
- r"Penalty term must be positive; got \(C=u?'abc'\)", # u? for 2.7/3.4-6 compability
- ):
+ # The exact error message depends on scikit-learn version.
+ # Because the sklearn-extension module is going to be separated out,
+ # we simply relax the check on the specifics of the raised error.
+ # old: r"Penalty term must be positive; got \(C=u?'abc'\)"
+ # new: sklearn.utils._param_validation.InvalidParameterError:
+ # The 'C' parameter of LogisticRegression must be a float in the range (0, inf]. Got 'abc' instead. # noqa: E501
+ try:
+ from sklearn.utils._param_validation import InvalidParameterError
+
+ exceptions = (ValueError, InvalidParameterError)
+ except ImportError:
+ exceptions = (ValueError,)
+ with self.assertRaises(exceptions):
openml.runs.run_model_on_task(
task=task,
model=clf,
@@ -680,6 +689,7 @@ def get_ct_cf(nominal_indices, numeric_indices):
sentinel=sentinel,
)
+ @unittest.skip("https://github.com/openml/OpenML/issues/1180")
@unittest.skipIf(
LooseVersion(sklearn.__version__) < "0.20",
reason="columntransformer introduction in 0.20.0",