From dd62f2b1e06895731f616d42cdc8b8fdbe2ed17b Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Mon, 20 Feb 2023 13:25:36 +0100 Subject: [PATCH] Update tests for sklearn 1.2, server issue (#1200) * Relax error checking * Skip unit test due to server issue openml/openml#1180 * Account for rename parameter `base_estimator` to `estimator` in sk 1.2 * Update n_init parameter for sklearn 1.2 * Test for more specific exceptions --- .../test_sklearn_extension.py | 46 +++++++++---------- tests/test_runs/test_run_functions.py | 18 ++++++-- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index 709d123f0..26c2dd563 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -338,6 +338,7 @@ def test_serialize_model_clustering(self): ) ) else: + n_init = '"warn"' if LooseVersion(sklearn.__version__) >= "1.2" else "10" fixture_parameters = OrderedDict( ( ("algorithm", '"lloyd"'), @@ -345,7 +346,7 @@ def test_serialize_model_clustering(self): ("init", '"k-means++"'), ("max_iter", "300"), ("n_clusters", "8"), - ("n_init", "10"), + ("n_init", n_init), ("random_state", "null"), ("tol", "0.0001"), ("verbose", "0"), @@ -358,13 +359,13 @@ def test_serialize_model_clustering(self): ) structure = serialization.get_structure("name") - self.assertEqual(serialization.name, fixture_name) - self.assertEqual(serialization.class_name, fixture_name) - self.assertEqual(serialization.custom_name, fixture_short_name) - self.assertEqual(serialization.description, fixture_description) - self.assertEqual(serialization.parameters, fixture_parameters) - self.assertEqual(serialization.dependencies, version_fixture) - self.assertDictEqual(structure, fixture_structure) + assert serialization.name == fixture_name + assert serialization.class_name == fixture_name + assert serialization.custom_name == fixture_short_name + assert serialization.description == fixture_description + assert serialization.parameters == fixture_parameters + assert serialization.dependencies == version_fixture + assert structure == fixture_structure def test_serialize_model_with_subcomponent(self): model = sklearn.ensemble.AdaBoostClassifier( @@ -1449,22 +1450,19 @@ def test_deserialize_complex_with_defaults(self): pipe_orig = sklearn.pipeline.Pipeline(steps=steps) pipe_adjusted = sklearn.clone(pipe_orig) - if LooseVersion(sklearn.__version__) < "0.23": - params = { - "Imputer__strategy": "median", - "OneHotEncoder__sparse": False, - "Estimator__n_estimators": 10, - "Estimator__base_estimator__n_estimators": 10, - "Estimator__base_estimator__base_estimator__learning_rate": 0.1, - } - else: - params = { - "Imputer__strategy": "mean", - "OneHotEncoder__sparse": True, - "Estimator__n_estimators": 50, - "Estimator__base_estimator__n_estimators": 10, - "Estimator__base_estimator__base_estimator__learning_rate": 0.1, - } + impute_strategy = "median" if LooseVersion(sklearn.__version__) < "0.23" else "mean" + sparse = LooseVersion(sklearn.__version__) >= "0.23" + estimator_name = ( + "base_estimator" if LooseVersion(sklearn.__version__) < "1.2" else "estimator" + ) + params = { + "Imputer__strategy": impute_strategy, + "OneHotEncoder__sparse": sparse, + "Estimator__n_estimators": 10, + f"Estimator__{estimator_name}__n_estimators": 10, + f"Estimator__{estimator_name}__{estimator_name}__learning_rate": 0.1, + } + pipe_adjusted.set_params(**params) flow = self.extension.model_to_flow(pipe_adjusted) pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index a9abcd05e..1e92613c3 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -410,10 +410,19 @@ def test_check_erronous_sklearn_flow_fails(self): # Invalid parameter values clf = LogisticRegression(C="abc", solver="lbfgs") - with self.assertRaisesRegex( - ValueError, - r"Penalty term must be positive; got \(C=u?'abc'\)", # u? for 2.7/3.4-6 compability - ): + # The exact error message depends on scikit-learn version. + # Because the sklearn-extension module is to be separated, + # I will simply relax specifics of the raised Error. + # old: r"Penalty term must be positive; got \(C=u?'abc'\)" + # new: sklearn.utils._param_validation.InvalidParameterError: + # The 'C' parameter of LogisticRegression must be a float in the range (0, inf]. Got 'abc' instead. # noqa: E501 + try: + from sklearn.utils._param_validation import InvalidParameterError + + exceptions = (ValueError, InvalidParameterError) + except ImportError: + exceptions = (ValueError,) + with self.assertRaises(exceptions): openml.runs.run_model_on_task( task=task, model=clf, @@ -680,6 +689,7 @@ def get_ct_cf(nominal_indices, numeric_indices): sentinel=sentinel, ) + @unittest.skip("https://github.com/openml/OpenML/issues/1180") @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0",