From dd62f2b1e06895731f616d42cdc8b8fdbe2ed17b Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers <p.gijsbers@tue.nl>
Date: Mon, 20 Feb 2023 13:25:36 +0100
Subject: [PATCH] Update tests for sklearn 1.2, server issue (#1200)

* Relax error checking

* Skip unit test due to server issue openml/openml#1180

* Account for rename parameter `base_estimator` to `estimator` in sk 1.2

* Update n_init parameter for sklearn 1.2

* Test for more specific exceptions
---
 .../test_sklearn_extension.py                 | 46 +++++++++----------
 tests/test_runs/test_run_functions.py         | 18 ++++++--
 2 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
index 709d123f0..26c2dd563 100644
--- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
+++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -338,6 +338,7 @@ def test_serialize_model_clustering(self):
                 )
             )
         else:
+            n_init = '"warn"' if LooseVersion(sklearn.__version__) >= "1.2" else "10"
             fixture_parameters = OrderedDict(
                 (
                     ("algorithm", '"lloyd"'),
@@ -345,7 +346,7 @@ def test_serialize_model_clustering(self):
                     ("init", '"k-means++"'),
                     ("max_iter", "300"),
                     ("n_clusters", "8"),
-                    ("n_init", "10"),
+                    ("n_init", n_init),
                     ("random_state", "null"),
                     ("tol", "0.0001"),
                     ("verbose", "0"),
@@ -358,13 +359,13 @@ def test_serialize_model_clustering(self):
         )
         structure = serialization.get_structure("name")
 
-        self.assertEqual(serialization.name, fixture_name)
-        self.assertEqual(serialization.class_name, fixture_name)
-        self.assertEqual(serialization.custom_name, fixture_short_name)
-        self.assertEqual(serialization.description, fixture_description)
-        self.assertEqual(serialization.parameters, fixture_parameters)
-        self.assertEqual(serialization.dependencies, version_fixture)
-        self.assertDictEqual(structure, fixture_structure)
+        assert serialization.name == fixture_name
+        assert serialization.class_name == fixture_name
+        assert serialization.custom_name == fixture_short_name
+        assert serialization.description == fixture_description
+        assert serialization.parameters == fixture_parameters
+        assert serialization.dependencies == version_fixture
+        assert structure == fixture_structure
 
     def test_serialize_model_with_subcomponent(self):
         model = sklearn.ensemble.AdaBoostClassifier(
@@ -1449,22 +1450,19 @@ def test_deserialize_complex_with_defaults(self):
         pipe_orig = sklearn.pipeline.Pipeline(steps=steps)
 
         pipe_adjusted = sklearn.clone(pipe_orig)
-        if LooseVersion(sklearn.__version__) < "0.23":
-            params = {
-                "Imputer__strategy": "median",
-                "OneHotEncoder__sparse": False,
-                "Estimator__n_estimators": 10,
-                "Estimator__base_estimator__n_estimators": 10,
-                "Estimator__base_estimator__base_estimator__learning_rate": 0.1,
-            }
-        else:
-            params = {
-                "Imputer__strategy": "mean",
-                "OneHotEncoder__sparse": True,
-                "Estimator__n_estimators": 50,
-                "Estimator__base_estimator__n_estimators": 10,
-                "Estimator__base_estimator__base_estimator__learning_rate": 0.1,
-            }
+        impute_strategy = "median" if LooseVersion(sklearn.__version__) < "0.23" else "mean"
+        sparse = LooseVersion(sklearn.__version__) >= "0.23"
+        estimator_name = (
+            "base_estimator" if LooseVersion(sklearn.__version__) < "1.2" else "estimator"
+        )
+        params = {
+            "Imputer__strategy": impute_strategy,
+            "OneHotEncoder__sparse": sparse,
+            "Estimator__n_estimators": 10,
+            f"Estimator__{estimator_name}__n_estimators": 10,
+            f"Estimator__{estimator_name}__{estimator_name}__learning_rate": 0.1,
+        }
+
         pipe_adjusted.set_params(**params)
         flow = self.extension.model_to_flow(pipe_adjusted)
         pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True)
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index a9abcd05e..1e92613c3 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -410,10 +410,19 @@ def test_check_erronous_sklearn_flow_fails(self):
 
         # Invalid parameter values
         clf = LogisticRegression(C="abc", solver="lbfgs")
-        with self.assertRaisesRegex(
-            ValueError,
-            r"Penalty term must be positive; got \(C=u?'abc'\)",  # u? for 2.7/3.4-6 compability
-        ):
+        # The exact error message depends on scikit-learn version.
+        # Because the sklearn-extension module is to be separated,
+        # I will simply relax specifics of the raised Error.
+        # old: r"Penalty term must be positive; got \(C=u?'abc'\)"
+        # new: sklearn.utils._param_validation.InvalidParameterError:
+        #   The 'C' parameter of LogisticRegression must be a float in the range (0, inf]. Got 'abc' instead.  # noqa: E501
+        try:
+            from sklearn.utils._param_validation import InvalidParameterError
+
+            exceptions = (ValueError, InvalidParameterError)
+        except ImportError:
+            exceptions = (ValueError,)
+        with self.assertRaises(exceptions):
             openml.runs.run_model_on_task(
                 task=task,
                 model=clf,
@@ -680,6 +689,7 @@ def get_ct_cf(nominal_indices, numeric_indices):
             sentinel=sentinel,
         )
 
+    @unittest.skip("https://github.com/openml/OpenML/issues/1180")
     @unittest.skipIf(
         LooseVersion(sklearn.__version__) < "0.20",
         reason="columntransformer introduction in 0.20.0",