pre-commit update #1150

Merged
8 commits merged on Jul 11, 2022
2 changes: 1 addition & 1 deletion .flake8
@@ -5,7 +5,7 @@ select = C,E,F,W,B,T
ignore = E203, E402, W503
per-file-ignores =
*__init__.py:F401
*cli.py:T001
*cli.py:T201
exclude =
venv
examples
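
Note: the T001 -> T201 change tracks the flake8-print bump further down in this diff; flake8-print 5.x renumbered its error codes, so "print found" is now reported as T201 instead of T001 and the per-file ignore for *cli.py has to follow. A minimal sketch of the kind of line this ignore keeps allowed (the message is made up):

# openml/cli.py: console output is intentional here, so the flake8-print
# warning T201 ("print found") is suppressed for *cli.py by the ignore above.
print("Cleared the local OpenML cache.")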
16 changes: 11 additions & 5 deletions .pre-commit-config.yaml
@@ -1,28 +1,34 @@
repos:
- repo: https://github.com/psf/black
rev: 19.10b0
rev: 22.6.0
hooks:
- id: black
args: [--line-length=100]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.761
rev: v0.961
hooks:
- id: mypy
name: mypy openml
files: openml/.*
additional_dependencies:
- types-requests
- types-python-dateutil
- id: mypy
name: mypy tests
files: tests/.*
additional_dependencies:
- types-requests
- types-python-dateutil
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.3
rev: 4.0.1
hooks:
- id: flake8
name: flake8 openml
files: openml/.*
additional_dependencies:
- flake8-print==3.1.4
- flake8-print==5.0.0
- id: flake8
name: flake8 tests
files: tests/.*
additional_dependencies:
- flake8-print==3.1.4
- flake8-print==5.0.0
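
Note: the new additional_dependencies for the mypy hooks (types-requests, types-python-dateutil) are needed because mypy stopped bundling third-party stubs with the 0.900 series, so the stub packages have to be installed into the hook environment explicitly. Most of the reformatting in the files below follows from the black bump from 19.10b0 to 22.6.0 rather than from manual edits: newer black (since about 20.8) treats a trailing comma after the last argument as a "magic trailing comma" and keeps such a call exploded with one argument per line, whereas 19.10b0 collapsed it onto a single line whenever it fit within the 100-character limit. A minimal, self-contained sketch of the effect; the dict call is only illustrative, but the same applies to the openml.runs.run_model_on_task and make_pipeline calls in the example diffs below:

# How black 19.10b0 left it: the call fits in 100 characters, so it stays on one line.
params = dict(model="clf", task="task", upload_flow=False, avoid_duplicate_runs=False,)

# How black 22.6.0 formats it: the trailing comma forces one argument per line.
params = dict(
    model="clf",
    task="task",
    upload_flow=False,
    avoid_duplicate_runs=False,
)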
1 change: 1 addition & 0 deletions doc/progress.rst
@@ -12,6 +12,7 @@ Changelog
* FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional.
* FIX#1147: ``openml.flow.flow_exists`` no longer requires an API key.
* MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor.
* MAIN#1146: Update the pre-commit dependencies.
* ADD#1103: Add a ``predictions`` property to OpenMLRun for easy accessibility of prediction data.


9 changes: 7 additions & 2 deletions examples/30_extended/custom_flow_.py
@@ -85,7 +85,9 @@
# but that does not matter for this demonstration.

autosklearn_flow = openml.flows.get_flow(9313) # auto-sklearn 0.5.1
subflow = dict(components=OrderedDict(automl_tool=autosklearn_flow),)
subflow = dict(
components=OrderedDict(automl_tool=autosklearn_flow),
)

####################################################################################################
# With all parameters of the flow defined, we can now initialize the OpenMLFlow and publish.
@@ -98,7 +100,10 @@
# the model of the flow to `None`.

autosklearn_amlb_flow = openml.flows.OpenMLFlow(
**general, **flow_hyperparameters, **subflow, model=None,
**general,
**flow_hyperparameters,
**subflow,
model=None,
)
autosklearn_amlb_flow.publish()
print(f"autosklearn flow created: {autosklearn_amlb_flow.flow_id}")
10 changes: 8 additions & 2 deletions examples/30_extended/fetch_runtimes_tutorial.py
@@ -72,7 +72,10 @@
n_repeats, n_folds, n_samples = task.get_split_dimensions()
print(
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
task_id, n_repeats, n_folds, n_samples,
task_id,
n_repeats,
n_folds,
n_samples,
)
)

@@ -97,7 +100,10 @@ def print_compare_runtimes(measures):
clf = RandomForestClassifier(n_estimators=10)

run1 = openml.runs.run_model_on_task(
model=clf, task=task, upload_flow=False, avoid_duplicate_runs=False,
model=clf,
task=task,
upload_flow=False,
avoid_duplicate_runs=False,
)
measures = run1.fold_evaluations

6 changes: 5 additions & 1 deletion examples/30_extended/flows_and_runs_tutorial.py
@@ -176,7 +176,11 @@

# The following lines can then be executed offline:
run = openml.runs.run_model_on_task(
pipe, task, avoid_duplicate_runs=False, upload_flow=False, dataset_format="array",
pipe,
task,
avoid_duplicate_runs=False,
upload_flow=False,
dataset_format="array",
)

# The run may be stored offline, and the flow will be stored along with it:
12 changes: 10 additions & 2 deletions examples/30_extended/run_setup_tutorial.py
@@ -57,10 +57,18 @@
# easy as you want it to be


cat_imp = make_pipeline(OneHotEncoder(handle_unknown="ignore", sparse=False), TruncatedSVD(),)
cat_imp = make_pipeline(
OneHotEncoder(handle_unknown="ignore", sparse=False),
TruncatedSVD(),
)
cont_imp = SimpleImputer(strategy="median")
ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)])
model_original = Pipeline(steps=[("transform", ct), ("estimator", RandomForestClassifier()),])
model_original = Pipeline(
steps=[
("transform", ct),
("estimator", RandomForestClassifier()),
]
)

# Let's change some hyperparameters. Of course, in any good application we
# would tune them using, e.g., Random Search or Bayesian Optimization, but for
4 changes: 3 additions & 1 deletion examples/30_extended/study_tutorial.py
@@ -51,7 +51,9 @@
# And we can use the evaluation listing functionality to learn more about
# the evaluations available for the conducted runs:
evaluations = openml.evaluations.list_evaluations(
function="predictive_accuracy", output_format="dataframe", study=study.study_id,
function="predictive_accuracy",
output_format="dataframe",
study=study.study_id,
)
print(evaluations.head())

43 changes: 34 additions & 9 deletions examples/30_extended/task_manual_iteration_tutorial.py
@@ -44,7 +44,10 @@

print(
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
task_id, n_repeats, n_folds, n_samples,
task_id,
n_repeats,
n_folds,
n_samples,
)
)

@@ -53,7 +56,11 @@
# samples (indexing is zero-based). Usually, one would loop over all repeats, folds and sample
# sizes, but we can neglect this here as there is only a single repetition.

train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0,)
train_indices, test_indices = task.get_train_test_split_indices(
repeat=0,
fold=0,
sample=0,
)

print(train_indices.shape, train_indices.dtype)
print(test_indices.shape, test_indices.dtype)
@@ -69,7 +76,10 @@

print(
"X_train.shape: {}, y_train.shape: {}, X_test.shape: {}, y_test.shape: {}".format(
X_train.shape, y_train.shape, X_test.shape, y_test.shape,
X_train.shape,
y_train.shape,
X_test.shape,
y_test.shape,
)
)

@@ -82,7 +92,10 @@
n_repeats, n_folds, n_samples = task.get_split_dimensions()
print(
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
task_id, n_repeats, n_folds, n_samples,
task_id,
n_repeats,
n_folds,
n_samples,
)
)

@@ -92,7 +105,9 @@
for fold_idx in range(n_folds):
for sample_idx in range(n_samples):
train_indices, test_indices = task.get_train_test_split_indices(
repeat=repeat_idx, fold=fold_idx, sample=sample_idx,
repeat=repeat_idx,
fold=fold_idx,
sample=sample_idx,
)
X_train = X.iloc[train_indices]
y_train = y.iloc[train_indices]
@@ -121,7 +136,10 @@
n_repeats, n_folds, n_samples = task.get_split_dimensions()
print(
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
task_id, n_repeats, n_folds, n_samples,
task_id,
n_repeats,
n_folds,
n_samples,
)
)

@@ -131,7 +149,9 @@
for fold_idx in range(n_folds):
for sample_idx in range(n_samples):
train_indices, test_indices = task.get_train_test_split_indices(
repeat=repeat_idx, fold=fold_idx, sample=sample_idx,
repeat=repeat_idx,
fold=fold_idx,
sample=sample_idx,
)
X_train = X.iloc[train_indices]
y_train = y.iloc[train_indices]
@@ -160,7 +180,10 @@
n_repeats, n_folds, n_samples = task.get_split_dimensions()
print(
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
task_id, n_repeats, n_folds, n_samples,
task_id,
n_repeats,
n_folds,
n_samples,
)
)

@@ -170,7 +193,9 @@
for fold_idx in range(n_folds):
for sample_idx in range(n_samples):
train_indices, test_indices = task.get_train_test_split_indices(
repeat=repeat_idx, fold=fold_idx, sample=sample_idx,
repeat=repeat_idx,
fold=fold_idx,
sample=sample_idx,
)
X_train = X.iloc[train_indices]
y_train = y.iloc[train_indices]
55 changes: 41 additions & 14 deletions openml/_api_calls.py
@@ -69,15 +69,20 @@ def _perform_api_call(call, request_method, data=None, file_elements=None):
__check_response(response, url, file_elements)

logging.info(
"%.7fs taken for [%s] request for the URL %s", time.time() - start, request_method, url,
"%.7fs taken for [%s] request for the URL %s",
time.time() - start,
request_method,
url,
)
return response.text


def _download_minio_file(
source: str, destination: Union[str, pathlib.Path], exists_ok: bool = True,
source: str,
destination: Union[str, pathlib.Path],
exists_ok: bool = True,
) -> None:
""" Download file ``source`` from a MinIO Bucket and store it at ``destination``.
"""Download file ``source`` from a MinIO Bucket and store it at ``destination``.

Parameters
----------
@@ -103,7 +108,9 @@ def _download_minio_file(

try:
client.fget_object(
bucket_name=bucket, object_name=object_name, file_path=str(destination),
bucket_name=bucket,
object_name=object_name,
file_path=str(destination),
)
except minio.error.S3Error as e:
if e.message.startswith("Object does not exist"):
@@ -120,7 +127,7 @@ def _download_text_file(
exists_ok: bool = True,
encoding: str = "utf8",
) -> Optional[str]:
""" Download the text file at `source` and store it in `output_path`.
"""Download the text file at `source` and store it in `output_path`.

By default, do nothing if a file already exists in `output_path`.
The downloaded file can be checked against an expected md5 checksum.
@@ -156,7 +163,10 @@ def _download_text_file(

if output_path is None:
logging.info(
"%.7fs taken for [%s] request for the URL %s", time.time() - start, "get", source,
"%.7fs taken for [%s] request for the URL %s",
time.time() - start,
"get",
source,
)
return downloaded_file

Expand All @@ -165,7 +175,10 @@ def _download_text_file(
fh.write(downloaded_file)

logging.info(
"%.7fs taken for [%s] request for the URL %s", time.time() - start, "get", source,
"%.7fs taken for [%s] request for the URL %s",
time.time() - start,
"get",
source,
)

del downloaded_file
@@ -174,8 +187,8 @@

def _file_id_to_url(file_id, filename=None):
"""
Presents the URL how to download a given file id
filename is optional
Presents the URL how to download a given file id
filename is optional
"""
openml_url = config.server.split("/api/")
url = openml_url[0] + "/data/download/%s" % file_id
@@ -194,7 +207,12 @@ def _read_url_files(url, data=None, file_elements=None):
file_elements = {}
# Using requests.post sets header 'Accept-encoding' automatically to
# 'gzip,deflate'
response = _send_request(request_method="post", url=url, data=data, files=file_elements,)
response = _send_request(
request_method="post",
url=url,
data=data,
files=file_elements,
)
return response


@@ -258,7 +276,9 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None):
raise OpenMLServerError(
"Unexpected server error when calling {}. Please contact the "
"developers!\nStatus code: {}\n{}".format(
url, response.status_code, response.text,
url,
response.status_code,
response.text,
)
)
if retry_counter >= n_retries:
@@ -290,7 +310,9 @@ def __check_response(response, url, file_elements):


def __parse_server_exception(
response: requests.Response, url: str, file_elements: Dict,
response: requests.Response,
url: str,
file_elements: Dict,
) -> OpenMLServerError:

if response.status_code == 414:
@@ -319,12 +341,17 @@ def __parse_server_exception(

# 512 for runs, 372 for datasets, 500 for flows
# 482 for tasks, 542 for evaluations, 674 for setups
return OpenMLServerNoResult(code=code, message=full_message,)
return OpenMLServerNoResult(
code=code,
message=full_message,
)
# 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow)
if code in [163] and file_elements is not None and "description" in file_elements:
# file_elements['description'] is the XML file description of the flow
full_message = "\n{}\n{} - {}".format(
file_elements["description"], message, additional_information,
file_elements["description"],
message,
additional_information,
)
else:
full_message = "{} - {}".format(message, additional_information)
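
Note: the docstring edits in this file (for example '""" Download file ...' becoming '"""Download file ...') also come from the black upgrade, which strips the space after the opening triple quotes. A minimal sketch of the normalized style, using a hypothetical helper that is not part of the module:

import requests  # requests is already imported by openml/_api_calls.py


def _fetch_text(url: str) -> str:
    """Return the response body for ``url`` as text.

    The summary line sits flush against the opening quotes; black 22.6.0
    rewrites a leading space there automatically.
    """
    return requests.get(url).text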