pre-commit update #1150

Merged
8 commits merged on Jul 11, 2022
2 changes: 1 addition & 1 deletion .flake8
@@ -5,7 +5,7 @@ select = C,E,F,W,B,T
ignore = E203, E402, W503
per-file-ignores =
*__init__.py:F401
*cli.py:T001
*cli.py:T201
exclude =
venv
examples
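
Note: the T001 -> T201 change tracks the flake8-print bump further down in this diff; flake8-print 5.x renumbered its error codes, so "print found" is now reported as T201 instead of T001 and the per-file ignore for *cli.py has to follow. A minimal sketch of the kind of line this ignore keeps allowed (the message is made up):

# openml/cli.py: console output is intentional here, so the flake8-print
# warning T201 ("print found") is suppressed for *cli.py by the ignore above.
print("Cleared the local OpenML cache.")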
16 changes: 11 additions & 5 deletions .pre-commit-config.yaml
@@ -1,28 +1,34 @@
repos:
- repo: https://github.com/psf/black
rev: 19.10b0
rev: 22.6.0
hooks:
- id: black
args: [--line-length=100]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.761
rev: v0.961
hooks:
- id: mypy
name: mypy openml
files: openml/.*
additional_dependencies:
- types-requests
- types-python-dateutil
- id: mypy
name: mypy tests
files: tests/.*
additional_dependencies:
- types-requests
- types-python-dateutil
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.3
rev: 4.0.1
hooks:
- id: flake8
name: flake8 openml
files: openml/.*
additional_dependencies:
- flake8-print==3.1.4
- flake8-print==5.0.0
- id: flake8
name: flake8 tests
files: tests/.*
additional_dependencies:
- flake8-print==3.1.4
- flake8-print==5.0.0
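
Note: the new additional_dependencies for the mypy hooks (types-requests, types-python-dateutil) are needed because mypy stopped bundling third-party stubs with the 0.900 series, so the stub packages have to be installed into the hook environment explicitly. Most of the reformatting in the files below follows from the black bump from 19.10b0 to 22.6.0 rather than from manual edits: newer black (since about 20.8) treats a trailing comma after the last argument as a "magic trailing comma" and keeps such a call exploded with one argument per line, whereas 19.10b0 collapsed it onto a single line whenever it fit within the 100-character limit. A minimal, self-contained sketch of the effect; the dict call is only illustrative, but the same applies to the openml.runs.run_model_on_task and make_pipeline calls in the example diffs below:

# How black 19.10b0 left it: the call fits in 100 characters, so it stays on one line.
params = dict(model="clf", task="task", upload_flow=False, avoid_duplicate_runs=False,)

# How black 22.6.0 formats it: the trailing comma forces one argument per line.
params = dict(
    model="clf",
    task="task",
    upload_flow=False,
    avoid_duplicate_runs=False,
)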
1 change: 1 addition & 0 deletions doc/progress.rst
@@ -12,6 +12,7 @@ Changelog
* FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional.
* FIX#1147: ``openml.flow.flow_exists`` no longer requires an API key.
* MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor.
* MAIN#1146: Update the pre-commit dependencies.
* ADD#1103: Add a ``predictions`` property to OpenMLRun for easy accessibility of prediction data.


9 changes: 7 additions & 2 deletions examples/30_extended/custom_flow_.py
@@ -85,7 +85,9 @@
# but that does not matter for this demonstration.

autosklearn_flow = openml.flows.get_flow(9313) # auto-sklearn 0.5.1
subflow = dict(components=OrderedDict(automl_tool=autosklearn_flow),)
subflow = dict(
components=OrderedDict(automl_tool=autosklearn_flow),
)

####################################################################################################
# With all parameters of the flow defined, we can now initialize the OpenMLFlow and publish.
@@ -98,7 +100,10 @@
# the model of the flow to `None`.

autosklearn_amlb_flow = openml.flows.OpenMLFlow(
**general, **flow_hyperparameters, **subflow, model=None,
**general,
**flow_hyperparameters,
**subflow,
model=None,
)
autosklearn_amlb_flow.publish()
print(f"autosklearn flow created: {autosklearn_amlb_flow.flow_id}")
10 changes: 8 additions & 2 deletions examples/30_extended/fetch_runtimes_tutorial.py
@@ -72,7 +72,10 @@
n_repeats, n_folds, n_samples = task.get_split_dimensions()
print(
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
task_id, n_repeats, n_folds, n_samples,
task_id,
n_repeats,
n_folds,
n_samples,
)
)

@@ -97,7 +100,10 @@ def print_compare_runtimes(measures):
clf = RandomForestClassifier(n_estimators=10)

run1 = openml.runs.run_model_on_task(
model=clf, task=task, upload_flow=False, avoid_duplicate_runs=False,
model=clf,
task=task,
upload_flow=False,
avoid_duplicate_runs=False,
)
measures = run1.fold_evaluations

6 changes: 5 additions & 1 deletion examples/30_extended/flows_and_runs_tutorial.py
@@ -176,7 +176,11 @@

# The following lines can then be executed offline:
run = openml.runs.run_model_on_task(
pipe, task, avoid_duplicate_runs=False, upload_flow=False, dataset_format="array",
pipe,
task,
avoid_duplicate_runs=False,
upload_flow=False,
dataset_format="array",
)

# The run may be stored offline, and the flow will be stored along with it:
12 changes: 10 additions & 2 deletions examples/30_extended/run_setup_tutorial.py
@@ -57,10 +57,18 @@
# easy as you want it to be


cat_imp = make_pipeline(OneHotEncoder(handle_unknown="ignore", sparse=False), TruncatedSVD(),)
cat_imp = make_pipeline(
OneHotEncoder(handle_unknown="ignore", sparse=False),
TruncatedSVD(),
)
cont_imp = SimpleImputer(strategy="median")
ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)])
model_original = Pipeline(steps=[("transform", ct), ("estimator", RandomForestClassifier()),])
model_original = Pipeline(
steps=[
("transform", ct),
("estimator", RandomForestClassifier()),
]
)

# Let's change some hyperparameters. Of course, in any good application we
# would tune them using, e.g., Random Search or Bayesian Optimization, but for
4 changes: 3 additions & 1 deletion examples/30_extended/study_tutorial.py
@@ -51,7 +51,9 @@
# And we can use the evaluation listing functionality to learn more about
# the evaluations available for the conducted runs:
evaluations = openml.evaluations.list_evaluations(
function="predictive_accuracy", output_format="dataframe", study=study.study_id,
function="predictive_accuracy",
output_format="dataframe",
study=study.study_id,
)
print(evaluations.head())

43 changes: 34 additions & 9 deletions examples/30_extended/task_manual_iteration_tutorial.py
@@ -44,7 +44,10 @@

print(
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
task_id, n_repeats, n_folds, n_samples,
task_id,
n_repeats,
n_folds,
n_samples,
)
)

@@ -53,7 +56,11 @@
# samples (indexing is zero-based). Usually, one would loop over all repeats, folds and sample
# sizes, but we can neglect this here as there is only a single repetition.

train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0,)
train_indices, test_indices = task.get_train_test_split_indices(
repeat=0,
fold=0,
sample=0,
)

print(train_indices.shape, train_indices.dtype)
print(test_indices.shape, test_indices.dtype)
@@ -69,7 +76,10 @@

print(
"X_train.shape: {}, y_train.shape: {}, X_test.shape: {}, y_test.shape: {}".format(
X_train.shape, y_train.shape, X_test.shape, y_test.shape,
X_train.shape,
y_train.shape,
X_test.shape,
y_test.shape,
)
)

@@ -82,7 +92,10 @@
n_repeats, n_folds, n_samples = task.get_split_dimensions()
print(
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
task_id, n_repeats, n_folds, n_samples,
task_id,
n_repeats,
n_folds,
n_samples,
)
)

@@ -92,7 +105,9 @@
for fold_idx in range(n_folds):
for sample_idx in range(n_samples):
train_indices, test_indices = task.get_train_test_split_indices(
repeat=repeat_idx, fold=fold_idx, sample=sample_idx,
repeat=repeat_idx,
fold=fold_idx,
sample=sample_idx,
)
X_train = X.iloc[train_indices]
y_train = y.iloc[train_indices]
@@ -121,7 +136,10 @@
n_repeats, n_folds, n_samples = task.get_split_dimensions()
print(
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
task_id, n_repeats, n_folds, n_samples,
task_id,
n_repeats,
n_folds,
n_samples,
)
)

@@ -131,7 +149,9 @@
for fold_idx in range(n_folds):
for sample_idx in range(n_samples):
train_indices, test_indices = task.get_train_test_split_indices(
repeat=repeat_idx, fold=fold_idx, sample=sample_idx,
repeat=repeat_idx,
fold=fold_idx,
sample=sample_idx,
)
X_train = X.iloc[train_indices]
y_train = y.iloc[train_indices]
@@ -160,7 +180,10 @@
n_repeats, n_folds, n_samples = task.get_split_dimensions()
print(
"Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format(
task_id, n_repeats, n_folds, n_samples,
task_id,
n_repeats,
n_folds,
n_samples,
)
)

@@ -170,7 +193,9 @@
for fold_idx in range(n_folds):
for sample_idx in range(n_samples):
train_indices, test_indices = task.get_train_test_split_indices(
repeat=repeat_idx, fold=fold_idx, sample=sample_idx,
repeat=repeat_idx,
fold=fold_idx,
sample=sample_idx,
)
X_train = X.iloc[train_indices]
y_train = y.iloc[train_indices]
55 changes: 41 additions & 14 deletions openml/_api_calls.py
@@ -69,15 +69,20 @@ def _perform_api_call(call, request_method, data=None, file_elements=None):
__check_response(response, url, file_elements)

logging.info(
"%.7fs taken for [%s] request for the URL %s", time.time() - start, request_method, url,
"%.7fs taken for [%s] request for the URL %s",
time.time() - start,
request_method,
url,
)
return response.text


def _download_minio_file(
source: str, destination: Union[str, pathlib.Path], exists_ok: bool = True,
source: str,
destination: Union[str, pathlib.Path],
exists_ok: bool = True,
) -> None:
""" Download file ``source`` from a MinIO Bucket and store it at ``destination``.
"""Download file ``source`` from a MinIO Bucket and store it at ``destination``.

Parameters
----------
@@ -103,7 +108,9 @@ def _download_minio_file(

try:
client.fget_object(
bucket_name=bucket, object_name=object_name, file_path=str(destination),
bucket_name=bucket,
object_name=object_name,
file_path=str(destination),
)
except minio.error.S3Error as e:
if e.message.startswith("Object does not exist"):
@@ -120,7 +127,7 @@ def _download_text_file(
exists_ok: bool = True,
encoding: str = "utf8",
) -> Optional[str]:
""" Download the text file at `source` and store it in `output_path`.
"""Download the text file at `source` and store it in `output_path`.

By default, do nothing if a file already exists in `output_path`.
The downloaded file can be checked against an expected md5 checksum.
@@ -156,7 +163,10 @@ def _download_text_file(

if output_path is None:
logging.info(
"%.7fs taken for [%s] request for the URL %s", time.time() - start, "get", source,
"%.7fs taken for [%s] request for the URL %s",
time.time() - start,
"get",
source,
)
return downloaded_file

Expand All @@ -165,7 +175,10 @@ def _download_text_file(
fh.write(downloaded_file)

logging.info(
"%.7fs taken for [%s] request for the URL %s", time.time() - start, "get", source,
"%.7fs taken for [%s] request for the URL %s",
time.time() - start,
"get",
source,
)

del downloaded_file
@@ -174,8 +187,8 @@

def _file_id_to_url(file_id, filename=None):
"""
Presents the URL how to download a given file id
filename is optional
Presents the URL how to download a given file id
filename is optional
"""
openml_url = config.server.split("/api/")
url = openml_url[0] + "/data/download/%s" % file_id
@@ -194,7 +207,12 @@ def _read_url_files(url, data=None, file_elements=None):
file_elements = {}
# Using requests.post sets header 'Accept-encoding' automatically to
# 'gzip,deflate'
response = _send_request(request_method="post", url=url, data=data, files=file_elements,)
response = _send_request(
request_method="post",
url=url,
data=data,
files=file_elements,
)
return response


@@ -258,7 +276,9 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None):
raise OpenMLServerError(
"Unexpected server error when calling {}. Please contact the "
"developers!\nStatus code: {}\n{}".format(
url, response.status_code, response.text,
url,
response.status_code,
response.text,
)
)
if retry_counter >= n_retries:
@@ -290,7 +310,9 @@ def __check_response(response, url, file_elements):


def __parse_server_exception(
response: requests.Response, url: str, file_elements: Dict,
response: requests.Response,
url: str,
file_elements: Dict,
) -> OpenMLServerError:

if response.status_code == 414:
@@ -319,12 +341,17 @@ def __parse_server_exception(

# 512 for runs, 372 for datasets, 500 for flows
# 482 for tasks, 542 for evaluations, 674 for setups
return OpenMLServerNoResult(code=code, message=full_message,)
return OpenMLServerNoResult(
code=code,
message=full_message,
)
# 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow)
if code in [163] and file_elements is not None and "description" in file_elements:
# file_elements['description'] is the XML file description of the flow
full_message = "\n{}\n{} - {}".format(
file_elements["description"], message, additional_information,
file_elements["description"],
message,
additional_information,
)
else:
full_message = "{} - {}".format(message, additional_information)
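
Note: the docstring edits in this file (for example '""" Download file ...' becoming '"""Download file ...') also come from the black upgrade, which strips the space after the opening triple quotes. A minimal sketch of the normalized style, using a hypothetical helper that is not part of the module:

import requests  # requests is already imported by openml/_api_calls.py


def _fetch_text(url: str) -> str:
    """Return the response body for ``url`` as text.

    The summary line sits flush against the opening quotes; black 22.6.0
    rewrites a leading space there automatically.
    """
    return requests.get(url).text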