Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proposal for fixed generation of benchmark data. #124

Merged
merged 13 commits into from
Nov 9, 2021
11 changes: 11 additions & 0 deletions .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ test_task:
benchmark_task:
only_if: ${SKIP_BENCHMARK_TASK} == ""
auto_cancellation: true
env:
PY_VER: "3.8"
name: "${CIRRUS_OS}: performance benchmarking"
# Custom clone behaviour to enable ASV to access the PR base branch (if on a
# PR).
Expand All @@ -134,6 +136,15 @@ benchmark_task:
fingerprint_script:
- echo "${CIRRUS_TASK_NAME}"
- if [ -n "${IRIS_SOURCE}" ]; then echo "${IRIS_SOURCE}"; fi
nox_cache:
folder: ${CIRRUS_WORKING_DIR}/.nox
reupload_on_changes: true
fingerprint_script:
- echo "${CIRRUS_TASK_NAME}"
- echo "${NOX_CACHE_BUILD}"
- if [ -n "${IRIS_SOURCE}" ]; then echo "${IRIS_SOURCE}"; fi
benchmarks_script:
- export CONDA_OVERRIDE_LINUX="$(uname -r | cut -d'+' -f1)"
- nox --session=tests --python --install-only
- export DATA_GEN_PYTHON=$(realpath $(find .nox -path "*tests*bin/python"))
- nox --session="benchmarks(ci compare)"
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ coverage.xml
.hypothesis/
.pytest_cache/
benchmarks/.asv*
benchmarks/.data

# Translations
*.mo
Expand Down
13 changes: 4 additions & 9 deletions benchmarks/benchmarks/ci/esmf_regridder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
import numpy as np
import dask.array as da
import iris
from iris.coord_systems import RotatedGeogCS
from iris.cube import Cube

from esmf_regrid.esmf_regridder import GridInfo
from esmf_regrid.schemes import ESMFAreaWeightedRegridder
from esmf_regrid.tests.unit.schemes.test__cube_to_GridInfo import _grid_cube

from ..generate_data import _grid_cube


def _make_small_grid_args():
Expand Down Expand Up @@ -65,16 +65,12 @@ def setup(self, type):
if type == "large target":
n_lons_tgt = 100
n_lats_tgt = 200
if type == "mixed":
coord_system_src = RotatedGeogCS(0, 90, 90)
else:
coord_system_src = None
grid = _grid_cube(
n_lons_src,
n_lats_src,
lon_bounds,
lat_bounds,
coord_system=coord_system_src,
alt_coord_system=(type == "mixed"),
)
tgt = _grid_cube(n_lons_tgt, n_lats_tgt, lon_bounds, lat_bounds)
src_data = np.arange(n_lats_src * n_lons_src * h).reshape(
Expand Down Expand Up @@ -115,13 +111,12 @@ def setup_cache(self):
h = 2000
# Rotated coord systems prevent pickling of the regridder so are
# removed for the time being.
# coord_system_src = RotatedGeogCS(0, 90, 90)
grid = _grid_cube(
n_lons_src,
n_lats_src,
lon_bounds,
lat_bounds,
# coord_system=coord_system_src,
# alt_coord_system=True,
trexfeathers marked this conversation as resolved.
Show resolved Hide resolved
)
tgt = _grid_cube(n_lons_tgt, n_lats_tgt, lon_bounds, lat_bounds)

Expand Down
127 changes: 127 additions & 0 deletions benchmarks/benchmarks/generate_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""
Scripts for generating supporting data for benchmarking.

Data generated using iris-esmf-regrid should use
:func:`run_function_elsewhere`, which means that data is generated using a
fixed version of iris-esmf-regrid and a fixed environment, rather than those
that get changed when the benchmarking run checks out a new commit.

Downstream use of data generated 'elsewhere' requires saving it, usually to a
NetCDF file. Pickling is also possible, but is risky if the benchmark sequence
runs across two different Python versions.

"""
from inspect import getsource
from subprocess import CalledProcessError, check_output, run
from os import environ
from pathlib import Path
from textwrap import dedent

from iris import load_cube

#: Python executable used by :func:`run_function_elsewhere`, set via env
#: variable of same name. Must be path of Python within an environment that
#: supports iris-esmf-regrid and has iris-esmf-regrid installed via
#: ``pip install -e``.
DATA_GEN_PYTHON = environ.get("DATA_GEN_PYTHON", "")
try:
    # Fail fast at import time: run a trivial statement with the configured
    # interpreter so that a missing or unusable path is reported here rather
    # than part-way through a benchmark run. An unset variable falls back to
    # an empty string, which is not a usable executable path.
    _ = check_output([DATA_GEN_PYTHON, "-c", "a = True"])
except (CalledProcessError, FileNotFoundError, PermissionError) as err:
    error = (
        f"Expected valid python executable path from env variable "
        f"DATA_GEN_PYTHON. Got: {DATA_GEN_PYTHON}"
    )
    # Chain explicitly so the underlying OS/process failure stays visible as
    # the direct cause of the ValueError.
    raise ValueError(error) from err


def run_function_elsewhere(func_to_run, *args, **kwargs):
    """
    Run a given function using the :const:`DATA_GEN_PYTHON` executable.

    The source of ``func_to_run`` is extracted, followed by a call expression
    built from the ``repr`` of each argument, and the resulting script is
    passed to ``DATA_GEN_PYTHON -c``. This structure allows the function to be
    written natively.

    Parameters
    ----------
    func_to_run : FunctionType
        The function object to be run.
        NOTE: the function must be completely self-contained, i.e. perform all
        its own imports (within the target :const:`DATA_GEN_PYTHON`
        environment).
    *args : tuple, optional
        Function call arguments. Must all be expressible as simple literals,
        i.e. the ``repr`` must be a valid literal expression.
    **kwargs: dict, optional
        Function call keyword arguments. All values must be expressible as
        simple literals (see ``*args``).

    Returns
    -------
    bytes
        The captured ``stdout`` from the run. The subprocess is not run in
        text mode, so the output is undecoded bytes.

    Raises
    ------
    subprocess.CalledProcessError
        If the subprocess exits with a non-zero status (``check=True``). The
        captured output is available on the exception's ``stdout`` and
        ``stderr`` attributes.

    """
    # ``getsource`` keeps the indentation of nested functions; dedent so the
    # definition is valid at the top level of the generated script.
    func_string = dedent(getsource(func_to_run))

    call_terms = [repr(arg) for arg in args]
    call_terms += [f"{name}={val!r}" for name, val in kwargs.items()]
    func_call_string = f"{func_to_run.__name__}({','.join(call_terms)})"

    # Script = the function definition followed by a single call to it.
    python_string = "\n".join([func_string, func_call_string])
    result = run(
        [DATA_GEN_PYTHON, "-c", python_string],
        capture_output=True,
        check=True,
    )
    return result.stdout


def _grid_cube(
    n_lons,
    n_lats,
    lon_outer_bounds,
    lat_outer_bounds,
    circular=False,
    alt_coord_system=False,
):
    """
    Wrapper for calling _grid_cube via :func:`run_function_elsewhere`.

    The cube is built by the ``_grid_cube`` test helper of iris-esmf-regrid,
    executed in the :const:`DATA_GEN_PYTHON` environment, saved to a NetCDF
    file under ``benchmarks/.data`` and then loaded back in this process.

    Parameters
    ----------
    n_lons, n_lats, lon_outer_bounds, lat_outer_bounds, circular
        Forwarded positionally, in this order, to the original ``_grid_cube``.
        All must have a ``repr`` that is a valid literal expression (see
        :func:`run_function_elsewhere`).
    alt_coord_system : bool, default=False
        If true, the cube is generated with ``RotatedGeogCS(0, 90, 90)`` as
        its ``coord_system``; the object is constructed in the external
        process because it cannot be passed as a literal.

    Returns
    -------
    iris.cube.Cube
        The cube loaded from the generated NetCDF file.

    """
    # Local import: only needed for building the file name below.
    import re

    def external(*args, **kwargs):
        """
        Prep and call _grid_cube, saving to a NetCDF file.

        Saving to a file allows the original python executable to pick back up.

        Remember that all arguments must work as strings, hence the fresh
        construction of a ``coord_system`` within the function.

        """
        from iris import save
        from iris.coord_systems import RotatedGeogCS

        from esmf_regrid.tests.unit.schemes.test__cube_to_GridInfo import (
            _grid_cube as original,
        )

        save_path = kwargs.pop("save_path")

        if kwargs.pop("alt_coord_system"):
            kwargs["coord_system"] = RotatedGeogCS(0, 90, 90)

        cube = original(*args, **kwargs)
        save(cube, save_path)

    save_dir = (Path(__file__).parent.parent / ".data").resolve()
    save_dir.mkdir(parents=True, exist_ok=True)

    # Give each argument combination its own file. With a single shared file
    # name, a later call would overwrite the file behind a cube returned by an
    # earlier call, which may still refer to that file lazily.
    name_sections = [
        "_grid_cube",
        n_lons,
        n_lats,
        lon_outer_bounds,
        lat_outer_bounds,
        circular,
        alt_coord_system,
    ]
    raw_name = "_".join(str(section) for section in name_sections)
    # Collapse anything that is not filename-safe into underscores.
    file_stem = re.sub(r"[^\w.-]+", "_", raw_name)
    # TODO: caching? Currently written assuming overwrite every time.
    save_path = save_dir / f"{file_stem}.nc"

    _ = run_function_elsewhere(
        external,
        n_lons,
        n_lats,
        lon_outer_bounds,
        lat_outer_bounds,
        circular,
        alt_coord_system=alt_coord_system,
        save_path=str(save_path),
    )
    return_cube = load_cube(str(save_path))
    return return_cube
4 changes: 2 additions & 2 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ def tests(session: nox.sessions.Session):
session.run("pytest")


@nox.session
@nox.session(python=PY_VER, venv_backend="conda")
@nox.parametrize(
["ci_mode", "gh_pages"],
[(True, False), (False, False), (False, True)],
Expand Down Expand Up @@ -372,7 +372,7 @@ def asv_exec(*sub_args: str) -> None:
help_output = session.run(*run_args, "--help", silent=True)
if "--python" in help_output:
# Not all asv commands accept the --python kwarg.
run_args.append(f"--python={PY_VER[-1]}")
run_args.append(f"--python={session.python}")
session.run(*run_args)

if ci_mode:
Expand Down