Skip to content

Commit

Permalink
Proposal for fixed generation of benchmark data. (#124)
Browse files Browse the repository at this point in the history
  • Loading branch information
trexfeathers authored Nov 9, 2021
1 parent 82c4948 commit fa2e34b
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 11 deletions.
11 changes: 11 additions & 0 deletions .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ test_task:
benchmark_task:
only_if: ${SKIP_BENCHMARK_TASK} == ""
auto_cancellation: true
env:
PY_VER: "3.8"
name: "${CIRRUS_OS}: performance benchmarking"
# Custom clone behaviour to enable ASV to access the PR base branch (if on a
# PR).
Expand All @@ -134,6 +136,15 @@ benchmark_task:
fingerprint_script:
- echo "${CIRRUS_TASK_NAME}"
- if [ -n "${IRIS_SOURCE}" ]; then echo "${IRIS_SOURCE}"; fi
nox_cache:
folder: ${CIRRUS_WORKING_DIR}/.nox
reupload_on_changes: true
fingerprint_script:
- echo "${CIRRUS_TASK_NAME}"
- echo "${NOX_CACHE_BUILD}"
- if [ -n "${IRIS_SOURCE}" ]; then echo "${IRIS_SOURCE}"; fi
benchmarks_script:
- export CONDA_OVERRIDE_LINUX="$(uname -r | cut -d'+' -f1)"
- nox --session=tests --install-only
- export DATA_GEN_PYTHON=$(realpath $(find .nox -path "*tests*bin/python"))
- nox --session="benchmarks(ci compare)"
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ coverage.xml
.hypothesis/
.pytest_cache/
benchmarks/.asv*
benchmarks/.data

# Translations
*.mo
Expand Down
13 changes: 4 additions & 9 deletions benchmarks/benchmarks/ci/esmf_regridder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
import numpy as np
import dask.array as da
import iris
from iris.coord_systems import RotatedGeogCS
from iris.cube import Cube

from esmf_regrid.esmf_regridder import GridInfo
from esmf_regrid.schemes import ESMFAreaWeightedRegridder
from esmf_regrid.tests.unit.schemes.test__cube_to_GridInfo import _grid_cube

from ..generate_data import _grid_cube


def _make_small_grid_args():
Expand Down Expand Up @@ -65,16 +65,12 @@ def setup(self, type):
if type == "large target":
n_lons_tgt = 100
n_lats_tgt = 200
if type == "mixed":
coord_system_src = RotatedGeogCS(0, 90, 90)
else:
coord_system_src = None
grid = _grid_cube(
n_lons_src,
n_lats_src,
lon_bounds,
lat_bounds,
coord_system=coord_system_src,
alt_coord_system=(type == "mixed"),
)
tgt = _grid_cube(n_lons_tgt, n_lats_tgt, lon_bounds, lat_bounds)
src_data = np.arange(n_lats_src * n_lons_src * h).reshape(
Expand Down Expand Up @@ -115,13 +111,12 @@ def setup_cache(self):
h = 2000
# Rotated coord systems prevent pickling of the regridder so are
# removed for the time being.
# coord_system_src = RotatedGeogCS(0, 90, 90)
grid = _grid_cube(
n_lons_src,
n_lats_src,
lon_bounds,
lat_bounds,
# coord_system=coord_system_src,
# alt_coord_system=True,
)
tgt = _grid_cube(n_lons_tgt, n_lats_tgt, lon_bounds, lat_bounds)

Expand Down
127 changes: 127 additions & 0 deletions benchmarks/benchmarks/generate_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""
Scripts for generating supporting data for benchmarking.
Data generated using iris-esmf-regrid should use
:func:`run_function_elsewhere`, which means that data is generated using a
fixed version of iris-esmf-regrid and a fixed environment, rather than those
that get changed when the benchmarking run checks out a new commit.
Downstream use of data generated 'elsewhere' requires saving it, usually in a
NetCDF file. Pickling could also be used, but it is risky if the benchmark
sequence runs over two different Python versions.
"""
from inspect import getsource
from subprocess import CalledProcessError, check_output, run
from os import environ
from pathlib import Path
from textwrap import dedent

from iris import load_cube

#: Python executable used by :func:`run_function_elsewhere`, set via env
#: variable of same name. Must be path of Python within an environment that
#: supports iris-esmf-regrid and has iris-esmf-regrid installed via
#: ``pip install -e``.
# Resolve and validate the interpreter once, at import time, so that a
# misconfigured environment fails immediately instead of part-way through a
# benchmark run.
try:
    DATA_GEN_PYTHON = environ["DATA_GEN_PYTHON"]
    # Smoke test: the path must be able to execute a trivial Python statement.
    _ = check_output([DATA_GEN_PYTHON, "-c", "a = True"])
except KeyError as err:
    error = "Env variable DATA_GEN_PYTHON not defined."
    # Chain explicitly so the traceback shows the lookup as the direct cause.
    raise KeyError(error) from err
except (CalledProcessError, FileNotFoundError, PermissionError) as err:
    error = "Env variable DATA_GEN_PYTHON not a runnable python executable path."
    # Keep the underlying OS / subprocess failure attached as the cause.
    raise ValueError(error) from err


def run_function_elsewhere(func_to_run, *args, **kwargs):
    """
    Run a given function using the :const:`DATA_GEN_PYTHON` executable.

    The source of ``func_to_run`` is extracted with :func:`inspect.getsource`
    and dedented, a call expression is appended (built from the ``repr`` of
    every argument), and the resulting script is handed to the executable via
    ``-c``. This structure allows the function to be written natively.

    Parameters
    ----------
    func_to_run : FunctionType
        The function object to be run.
        NOTE: the function must be completely self-contained, i.e. perform all
        its own imports (within the target :const:`DATA_GEN_PYTHON`
        environment).
    *args : tuple, optional
        Function call arguments. Must all be expressible as simple literals,
        i.e. the ``repr`` must be a valid literal expression.
    **kwargs: dict, optional
        Function call keyword arguments. All values must be expressible as
        simple literals (see ``*args``).

    Returns
    -------
    bytes
        The captured ``stdout`` from the run. The subprocess is not run in
        text mode, so the output is undecoded.

    Raises
    ------
    subprocess.CalledProcessError
        If the executable exits with a non-zero status. The captured
        ``stderr`` of the subprocess is echoed to this process's ``stderr``
        before the error propagates.
    """
    import sys

    func_string = dedent(getsource(func_to_run))
    call_terms = [repr(arg) for arg in args]
    call_terms += [f"{name}={val!r}" for name, val in kwargs.items()]
    func_call_string = f"{func_to_run.__name__}({','.join(call_terms)})"
    # The script is the function definition followed by a single call to it.
    python_string = "\n".join([func_string, func_call_string])
    try:
        result = run(
            [DATA_GEN_PYTHON, "-c", python_string],
            capture_output=True,
            check=True,
        )
    except CalledProcessError as err:
        # stderr was captured, so without this the subprocess traceback would
        # never be shown; surface it, then re-raise the same exception.
        if err.stderr:
            sys.stderr.write(err.stderr.decode(errors="replace"))
        raise
    return result.stdout


def _grid_cube(
    n_lons,
    n_lats,
    lon_outer_bounds,
    lat_outer_bounds,
    circular=False,
    alt_coord_system=False,
):
    """
    Build a grid cube via :func:`run_function_elsewhere` and load it back.

    The cube is created by iris-esmf-regrid's own ``_grid_cube`` test helper,
    run in the :const:`DATA_GEN_PYTHON` environment, saved to a NetCDF file
    in the ``.data`` directory two levels above this file, and then loaded
    in the current process with :func:`iris.load_cube`.

    The first five parameters are forwarded positionally to the helper.
    When ``alt_coord_system`` is true, the helper is additionally given
    ``coord_system=RotatedGeogCS(0, 90, 90)``.
    """

    def external(*args, **kwargs):
        """
        Call the original _grid_cube and save the result to a NetCDF file.

        Writing a file is what lets the calling python executable pick the
        result back up. Every argument arrives via its ``repr``, so the
        ``coord_system`` object has to be constructed here rather than being
        passed in.
        """
        from iris import save
        from iris.coord_systems import RotatedGeogCS

        from esmf_regrid.tests.unit.schemes.test__cube_to_GridInfo import (
            _grid_cube as original,
        )

        # Strip the wrapper-only keywords before forwarding the rest.
        target_file = kwargs.pop("save_path")
        use_rotated = kwargs.pop("alt_coord_system")
        if use_rotated:
            kwargs["coord_system"] = RotatedGeogCS(0, 90, 90)

        save(original(*args, **kwargs), target_file)

    data_dir = Path(__file__).parent.parent.joinpath(".data").resolve()
    data_dir.mkdir(exist_ok=True)
    # TODO: caching? Currently written assuming overwrite every time.
    # NOTE(review): every call writes the same file name; if a previously
    # returned cube still reads lazily from this file, a later call would
    # replace what it reads - confirm.
    nc_file = str(data_dir / "_grid_cube.nc")

    positional = (n_lons, n_lats, lon_outer_bounds, lat_outer_bounds, circular)
    run_function_elsewhere(
        external,
        *positional,
        alt_coord_system=alt_coord_system,
        save_path=nc_file,
    )
    return load_cube(nc_file)
4 changes: 2 additions & 2 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ def tests(session: nox.sessions.Session):
session.run("pytest")


@nox.session
@nox.session(python=PY_VER, venv_backend="conda")
@nox.parametrize(
["ci_mode", "gh_pages"],
[(True, False), (False, False), (False, True)],
Expand Down Expand Up @@ -370,7 +370,7 @@ def asv_exec(*sub_args: str) -> None:
help_output = session.run(*run_args, "--help", silent=True)
if "--python" in help_output:
# Not all asv commands accept the --python kwarg.
run_args.append(f"--python={PY_VER[-1]}")
run_args.append(f"--python={session.python}")
session.run(*run_args)

if ci_mode:
Expand Down

0 comments on commit fa2e34b

Please sign in to comment.