From 962d441311c72f4906af439931a03b398dc3f501 Mon Sep 17 00:00:00 2001
From: Tristan Abbott
Date: Mon, 1 Jul 2024 18:09:33 +0000
Subject: [PATCH 01/11] remove lock=False kwarg

---
 CHANGELOG.md                                | 10 ++++++++++
 pycontrails/datalib/_met_utils/metsource.py |  4 ----
 pyproject.toml                              |  4 ++--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8fa731507..edb879df8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Changelog
 
+## v0.52.1
+
+### Breaking changes
+
+- Remove `lock=False` as a default keyword argument to `xr.open_mfdataset` in the `MetDataSource.open_dataset` method. This reverts a change from [v0.44.1](https://github.com/contrailcirrus/pycontrails/releases/tag/v0.44.1) and prevents segmentation faults when using recent versions of [netCDF4](https://pypi.org/project/netCDF4/) (1.7.0 and above).
+
+### Internals
+
+- Remove upper limits on netCDF4 and numpy versions.
+
 ## v0.52.0
 
 ### Breaking changes
diff --git a/pycontrails/datalib/_met_utils/metsource.py b/pycontrails/datalib/_met_utils/metsource.py
index 48cda339d..9d0c8a6bd 100644
--- a/pycontrails/datalib/_met_utils/metsource.py
+++ b/pycontrails/datalib/_met_utils/metsource.py
@@ -35,9 +35,6 @@
 #: Whether to open multi-file datasets in parallel
 OPEN_IN_PARALLEL: bool = False
 
-#: Whether to use file locking when opening multi-file datasets
-OPEN_WITH_LOCK: bool = False
-
 
 def parse_timesteps(time: TimeInput | None, freq: str | None = "1h") -> list[datetime]:
     """Parse time input into set of time steps.
@@ -741,5 +738,4 @@ def open_dataset(
         xr_kwargs.setdefault("engine", NETCDF_ENGINE)
         xr_kwargs.setdefault("chunks", DEFAULT_CHUNKS)
         xr_kwargs.setdefault("parallel", OPEN_IN_PARALLEL)
-        xr_kwargs.setdefault("lock", OPEN_WITH_LOCK)
         return xr.open_mfdataset(disk_paths, **xr_kwargs)
diff --git a/pyproject.toml b/pyproject.toml
index 806686069..ddedf7dd9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,7 +38,7 @@ readme = { file = "README.md", content-type = "text/markdown" }
 
 dependencies = [
     "dask>=2022.3",
-    "numpy>=1.22,<2.0.0",
+    "numpy>=1.22",
     "overrides>=6.1",
     "pandas>=2.2",
     "scipy>=1.10",
@@ -99,7 +99,7 @@ ecmwf = [
     "cfgrib>=0.9",
     "eccodes>=1.4",
     "ecmwf-api-client>=1.6",
-    "netcdf4>=1.6.1,<1.7.0",
+    "netcdf4>=1.6.1",
     "platformdirs>=3.0",
     "requests>=2.25",
     "lxml>=5.1.0",

From c61872c059bb4b4db22d93893f8a94aa9ece5ad3 Mon Sep 17 00:00:00 2001
From: Zeb Engberg
Date: Mon, 1 Jul 2024 16:22:27 -0600
Subject: [PATCH 02/11] debugging test, revert me

---
 tests/unit/test_throwaway.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 tests/unit/test_throwaway.py

diff --git a/tests/unit/test_throwaway.py b/tests/unit/test_throwaway.py
new file mode 100644
index 000000000..a9949a3cb
--- /dev/null
+++ b/tests/unit/test_throwaway.py
@@ -0,0 +1,13 @@
+import h5netcdf  # noqa
+import xarray as xr
+
+
+def test():
+    ds = xr.Dataset(
+        {
+            "a": (["x"], [1, 2, 3]),
+            "b": (["x"], [4, 5, 6]),
+        },
+        coords={"x": [10, 20, 30]},
+    )
+    ds.to_netcdf("test.nc")

From 8174a9e06d04a6e414cff04dd9c1bb6c3ab9482e Mon Sep 17 00:00:00 2001
From: Zeb Engberg
Date: Mon, 1 Jul 2024 16:28:47 -0600
Subject: [PATCH 03/11] Revert "debugging test, revert me"

This reverts commit c61872c059bb4b4db22d93893f8a94aa9ece5ad3.
---
 tests/unit/test_throwaway.py | 13 -------------
 1 file changed, 13 deletions(-)
 delete mode 100644 tests/unit/test_throwaway.py

diff --git a/tests/unit/test_throwaway.py b/tests/unit/test_throwaway.py
deleted file mode 100644
index a9949a3cb..000000000
--- a/tests/unit/test_throwaway.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import h5netcdf  # noqa
-import xarray as xr
-
-
-def test():
-    ds = xr.Dataset(
-        {
-            "a": (["x"], [1, 2, 3]),
-            "b": (["x"], [4, 5, 6]),
-        },
-        coords={"x": [10, 20, 30]},
-    )
-    ds.to_netcdf("test.nc")

From ce8e029f1f6908e25481d3b446026c2af90154b7 Mon Sep 17 00:00:00 2001
From: Tristan Abbott
Date: Tue, 2 Jul 2024 18:05:05 +0000
Subject: [PATCH 04/11] remove h5netcdf dependency

---
 CHANGELOG.md                |  2 ++
 pycontrails/datalib/goes.py | 14 +++++++++++---
 pyproject.toml              |  1 -
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index edb879df8..4ebeffd06 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,10 +5,12 @@
 ### Breaking changes
 
 - Remove `lock=False` as a default keyword argument to `xr.open_mfdataset` in the `MetDataSource.open_dataset` method. This reverts a change from [v0.44.1](https://github.com/contrailcirrus/pycontrails/releases/tag/v0.44.1) and prevents segmentation faults when using recent versions of [netCDF4](https://pypi.org/project/netCDF4/) (1.7.0 and above).
+- GOES imagery is now loaded from a temporary file on disk rather than directly from memory when using `GOES.get` without a cachestore.
 
 ### Internals
 
 - Remove upper limits on netCDF4 and numpy versions.
+- Remove h5netcdf dependency.
 
 ## v0.52.0
 
diff --git a/pycontrails/datalib/goes.py b/pycontrails/datalib/goes.py
index b4cc6c293..3b380ec88 100644
--- a/pycontrails/datalib/goes.py
+++ b/pycontrails/datalib/goes.py
@@ -16,7 +16,7 @@
 
 import datetime
 import enum
-import io
+import tempfile
 from collections.abc import Iterable
 
 import numpy as np
@@ -535,7 +535,7 @@ def _get_without_cache(self, time: datetime.datetime) -> xr.DataArray:
             da_dict = {}
             for rpath, init_bytes in data.items():
                 channel = _extract_channel_from_rpath(rpath)
-                ds = xr.open_dataset(io.BytesIO(init_bytes), engine="h5netcdf")
+                ds = _load_via_tempfile(init_bytes)
                 da = ds["CMI"]
                 da = da.expand_dims(band_id=ds["band_id"].values)
 
@@ -551,7 +551,7 @@ def _get_without_cache(self, time: datetime.datetime) -> xr.DataArray:
             da = xr.concat(da_dict.values(), dim="band_id")
 
         else:
-            ds = xr.open_dataset(io.BytesIO(data), engine="h5netcdf")
+            ds = _load_via_tempfile(data)
            da = ds["CMI"]
             da = da.expand_dims(band_id=ds["band_id"].values)
 
@@ -564,6 +564,14 @@ def _get_without_cache(self, time: datetime.datetime) -> xr.DataArray:
         return da
 
 
+def _load_via_tempfile(data: bytes) -> xr.Dataset:
+    """Load xarray dataset via temporary file."""
+    with tempfile.NamedTemporaryFile(buffering=0) as tmp:
+        tmp.write(data)
+        ds = xr.load_dataset(tmp.name)
+    return ds
+
+
 def _concat_c02(ds1: XArrayType, ds2: XArrayType) -> XArrayType:
     """Concatenate two datasets with C01 and C02 data."""
     # Average the C02 data to the C01 resolution
diff --git a/pyproject.toml b/pyproject.toml
index ddedf7dd9..4721149d8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -128,7 +128,6 @@ sat = [
     "geojson>=3.1",
     "google-cloud-bigquery>=3.23",
     "google-cloud-bigquery-storage>=2.25",
-    "h5netcdf>=1.2",
     "pillow>=10.3",
     "pyproj>=3.5",
     "rasterio>=1.3",

From 1df35c3db56d3d3f4ae87423d4fb5453561fea27 Mon Sep 17 00:00:00 2001
From: Tristan Abbott
Date: Tue, 2 Jul 2024 19:07:10 +0000
Subject: [PATCH 05/11] update numpy scalar repr in doctests

---
 CHANGELOG.md                                  |  1 +
 pycontrails/core/coordinates.py               | 10 +++++-----
 pycontrails/core/flight.py                    |  8 ++++----
 pycontrails/core/interpolation.py             |  4 ++--
 pycontrails/core/vector.py                    |  2 +-
 pycontrails/models/cocip/cocip_uncertainty.py | 18 +++++++++---------
 pycontrails/physics/geo.py                    |  2 +-
 7 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4ebeffd06..cbb5011b2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@
 
 - Remove upper limits on netCDF4 and numpy versions.
 - Remove h5netcdf dependency.
+- Update doctests with numpy 2 scalar representation (see [NEP 51](https://numpy.org/neps/nep-0051-scalar-representation.html)).
 
 ## v0.52.0
 
diff --git a/pycontrails/core/coordinates.py b/pycontrails/core/coordinates.py
index 0ff1250d2..2e76508e7 100644
--- a/pycontrails/core/coordinates.py
+++ b/pycontrails/core/coordinates.py
@@ -59,27 +59,27 @@ def slice_domain(
     >>> # Call with request as np.array
     >>> request = np.linspace(-20, 20, 100)
     >>> slice_domain(domain, request)
-    slice(640, 801, None)
+    slice(np.int64(640), np.int64(801), None)
 
     >>> # Call with request as tuple
     >>> request = -20, 20
     >>> slice_domain(domain, request)
-    slice(640, 801, None)
+    slice(np.int64(640), np.int64(801), None)
 
     >>> # Call with a buffer
     >>> request = -16, 13
     >>> buffer = 4, 7
     >>> slice_domain(domain, request, buffer)
-    slice(640, 801, None)
+    slice(np.int64(640), np.int64(801), None)
 
     >>> # Call with request as a single number
     >>> request = -20
     >>> slice_domain(domain, request)
-    slice(640, 641, None)
+    slice(np.int64(640), np.int64(641), None)
 
     >>> request = -19.9
     >>> slice_domain(domain, request)
-    slice(640, 642, None)
+    slice(np.int64(640), np.int64(642), None)
 
     """
     # if the length of domain coordinates is <= 2, return the whole domain
diff --git a/pycontrails/core/flight.py b/pycontrails/core/flight.py
index 730107839..698285501 100644
--- a/pycontrails/core/flight.py
+++ b/pycontrails/core/flight.py
@@ -384,7 +384,7 @@ def max_distance_gap(self) -> float:
         ...     time=pd.date_range('2021-01-01T12', '2021-01-01T14', periods=200),
         ... )
         >>> fl.max_distance_gap
-        7391.27...
+        np.float64(7391.27...)
         """
         if self.attrs["crs"] != "EPSG:4326":
             raise NotImplementedError("Only implemented for EPSG:4326 CRS.")
@@ -415,7 +415,7 @@ def length(self) -> float:
         ...     time=pd.date_range('2021-01-01T12', '2021-01-01T14', periods=200),
         ... )
         >>> fl.length
-        1436924.67...
+        np.float64(1436924.67...)
         """
         if self.attrs["crs"] != "EPSG:4326":
             raise NotImplementedError("Only implemented for EPSG:4326 CRS.")
@@ -1520,11 +1520,11 @@ def length_met(self, key: str, threshold: float = 1.0) -> float:
 
         >>> # Length (in meters) of waypoints whose temperature exceeds 236K
         >>> fl.length_met("air_temperature", threshold=236)
-        4132178.159...
+        np.float64(4132178.159...)
 
         >>> # Proportion (with respect to distance) of waypoints whose temperature exceeds 236K
         >>> fl.proportion_met("air_temperature", threshold=236)
-        0.663552...
+        np.float64(0.663552...)
         """
         if key not in self.data:
             raise KeyError(f"Column {key} does not exist in data.")
diff --git a/pycontrails/core/interpolation.py b/pycontrails/core/interpolation.py
index 400efbd4c..7f6e4f5c4 100644
--- a/pycontrails/core/interpolation.py
+++ b/pycontrails/core/interpolation.py
@@ -621,11 +621,11 @@ class EmissionsProfileInterpolator:
    >>> epi = EmissionsProfileInterpolator(xp, fp)
     >>> # Interpolate a single value
     >>> epi.interp(5)
-    0.150000...
+    np.float64(0.150000...)
 
     >>> # Interpolate a single value on a logarithmic scale
     >>> epi.log_interp(5)
-    1.105171...
+    np.float64(1.105171...)
 
     >>> # Demonstrate speed up compared with xarray.DataArray interpolation
     >>> import time, xarray as xr
diff --git a/pycontrails/core/vector.py b/pycontrails/core/vector.py
index 497856b9d..20af8bd80 100644
--- a/pycontrails/core/vector.py
+++ b/pycontrails/core/vector.py
@@ -2057,7 +2057,7 @@ def vector_to_lon_lat_grid(
            [2.97, 0.12, 1.33, ..., 3.54, 0.74, 2.59]])
 
     >>> da.sum().item() == vector["foo"].sum()
-    True
+    np.True_
 
     """
     df = vector.select(("longitude", "latitude", *agg), copy=False).dataframe
diff --git a/pycontrails/models/cocip/cocip_uncertainty.py b/pycontrails/models/cocip/cocip_uncertainty.py
index 9c0248e66..5f230e57c 100644
--- a/pycontrails/models/cocip/cocip_uncertainty.py
+++ b/pycontrails/models/cocip/cocip_uncertainty.py
@@ -92,12 +92,12 @@ class CocipUncertaintyParams(CocipParams):
     >>> distr = scipy.stats.uniform(loc=0.4, scale=0.2)
     >>> params = CocipUncertaintyParams(seed=123, initial_wake_vortex_depth_uncertainty=distr)
     >>> params.initial_wake_vortex_depth
-    0.41076420
+    np.float64(0.41076420...)
 
     >>> # Once seeded, calling the class again gives a new value
     >>> params = CocipUncertaintyParams(initial_wake_vortex_depth=distr)
     >>> params.initial_wake_vortex_depth
-    0.43526372
+    np.float64(0.43526372...)
 
     >>> # To retain the default value, set the uncertainty to None
     >>> params = CocipUncertaintyParams(rf_lw_enhancement_factor_uncertainty=None)
@@ -212,7 +212,7 @@ def uncertainty_params(self) -> dict[str, rv_frozen]:
 
         return out
 
-    def rvs(self, size: None | int = None) -> dict[str, float | npt.NDArray[np.float64]]:
+    def rvs(self, size: None | int = None) -> dict[str, np.float64 | npt.NDArray[np.float64]]:
         """Call each distribution's `rvs` method to generate random parameters.
 
         Seed calls to `rvs` with class variable `rng`.
@@ -247,12 +247,12 @@ def rvs(self, size: None | int = None) -> dict[str, float | npt.NDArray[np.float
                 [7.9063961e-04, 3.0336906e-03, 7.7571563e-04, 2.0577813e-02,
                  9.4205803e-01, 4.3379897e-03, 3.6786550e-03, 2.4747452e-02]],
               dtype=float32),
-         'initial_wake_vortex_depth': 0.39805019708566847,
-         'nvpm_ei_n_enhancement_factor': 0.9371878437312526,
-         'rf_lw_enhancement_factor': 1.1017491252832377,
-         'rf_sw_enhancement_factor': 0.99721639115012,
-         'sedimentation_impact_factor': 0.5071779847244678,
-         'wind_shear_enhancement_exponent': 0.34100931239701004}
+         'initial_wake_vortex_depth': np.float64(0.39805019708566847),
+         'nvpm_ei_n_enhancement_factor': np.float64(0.9371878437312526),
+         'rf_lw_enhancement_factor': np.float64(1.1017491252832377),
+         'rf_sw_enhancement_factor': np.float64(0.99721639115012),
+         'sedimentation_impact_factor': np.float64(0.5071779847244678),
+         'wind_shear_enhancement_exponent': np.float64(0.34100931239701004)}
         """
         return {
             param: distr.rvs(size=size, random_state=self.rng)
diff --git a/pycontrails/physics/geo.py b/pycontrails/physics/geo.py
index 38e0bb23b..5cbf63630 100644
--- a/pycontrails/physics/geo.py
+++ b/pycontrails/physics/geo.py
@@ -886,7 +886,7 @@ def spatial_bounding_box(
     >>> lon = rng.uniform(-180, 180, size=30)
     >>> lat = rng.uniform(-90, 90, size=30)
     >>> spatial_bounding_box(lon, lat)
-    (-168.0, -77.0, 155.0, 82.0)
+    (np.float64(-168.0), np.float64(-77.0), np.float64(155.0), np.float64(82.0))
     """
     lon_min = max(np.floor(np.min(longitude) - buffer), -180.0)
     lon_max = min(np.ceil(np.max(longitude) + buffer), 179.99)

From a51eea4ae6eeb8188da46d40e08c14789429d36f Mon Sep 17 00:00:00 2001
From: Tristan Abbott
Date: Tue, 2 Jul 2024 19:39:06 +0000
Subject: [PATCH 06/11] fix numpy 2 deprecation warning

---
 pycontrails/core/polygon.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pycontrails/core/polygon.py b/pycontrails/core/polygon.py
index fdedfec88..1c721d9d8 100644
--- a/pycontrails/core/polygon.py
+++ b/pycontrails/core/polygon.py
@@ -536,7 +536,7 @@ def multipolygon_to_geojson(
         else:
             shape = len(ring.coords), 3
             coords = np.empty(shape)
-            coords[:, :2] = ring.coords
+            coords[:, :2] = np.asarray(ring.coords)
             coords[:, 2] = altitude
 
         poly_coords.append(coords.tolist())

From 44911214d71a9af2b39043f84ec6076cdccbf6ac Mon Sep 17 00:00:00 2001
From: Tristan Abbott
Date: Wed, 3 Jul 2024 13:35:01 +0000
Subject: [PATCH 07/11] save 1 loc

---
 pycontrails/datalib/goes.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pycontrails/datalib/goes.py b/pycontrails/datalib/goes.py
index 3b380ec88..c597990d2 100644
--- a/pycontrails/datalib/goes.py
+++ b/pycontrails/datalib/goes.py
@@ -568,8 +568,7 @@ def _load_via_tempfile(data: bytes) -> xr.Dataset:
     """Load xarray dataset via temporary file."""
     with tempfile.NamedTemporaryFile(buffering=0) as tmp:
         tmp.write(data)
-        ds = xr.load_dataset(tmp.name)
-    return ds
+        return xr.load_dataset(tmp.name)
 
 
 def _concat_c02(ds1: XArrayType, ds2: XArrayType) -> XArrayType:

From d39d70e98fea4355fcb7d11191581c33f4d8cbab Mon Sep 17 00:00:00 2001
From: Tristan Abbott
Date: Wed, 3 Jul 2024 13:53:23 +0000
Subject: [PATCH 08/11] add error message when running doctests with numpy 2

---
 CHANGELOG.md | 2 +-
 Makefile     | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cbb5011b2..396e57bb6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,7 +11,7 @@
 
 - Remove upper limits on netCDF4 and numpy versions.
 - Remove h5netcdf dependency.
-- Update doctests with numpy 2 scalar representation (see [NEP 51](https://numpy.org/neps/nep-0051-scalar-representation.html)).
+- Update doctests with numpy 2 scalar representation (see [NEP 51](https://numpy.org/neps/nep-0051-scalar-representation.html)). Doctests will now fail when run with numpy 1.
 
 ## v0.52.0
 
diff --git a/Makefile b/Makefile
index a905e35ee..d94c67660 100644
--- a/Makefile
+++ b/Makefile
@@ -133,6 +133,13 @@ main-test-status:
 DOCS_DIR = docs
 DOCS_BUILD_DIR = docs/_build
 
+# Check that numpy 2 is being used
+ensure-numpy-2:
+	[[ $$(python -c 'import numpy as np; print(np.__version__)') == 2.* ]] \
+	|| ( \
+		echo -e "$(COLOR_YELLOW)NumPy 2 required for doctests$(END_COLOR)"; \
+		exit 1)
+
 # Check for GCP credentials
 ensure-gcp-credentials:
 	python -c 'from google.cloud import storage; storage.Client()' \
@@ -163,7 +170,7 @@ ensure-era5-cached:
 cache-era5-gcp: ensure-era5-cached
 	gcloud storage cp -r -n .doc-test-cache/* gs://contrails-301217-unit-test/doc-test-cache/
 
-doctest: ensure-era5-cached ensure-gcp-credentials
+doctest: ensure-numpy-2 ensure-era5-cached ensure-gcp-credentials
 	pytest --doctest-modules \
 		--ignore-glob=pycontrails/ext/* \
 		pycontrails -vv

From 9e8f504f983a10dc09569e09f9ccf5703f83c747 Mon Sep 17 00:00:00 2001
From: Nicholas Masson
Date: Tue, 2 Jul 2024 11:39:46 -0600
Subject: [PATCH 09/11] localize pycontrails GOES import

---
 pycontrails/models/cocip/output_formats.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pycontrails/models/cocip/output_formats.py b/pycontrails/models/cocip/output_formats.py
index a37edf737..6263b17d8 100644
--- a/pycontrails/models/cocip/output_formats.py
+++ b/pycontrails/models/cocip/output_formats.py
@@ -32,7 +32,6 @@
 
 from pycontrails.core.met import MetDataArray, MetDataset
 from pycontrails.core.vector import GeoVectorDataset, vector_to_lon_lat_grid
-from pycontrails.datalib.goes import GOES, extract_goes_visualization
 from pycontrails.models.cocip.contrail_properties import contrail_edges, plume_mass_per_distance
 from pycontrails.models.cocip.radiative_forcing import albedo
 from pycontrails.models.humidity_scaling import HumidityScaling
@@ -2125,6 +2124,8 @@ def compare_cocip_with_goes(
         File path of saved CoCiP-GOES image if ``path_write_img`` is provided.
 
     """
+    from pycontrails.datalib.goes import GOES, extract_goes_visualization
+
     try:
         import cartopy.crs as ccrs
         from cartopy.mpl.ticker import LatitudeFormatter, LongitudeFormatter

From efe3c1918acf84de0a9be82976340706f519da93 Mon Sep 17 00:00:00 2001
From: Tristan Abbott
Date: Wed, 3 Jul 2024 18:42:25 +0000
Subject: [PATCH 10/11] use single-threaded dask scheduler for some tests

---
 CHANGELOG.md             |  1 +
 tests/unit/test_ecmwf.py | 23 +++++++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 396e57bb6..c4febfde9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@
 - Remove upper limits on netCDF4 and numpy versions.
 - Remove h5netcdf dependency.
 - Update doctests with numpy 2 scalar representation (see [NEP 51](https://numpy.org/neps/nep-0051-scalar-representation.html)). Doctests will now fail when run with numpy 1.
+- Run certain tests in `test_ecmwf.py` using the single-threaded dask scheduler to prevent tests from hanging while waiting for a lock that is never released. (This issue was [encountered previously](https://github.com/contrailcirrus/pycontrails/pull/68), and removing `lock=False` in `MetDataSource.open_dataset` reverts the fix.)
 
 ## v0.52.0
 
diff --git a/tests/unit/test_ecmwf.py b/tests/unit/test_ecmwf.py
index 522fb3dae..f6611fbf1 100644
--- a/tests/unit/test_ecmwf.py
+++ b/tests/unit/test_ecmwf.py
@@ -4,6 +4,7 @@
 from datetime import datetime
 from typing import TypeVar
 
+import dask
 import numpy as np
 import pandas as pd
 import pytest
@@ -23,6 +24,18 @@
 AnyECMWFDatalibClass = TypeVar("AnyECMWFDatalibClass", AnyERA5DatalibClass, AnyHRESDatalibClass)
 
 
+@pytest.fixture()
+def _dask_single_threaded():
+    """Run test using single-threaded dask scheduler.
+
+    As of v0.52.1, using the default multi-threaded scheduler causes
+    some tests to hang while waiting to acquire a lock that is never released.
+    This fixture can be used to run those tests using a single-threaded scheduler.
+    """
+    with dask.config.set(scheduler="single-threaded"):
+        yield
+
+
 def test_environ_keys() -> None:
     """Test CDS Keys."""
     assert os.environ["CDSAPI_URL"] == "FAKE"
@@ -296,6 +309,7 @@ def test_ERA5_repr() -> None:
     assert "Dataset:" in out
 
 
+@pytest.mark.usefixtures("_dask_single_threaded")
 def test_ERA5_cachestore(met_ecmwf_pl_path: str, override_cache: DiskCacheStore) -> None:
     """Test ERA5 cachestore input."""
 
@@ -342,6 +356,7 @@ def test_ERA5_cachestore(met_ecmwf_pl_path: str, override_cache: DiskCacheStore)
     assert pre_init_size == post_init_size
 
 
+@pytest.mark.usefixtures("_dask_single_threaded")
 def test_ERA5_pressure_levels(met_ecmwf_pl_path: str, override_cache: DiskCacheStore) -> None:
     """Test ERA5 pressure_level parsing."""
     times = [datetime(2019, 5, 31, 5, 0, 0)]
@@ -432,6 +447,7 @@ def test_ERA5_hash() -> None:
     assert era5.hash != era52.hash
 
 
+@pytest.mark.usefixtures("_dask_single_threaded")
 def test_ERA5_paths_with_time(
     met_ecmwf_pl_path: str,
     met_ecmwf_sl_path: str,
@@ -465,6 +481,7 @@ def test_ERA5_paths_with_time(
     assert override_cache.size > 0
 
 
+@pytest.mark.usefixtures("_dask_single_threaded")
 def test_ERA5_paths_without_time(
     met_ecmwf_pl_path: str,
     override_cache: DiskCacheStore,
@@ -498,6 +515,7 @@ def test_ERA5_paths_without_time(
     assert pre_init_size == post_init_size
 
 
+@pytest.mark.usefixtures("_dask_single_threaded")
 def test_ERA5_paths_with_error(
     met_ecmwf_pl_path: str,
     override_cache: DiskCacheStore,
@@ -614,7 +632,7 @@ def test_ERA5_set_met_source_metadata(product_type: str, variables: str) -> None
     assert ds.attrs["product"] == product_type.split("_")[0]
 
 
+@pytest.mark.usefixtures("_dask_single_threaded")
 def test_ERA5_met_source_open_metdataset(met_ecmwf_pl_path: str) -> None:
     """Test the met_source attribute on the MetDataset arising from ERA5."""
     era5 = ERA5(
@@ -807,7 +826,7 @@ def test_model_level_era5_ensemble_mars_request() -> None:
     }
 
 
-def test_model_level_era5_set_metadata(met_ecmwf_pl_path: str) -> None:
+def test_model_level_era5_set_metadata() -> None:
     """Test metadata setting."""
     era5 = ERA5ModelLevel(time=(datetime(2000, 1, 1), datetime(2000, 1, 2)), variables=["t", "q"])
     ds = xr.Dataset()
@@ -1134,7 +1153,7 @@ def test_model_level_hres_mars_request() -> None:
     )
 
 
-def test_model_level_hres_set_metadata(met_ecmwf_pl_path: str) -> None:
+def test_model_level_hres_set_metadata() -> None:
     """Test metadata setting."""
     hres = HRESModelLevel(time=datetime(2000, 1, 1), variables=["t", "q"])
     ds = xr.Dataset()

From ccd5e94ed263c1803eab061afcacccdae3dcce8d Mon Sep 17 00:00:00 2001
From: Tristan Abbott
Date: Wed, 3 Jul 2024 22:06:54 +0000
Subject: [PATCH 11/11] Mark additional tests for running with single-threaded
 dask

---
 CHANGELOG.md             |  2 +-
 tests/unit/conftest.py   | 13 +++++++++++++
 tests/unit/test_ecmwf.py | 13 -------------
 tests/unit/test_met.py   |  1 +
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c4febfde9..b03ccb0ed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,7 +12,7 @@
 - Remove upper limits on netCDF4 and numpy versions.
 - Remove h5netcdf dependency.
 - Update doctests with numpy 2 scalar representation (see [NEP 51](https://numpy.org/neps/nep-0051-scalar-representation.html)). Doctests will now fail when run with numpy 1.
-- Run certain tests in `test_ecmwf.py` using the single-threaded dask scheduler to prevent tests from hanging while waiting for a lock that is never released. (This issue was [encountered previously](https://github.com/contrailcirrus/pycontrails/pull/68), and removing `lock=False` in `MetDataSource.open_dataset` reverts the fix.)
+- Run certain tests in `test_ecmwf.py` and `test_met.py` using the single-threaded dask scheduler to prevent tests from hanging while waiting for a lock that is never released. (This issue was [encountered previously](https://github.com/contrailcirrus/pycontrails/pull/68), and removing `lock=False` in `MetDataSource.open_dataset` reverts the fix.)
 
 ## v0.52.0
 
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
index 3c041bde9..da75c7e36 100644
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ -7,6 +7,7 @@
 from datetime import datetime
 from typing import Any
 
+import dask
 import numpy as np
 import pandas as pd
 import pytest
@@ -442,3 +443,15 @@ def polygon_bug() -> MetDataArray:
     path = get_static_path("polygon-bug.nc")
     da = xr.open_dataarray(path)
     return MetDataArray(da)
+
+
+@pytest.fixture()
+def _dask_single_threaded():
+    """Run test using single-threaded dask scheduler.
+
+    As of v0.52.1, using the default multi-threaded scheduler can cause
+    some tests to hang while waiting to acquire a lock that is never released.
+    This fixture can be used to run those tests using a single-threaded scheduler.
+    """
+    with dask.config.set(scheduler="single-threaded"):
+        yield
diff --git a/tests/unit/test_ecmwf.py b/tests/unit/test_ecmwf.py
index f6611fbf1..fa657aeaf 100644
--- a/tests/unit/test_ecmwf.py
+++ b/tests/unit/test_ecmwf.py
@@ -4,7 +4,6 @@
 from datetime import datetime
 from typing import TypeVar
 
-import dask
 import numpy as np
 import pandas as pd
 import pytest
@@ -23,18 +23,6 @@
 AnyECMWFDatalibClass = TypeVar("AnyECMWFDatalibClass", AnyERA5DatalibClass, AnyHRESDatalibClass)
 
 
-@pytest.fixture()
-def _dask_single_threaded():
-    """Run test using single-threaded dask scheduler.
-
-    As of v0.52.1, using the default multi-threaded scheduler causes
-    some tests to hang while waiting to acquire a lock that is never released.
-    This fixture can be used to run those tests using a single-threaded scheduler.
-    """
-    with dask.config.set(scheduler="single-threaded"):
-        yield
-
-
 def test_environ_keys() -> None:
     """Test CDS Keys."""
     assert os.environ["CDSAPI_URL"] == "FAKE"
diff --git a/tests/unit/test_met.py b/tests/unit/test_met.py
index 9f51177d9..066ba89a4 100644
--- a/tests/unit/test_met.py
+++ b/tests/unit/test_met.py
@@ -1013,6 +1013,7 @@ def test_met_copy(met_ecmwf_pl_path: str) -> None:
     assert round(mds2["t"].data[5][5][2][0].item()) == 235
 
 
+@pytest.mark.usefixtures("_dask_single_threaded")
 def test_met_wrap_longitude_chunks(met_ecmwf_pl_path: str, override_cache: DiskCacheStore) -> None:
     """Check that the wrap_longitude method increments longitudinal chunks."""
     # Using the xr.open_dataset method
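
Note: the single-threaded scheduler used by the `_dask_single_threaded` fixture in
patches 10 and 11 is ordinary dask configuration, so downstream code that hits the
same locking hang with netCDF4 1.7+ can apply the identical workaround outside
pytest. A minimal sketch under that assumption (the file glob and the variable
name "t" are hypothetical, not part of this patch series):

    import dask
    import xarray as xr

    # Run dask tasks on a single thread so no task ever blocks on a file
    # lock held by another thread.
    with dask.config.set(scheduler="single-threaded"):
        ds = xr.open_mfdataset("era5-*.nc", engine="netcdf4")  # hypothetical paths
        print(ds["t"].mean().compute())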