From b6b54c08a74d0e1596d32ddf97458fe314c62260 Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 14 Aug 2024 15:49:57 -0600
Subject: [PATCH 1/7] `MultiH5` use `Resource._get_datasets` to get list of
 datasets that includes grouped ones

---
 rex/multi_file_resource.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rex/multi_file_resource.py b/rex/multi_file_resource.py
index 613fa1a0..ee91e00d 100644
--- a/rex/multi_file_resource.py
+++ b/rex/multi_file_resource.py
@@ -127,7 +127,7 @@ def _get_dsets(h5_path):
         shared_dsets = []
         try:
             with h5py.File(h5_path, mode='r') as f:
-                for dset in f:
+                for dset in Resource._get_datasets(f):
                     if dset not in ['meta', 'time_index', 'coordinates']:
                         unique_dsets.append(dset)
                     else:

From 08fb248e3d200a5c53217287b159e0ac2a78b1d6 Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 14 Aug 2024 15:50:19 -0600
Subject: [PATCH 2/7] No need for intersection code

---
 rex/resource.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rex/resource.py b/rex/resource.py
index 64b7ccf5..4875fca6 100644
--- a/rex/resource.py
+++ b/rex/resource.py
@@ -924,7 +924,7 @@ def attrs(self):
         """
         if self._attrs is None:
             self._attrs = {}
-            for dset in set(self.datasets).intersection(self.h5):
+            for dset in self.datasets:
                 self._attrs[dset] = dict(self.h5[dset].attrs)
 
         return self._attrs

From dbf806b8dfd43714ca268129ffc3b1c18faa28da Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 14 Aug 2024 15:50:41 -0600
Subject: [PATCH 3/7] Add test for extracting attrs for grouped datasets

---
 tests/test_resource.py | 58 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/tests/test_resource.py b/tests/test_resource.py
index 37fe759a..3bff5071 100644
--- a/tests/test_resource.py
+++ b/tests/test_resource.py
@@ -975,6 +975,64 @@ def test_mh5_iterator():
         assert len(dsets_permutation) == len(mh5.datasets) ** 2
 
 
+@pytest.mark.parametrize("read_class", [Resource, MultiFileResource])
+def test_attrs_for_grouped_datasets(read_class):
+    """Test attrs for files with datasets under groups."""
+
+    meta = pd.DataFrame({'latitude': np.ones(100),
+                         'longitude': np.zeros(100)})
+    time_index = pd_date_range('20210101', '20220101', freq='1h',
+                               closed='right')
+    with tempfile.TemporaryDirectory() as td:
+        fp = os.path.join(td, 'outputs.h5')
+
+        with Outputs(fp, 'w') as f:
+            f.meta = meta
+            f.time_index = time_index
+
+        Outputs.add_dataset(h5_file=fp, dset_name='dset1',
+                            dset_data=np.ones((8760, 100)) * 42.42,
+                            attrs={'scale_factor': 100}, dtype=np.int32)
+
+        with Outputs(fp, 'a', group="g1") as f:
+            f.meta = meta
+            f.time_index = time_index
+
+        Outputs.add_dataset(h5_file=fp, dset_name='dset_g1',
+                            dset_data=np.ones((8760, 100)) * 42.42,
+                            attrs={'scale_factor': 100}, dtype=np.int32,
+                            group="g1")
+
+        with read_class(fp) as res:
+            assert np.allclose(res["dset1"], 42.42)
+            assert np.allclose(res["g1/dset_g1"], 42.42)
+
+            expected_dsets = {'dset1', 'meta', 'time_index',
+                              'g1/dset_g1', 'g1/meta', 'g1/time_index'}
+            assert set(res.datasets) == expected_dsets
+            assert set(res.dtypes) == expected_dsets
+
+            expected_attrs = {'dset1': {'scale_factor': 100},
+                              'g1/dset_g1': {'scale_factor': 100},
+                              'g1/meta': {}, 'g1/time_index': {},
+                              'meta': {}, 'time_index': {}}
+            assert res.attrs == expected_attrs
+
+            expected_shapes = {'dset1': (8760, 100),
+                               'g1/dset_g1': (8760, 100),
+                               'g1/meta': (100,),
+                               'g1/time_index': (8760,),
+                               'meta': (100,), 'time_index': (8760,)}
+            assert res.shapes == expected_shapes
+
+            expected_chunks = {'dset1': None,
+                               'g1/dset_g1': None,
+                               'g1/meta': None,
+                               'g1/time_index': None,
+                               'meta': None, 'time_index': None}
+            assert res.chunks == expected_chunks
+
+
 def execute_pytest(capture='all', flags='-rapP'):
     """Execute module as pytest with detailed summary report.
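Note on patches 1-3: the fix works because `Resource._get_datasets` recurses
into HDF5 groups instead of iterating only the top level of the file the way
`for dset in f` does. A minimal sketch of that kind of traversal, written
against plain h5py (illustrative only; the actual implementation lives in
rex/resource.py and may differ):

    import h5py

    def get_datasets(h5_obj, prefix=''):
        """Recursively collect dataset names, including names nested
        under groups (e.g. 'g1/dset_g1')."""
        dsets = []
        for name, obj in h5_obj.items():
            path = '{}/{}'.format(prefix, name) if prefix else name
            if isinstance(obj, h5py.Group):
                # descend into the group so grouped datasets are found
                dsets.extend(get_datasets(obj, prefix=path))
            else:
                dsets.append(path)
        return dsets

With a traversal like this, patch 2 can safely drop the
`set(self.datasets).intersection(self.h5)` guard: every name returned
resolves directly via `self.h5[dset]`, grouped or not, which is exactly what
the new test in patch 3 verifies for `attrs`, `shapes`, `dtypes`, and
`chunks`.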
From 43cfa92c6467e98f8ed2ec03e0836c0841c294be Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 14 Aug 2024 15:51:49 -0600
Subject: [PATCH 4/7] Add warning to class that it does not support grouped
 datasets

---
 rex/rechunk_h5/rechunk_h5.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/rex/rechunk_h5/rechunk_h5.py b/rex/rechunk_h5/rechunk_h5.py
index 635b130d..9cc5edda 100644
--- a/rex/rechunk_h5/rechunk_h5.py
+++ b/rex/rechunk_h5/rechunk_h5.py
@@ -44,7 +44,7 @@ def get_dataset_attributes(h5_file, out_json=None, chunk_size=2,
     with h5py.File(h5_file, 'r') as f:
         global_attrs = dict(f.attrs)
 
-        for ds_name in f:
+        for ds_name in BaseResource._get_datasets(f):
             ds = f[ds_name]
             try:
                 arr_size = ds_name in ['meta', 'coordinates', 'time_index']
@@ -88,6 +88,9 @@
 class RechunkH5:
     """
     Class to create new .h5 file with new chunking
+
+    .. WARNING:: This code does not currently support re-chunking H5
+       files with grouped datasets.
     """
     # None time-series
     NON_TS_DSETS = ('meta', 'coordinates', 'time_index')
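Note on patch 4: the warning is documentation-only; the rest of RechunkH5
still assumes a flat file layout. If a hard failure were preferred over a
docstring warning, a pre-flight check along these lines could be added (a
hypothetical helper sketched here, not part of this patch series):

    import h5py

    def check_no_groups(h5_file):
        """Raise if the file contains groups, which RechunkH5 does not
        currently handle (hypothetical pre-flight check)."""
        with h5py.File(h5_file, 'r') as f:
            groups = [name for name in f
                      if isinstance(f[name], h5py.Group)]
        if groups:
            raise NotImplementedError(
                'RechunkH5 does not support grouped datasets; '
                'found groups: {}'.format(groups))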
From 620276371700bfd0e7d4fb3a223abe018b2a9f28 Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 14 Aug 2024 15:54:47 -0600
Subject: [PATCH 5/7] Add `flaky` to test reqs

---
 .github/workflows/codecov.yml            | 3 ++-
 .github/workflows/pull_request_tests.yml | 1 +
 setup.py                                 | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml
index c5212076..f945950a 100644
--- a/.github/workflows/codecov.yml
+++ b/.github/workflows/codecov.yml
@@ -19,10 +19,11 @@
           pip install --upgrade pip
           pip install pytest
           pip install pytest-cov
+          pip install pytest-timeout
+          pip install flaky
           pip install -e .
       - name: Generate coverage report
         run: |
-          pip install pytest-cov
           cd tests
           pytest --disable-warnings --cov=./ --cov-report=xml:coverage.xml
       - name: Upload coverage to Codecov
diff --git a/.github/workflows/pull_request_tests.yml b/.github/workflows/pull_request_tests.yml
index 56517637..61f85a53 100644
--- a/.github/workflows/pull_request_tests.yml
+++ b/.github/workflows/pull_request_tests.yml
@@ -29,6 +29,7 @@
         pip install pytest
         pip install pytest-cov
         pip install pytest-timeout
+        pip install flaky
         pip install -e .
     - name: Run pytest and Generate coverage report
       run: |
diff --git a/setup.py b/setup.py
index 4dc10e80..6a4a0583 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ def run(self):
 with open("requirements.txt") as f:
     install_requires = f.readlines()
 
-test_requires = ["pytest>=5.2", "pytest-timeout>=2.3.1"]
+test_requires = ["pytest>=5.2", "pytest-timeout>=2.3.1", "flaky>=3.8.1"]
 dev_requires = ["flake8", "pre-commit", "pylint", "hsds>=0.8.4"]
 description = ("National Renewable Energy Laboratory's (NREL's) REsource "
                "eXtraction tool: rex")

From e1846c389ea33442b2dbc99bdf21777e27b816a1 Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 14 Aug 2024 15:55:46 -0600
Subject: [PATCH 6/7] Mark bc test as flaky

---
 tests/test_bc.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_bc.py b/tests/test_bc.py
index b7485889..4c2ac06a 100644
--- a/tests/test_bc.py
+++ b/tests/test_bc.py
@@ -4,11 +4,13 @@
 """
 
 import numpy as np
+from flaky import flaky
 
 from rex.temporal_stats.temporal_stats import cdf
 from rex.utilities.bc_utils import QuantileDeltaMapping
 
 
+@flaky(max_runs=3, min_passes=1)
 def test_qdm():
     """Test basic QuantileDeltaMapping functionality with dummy
     distributions

From 4d1ed8698f84aaba7e8d9e0fcedbe2dd0b8ebc20 Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 14 Aug 2024 15:58:03 -0600
Subject: [PATCH 7/7] Undo group attr change for now

---
 rex/rechunk_h5/rechunk_h5.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rex/rechunk_h5/rechunk_h5.py b/rex/rechunk_h5/rechunk_h5.py
index 9cc5edda..61758a2e 100644
--- a/rex/rechunk_h5/rechunk_h5.py
+++ b/rex/rechunk_h5/rechunk_h5.py
@@ -44,7 +44,7 @@ def get_dataset_attributes(h5_file, out_json=None, chunk_size=2,
     with h5py.File(h5_file, 'r') as f:
         global_attrs = dict(f.attrs)
 
-        for ds_name in BaseResource._get_datasets(f):
+        for ds_name in f:
             ds = f[ds_name]
             try:
                 arr_size = ds_name in ['meta', 'coordinates', 'time_index']
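Note on patches 5-7: patch 7 reverts the `BaseResource._get_datasets` call
introduced in patch 4, restoring top-level iteration in
`get_dataset_attributes` until RechunkH5 gains grouped-dataset support.
Patches 5-6 are about test stability: `flaky` reruns a failing test instead
of failing the suite outright, and `@flaky(max_runs=3, min_passes=1)` passes
the test as long as one of up to three runs succeeds, which suits the
randomly sampled dummy distributions in `test_qdm`. A minimal usage sketch
of the same pattern (the test name and tolerance here are illustrative):

    import numpy as np
    from flaky import flaky

    @flaky(max_runs=3, min_passes=1)
    def test_with_random_inputs():
        # statistical assertion on random draws; it can occasionally fall
        # outside tolerance, so allow up to two automatic reruns
        samples = np.random.normal(0, 1, 10000)
        assert abs(samples.mean()) < 0.05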