From b6b54c08a74d0e1596d32ddf97458fe314c62260 Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 14 Aug 2024 15:49:57 -0600
Subject: [PATCH 1/7] `MultiH5` use `Resource._get_datasets` to get list of
 datasets that includes grouped ones

---
 rex/multi_file_resource.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rex/multi_file_resource.py b/rex/multi_file_resource.py
index 613fa1a0..ee91e00d 100644
--- a/rex/multi_file_resource.py
+++ b/rex/multi_file_resource.py
@@ -127,7 +127,7 @@ def _get_dsets(h5_path):
         shared_dsets = []
         try:
             with h5py.File(h5_path, mode='r') as f:
-                for dset in f:
+                for dset in Resource._get_datasets(f):
                     if dset not in ['meta', 'time_index', 'coordinates']:
                         unique_dsets.append(dset)
                     else:

From 08fb248e3d200a5c53217287b159e0ac2a78b1d6 Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 14 Aug 2024 15:50:19 -0600
Subject: [PATCH 2/7] No need for intersection code

---
 rex/resource.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rex/resource.py b/rex/resource.py
index 64b7ccf5..4875fca6 100644
--- a/rex/resource.py
+++ b/rex/resource.py
@@ -924,7 +924,7 @@ def attrs(self):
         """
         if self._attrs is None:
             self._attrs = {}
-            for dset in set(self.datasets).intersection(self.h5):
+            for dset in self.datasets:
                 self._attrs[dset] = dict(self.h5[dset].attrs)
 
         return self._attrs

From dbf806b8dfd43714ca268129ffc3b1c18faa28da Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 14 Aug 2024 15:50:41 -0600
Subject: [PATCH 3/7] Add test for extracting attrs for grouped datasets

---
 tests/test_resource.py | 58 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/tests/test_resource.py b/tests/test_resource.py
index 37fe759a..3bff5071 100644
--- a/tests/test_resource.py
+++ b/tests/test_resource.py
@@ -975,6 +975,64 @@ def test_mh5_iterator():
         assert len(dsets_permutation) == len(mh5.datasets) ** 2
 
 
+@pytest.mark.parametrize("read_class", [Resource, MultiFileResource])
+def test_attrs_for_grouped_datasets(read_class):
+    """Test attrs for files with datasets under groups."""
+
+    meta = pd.DataFrame({'latitude': np.ones(100),
+                         'longitude': np.zeros(100)})
+    time_index = pd_date_range('20210101', '20220101', freq='1h',
+                               closed='right')
+    with tempfile.TemporaryDirectory() as td:
+        fp = os.path.join(td, 'outputs.h5')
+
+        with Outputs(fp, 'w') as f:
+            f.meta = meta
+            f.time_index = time_index
+
+        Outputs.add_dataset(h5_file=fp, dset_name='dset1',
+                            dset_data=np.ones((8760, 100)) * 42.42,
+                            attrs={'scale_factor': 100}, dtype=np.int32)
+
+        with Outputs(fp, 'a', group="g1") as f:
+            f.meta = meta
+            f.time_index = time_index
+
+        Outputs.add_dataset(h5_file=fp, dset_name='dset_g1',
+                            dset_data=np.ones((8760, 100)) * 42.42,
+                            attrs={'scale_factor': 100}, dtype=np.int32,
+                            group="g1")
+
+        with read_class(fp) as res:
+            assert np.allclose(res["dset1"], 42.42)
+            assert np.allclose(res["g1/dset_g1"], 42.42)
+
+            expected_dsets = {'dset1', 'meta', 'time_index',
+                              'g1/dset_g1', 'g1/meta', 'g1/time_index'}
+            assert set(res.datasets) == expected_dsets
+            assert set(res.dtypes) == expected_dsets
+
+            expected_attrs = {'dset1': {'scale_factor': 100},
+                              'g1/dset_g1': {'scale_factor': 100},
+                              'g1/meta': {}, 'g1/time_index': {},
+                              'meta': {}, 'time_index': {}}
+            assert res.attrs == expected_attrs
+
+            expected_shapes = {'dset1': (8760, 100),
+                               'g1/dset_g1': (8760, 100),
+                               'g1/meta': (100,),
+                               'g1/time_index': (8760,),
+                               'meta': (100,), 'time_index': (8760,)}
+            assert res.shapes == expected_shapes
+
+            expected_chunks = {'dset1': None,
+                               'g1/dset_g1': None,
+                               'g1/meta': None,
+                               'g1/time_index': None,
+                               'meta': None, 'time_index': None}
+            assert res.chunks == expected_chunks
+
+
 def execute_pytest(capture='all', flags='-rapP'):
     """Execute module as pytest with detailed summary report.
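Note on patches 1-3: the fix works because `Resource._get_datasets` recurses
into HDF5 groups instead of iterating only the top level of the file the way
`for dset in f` does. A minimal sketch of that kind of traversal, written
against plain h5py (illustrative only; the actual implementation lives in
rex/resource.py and may differ):

    import h5py

    def get_datasets(h5_obj, prefix=''):
        """Recursively collect dataset names, including names nested
        under groups (e.g. 'g1/dset_g1')."""
        dsets = []
        for name, obj in h5_obj.items():
            path = '{}/{}'.format(prefix, name) if prefix else name
            if isinstance(obj, h5py.Group):
                # descend into the group so grouped datasets are found
                dsets.extend(get_datasets(obj, prefix=path))
            else:
                dsets.append(path)
        return dsets

With a traversal like this, patch 2 can safely drop the
`set(self.datasets).intersection(self.h5)` guard: every name returned
resolves directly via `self.h5[dset]`, grouped or not, which is exactly what
the new test in patch 3 verifies for `attrs`, `shapes`, `dtypes`, and
`chunks`.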
From 43cfa92c6467e98f8ed2ec03e0836c0841c294be Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 14 Aug 2024 15:51:49 -0600
Subject: [PATCH 4/7] Add warning to class that it does not support grouped
 datasets

---
 rex/rechunk_h5/rechunk_h5.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/rex/rechunk_h5/rechunk_h5.py b/rex/rechunk_h5/rechunk_h5.py
index 635b130d..9cc5edda 100644
--- a/rex/rechunk_h5/rechunk_h5.py
+++ b/rex/rechunk_h5/rechunk_h5.py
@@ -44,7 +44,7 @@ def get_dataset_attributes(h5_file, out_json=None, chunk_size=2,
     with h5py.File(h5_file, 'r') as f:
         global_attrs = dict(f.attrs)
 
-        for ds_name in f:
+        for ds_name in BaseResource._get_datasets(f):
             ds = f[ds_name]
             try:
                 arr_size = ds_name in ['meta', 'coordinates', 'time_index']
@@ -88,6 +88,9 @@
 class RechunkH5:
     """
     Class to create new .h5 file with new chunking
+
+    .. WARNING:: This code does not currently support re-chunking H5
+       files with grouped datasets.
     """
     # None time-series
     NON_TS_DSETS = ('meta', 'coordinates', 'time_index')
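Note on patch 4: the warning is documentation-only; the rest of RechunkH5
still assumes a flat file layout. If a hard failure were preferred over a
docstring warning, a pre-flight check along these lines could be added (a
hypothetical helper sketched here, not part of this patch series):

    import h5py

    def check_no_groups(h5_file):
        """Raise if the file contains groups, which RechunkH5 does not
        currently handle (hypothetical pre-flight check)."""
        with h5py.File(h5_file, 'r') as f:
            groups = [name for name in f
                      if isinstance(f[name], h5py.Group)]
        if groups:
            raise NotImplementedError(
                'RechunkH5 does not support grouped datasets; '
                'found groups: {}'.format(groups))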
From 620276371700bfd0e7d4fb3a223abe018b2a9f28 Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 14 Aug 2024 15:54:47 -0600
Subject: [PATCH 5/7] Add `flaky` to test reqs

---
 .github/workflows/codecov.yml            | 3 ++-
 .github/workflows/pull_request_tests.yml | 1 +
 setup.py                                 | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml
index c5212076..f945950a 100644
--- a/.github/workflows/codecov.yml
+++ b/.github/workflows/codecov.yml
@@ -19,10 +19,11 @@
           pip install --upgrade pip
           pip install pytest
           pip install pytest-cov
+          pip install pytest-timeout
+          pip install flaky
           pip install -e .
       - name: Generate coverage report
         run: |
-          pip install pytest-cov
           cd tests
           pytest --disable-warnings --cov=./ --cov-report=xml:coverage.xml
       - name: Upload coverage to Codecov
diff --git a/.github/workflows/pull_request_tests.yml b/.github/workflows/pull_request_tests.yml
index 56517637..61f85a53 100644
--- a/.github/workflows/pull_request_tests.yml
+++ b/.github/workflows/pull_request_tests.yml
@@ -29,6 +29,7 @@
         pip install pytest
         pip install pytest-cov
         pip install pytest-timeout
+        pip install flaky
         pip install -e .
     - name: Run pytest and Generate coverage report
       run: |
diff --git a/setup.py b/setup.py
index 4dc10e80..6a4a0583 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ def run(self):
 with open("requirements.txt") as f:
     install_requires = f.readlines()
 
-test_requires = ["pytest>=5.2", "pytest-timeout>=2.3.1"]
+test_requires = ["pytest>=5.2", "pytest-timeout>=2.3.1", "flaky>=3.8.1"]
 dev_requires = ["flake8", "pre-commit", "pylint", "hsds>=0.8.4"]
 description = ("National Renewable Energy Laboratory's (NREL's) REsource "
                "eXtraction tool: rex")

From e1846c389ea33442b2dbc99bdf21777e27b816a1 Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 14 Aug 2024 15:55:46 -0600
Subject: [PATCH 6/7] Mark bc test as flaky

---
 tests/test_bc.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_bc.py b/tests/test_bc.py
index b7485889..4c2ac06a 100644
--- a/tests/test_bc.py
+++ b/tests/test_bc.py
@@ -4,11 +4,13 @@
 """
 
 import numpy as np
+from flaky import flaky
 
 from rex.temporal_stats.temporal_stats import cdf
 from rex.utilities.bc_utils import QuantileDeltaMapping
 
 
+@flaky(max_runs=3, min_passes=1)
 def test_qdm():
     """Test basic QuantileDeltaMapping functionality with dummy
     distributions

From 4d1ed8698f84aaba7e8d9e0fcedbe2dd0b8ebc20 Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 14 Aug 2024 15:58:03 -0600
Subject: [PATCH 7/7] Undo group attr change for now

---
 rex/rechunk_h5/rechunk_h5.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rex/rechunk_h5/rechunk_h5.py b/rex/rechunk_h5/rechunk_h5.py
index 9cc5edda..61758a2e 100644
--- a/rex/rechunk_h5/rechunk_h5.py
+++ b/rex/rechunk_h5/rechunk_h5.py
@@ -44,7 +44,7 @@ def get_dataset_attributes(h5_file, out_json=None, chunk_size=2,
     with h5py.File(h5_file, 'r') as f:
         global_attrs = dict(f.attrs)
 
-        for ds_name in BaseResource._get_datasets(f):
+        for ds_name in f:
             ds = f[ds_name]
             try:
                 arr_size = ds_name in ['meta', 'coordinates', 'time_index']
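Note on patches 5-7: patch 7 reverts the `BaseResource._get_datasets` call
introduced in patch 4, restoring top-level iteration in
`get_dataset_attributes` until RechunkH5 gains grouped-dataset support.
Patches 5-6 are about test stability: `flaky` reruns a failing test instead
of failing the suite outright, and `@flaky(max_runs=3, min_passes=1)` passes
the test as long as one of up to three runs succeeds, which suits the
randomly sampled dummy distributions in `test_qdm`. A minimal usage sketch
of the same pattern (the test name and tolerance here are illustrative):

    import numpy as np
    from flaky import flaky

    @flaky(max_runs=3, min_passes=1)
    def test_with_random_inputs():
        # statistical assertion on random draws; it can occasionally fall
        # outside tolerance, so allow up to two automatic reruns
        samples = np.random.normal(0, 1, 10000)
        assert abs(samples.mean()) < 0.05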