From 6b6c62e20254cbba7210f82e9f0a9c5b022aca20 Mon Sep 17 00:00:00 2001 From: George Powley Date: Tue, 10 Sep 2024 22:41:00 -0400 Subject: [PATCH] Update to allow multiple calls to read_iter --- apis/python/src/tiledbvcf/dataset.py | 2 ++ apis/python/tests/test_tiledbvcf.py | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/apis/python/src/tiledbvcf/dataset.py b/apis/python/src/tiledbvcf/dataset.py index 8a5a921f7..7028650f4 100644 --- a/apis/python/src/tiledbvcf/dataset.py +++ b/apis/python/src/tiledbvcf/dataset.py @@ -502,6 +502,8 @@ def read_iter( if isinstance(samples, str): samples = [samples] + self.reader.reset() + if not self.read_completed(): yield self.read(attrs, samples, regions, samples_file, bed_file) while not self.read_completed(): diff --git a/apis/python/tests/test_tiledbvcf.py b/apis/python/tests/test_tiledbvcf.py index 3c8a3afbf..e052b9b25 100755 --- a/apis/python/tests/test_tiledbvcf.py +++ b/apis/python/tests/test_tiledbvcf.py @@ -399,6 +399,24 @@ def test_incomplete_read_generator(): overall_df, ) + # Test that the iterator can be used again + dfs = [] + for df in test_ds.read_iter(attrs=["pos_end"], regions=["1:12700-13400"]): + dfs.append(df) + overall_df = pd.concat(dfs, ignore_index=True) + + assert len(overall_df) == 6 + _check_dfs( + pd.DataFrame.from_dict( + { + "pos_end": np.array( + [12771, 12771, 13374, 13389, 13395, 13413], dtype=np.int32 + ) + } + ), + overall_df, + ) + def test_read_filters(test_ds): df = test_ds.read(