Skip to content

Commit

Permalink
Update to allow multiple calls to read_iter
Browse files (browse the repository at this point in the history)
  • Loading branch information
gspowley committed Sep 11, 2024
1 parent ade7a50 commit 6b6c62e
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
2 changes: 2 additions & 0 deletions apis/python/src/tiledbvcf/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,8 @@ def read_iter(
if isinstance(samples, str):
samples = [samples]

self.reader.reset()

if not self.read_completed():
yield self.read(attrs, samples, regions, samples_file, bed_file)
while not self.read_completed():
Expand Down
18 changes: 18 additions & 0 deletions apis/python/tests/test_tiledbvcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,24 @@ def test_incomplete_read_generator():
overall_df,
)

# Test that read_iter can be called again on the same dataset after a previous read completed
dfs = []
for df in test_ds.read_iter(attrs=["pos_end"], regions=["1:12700-13400"]):
dfs.append(df)
overall_df = pd.concat(dfs, ignore_index=True)

assert len(overall_df) == 6
_check_dfs(
pd.DataFrame.from_dict(
{
"pos_end": np.array(
[12771, 12771, 13374, 13389, 13395, 13413], dtype=np.int32
)
}
),
overall_df,
)


def test_read_filters(test_ds):
df = test_ds.read(
Expand Down

0 comments on commit 6b6c62e

Please sign in to comment.