Skip to content

Commit

Permalink
generalize file download for grouped datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
jacobmarks authored and brimoor committed Jul 17, 2024
1 parent 9a985a6 commit 9bf1237
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions fiftyone/utils/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager
import inspect
import itertools
import logging
import math
import os
Expand Down Expand Up @@ -1331,9 +1332,20 @@ def _resolve_dataset_name(config, **kwargs):
return name


def _get_files_to_download(dataset):
filepaths = dataset.values("filepath")
def _get_files_to_download(sample_collection):
    """Returns the media filepaths in the given collection that do not yet
    exist on the local disk.

    If the collection's ``media_type`` is ``"group"``, each of its
    ``group_slices`` is selected in turn via ``select_group_slices()`` and
    processed recursively, and the per-slice results are concatenated.

    Each filepath is reported at most once, in first-seen order, even if it is
    referenced by multiple samples or by multiple group slices.

    Args:
        sample_collection: a sample collection that exposes ``media_type`` and
            ``values("filepath")`` and, when grouped, ``group_slices`` and
            ``select_group_slices()``

    Returns:
        a list of unique filepaths for which ``os.path.exists()`` is False
    """
    if sample_collection.media_type == "group":
        per_slice_filepaths = (
            _get_files_to_download(
                sample_collection.select_group_slices(group_slice)
            )
            for group_slice in sample_collection.group_slices
        )

        # Different slices may point at the same file; ``dict.fromkeys()``
        # drops the repeats while keeping first-seen order
        return list(
            dict.fromkeys(itertools.chain.from_iterable(per_slice_filepaths))
        )

    # De-duplicate before hitting the filesystem so that each distinct path
    # is only checked (and later requested) once
    unique_filepaths = dict.fromkeys(sample_collection.values("filepath"))

    return [fp for fp in unique_filepaths if not os.path.exists(fp)]


Expand Down

0 comments on commit 9bf1237

Please sign in to comment.