Skip to content

Commit

Permalink
fetch: introduce --tree-only
Browse files (browse the repository at this point in the history)
  • Loading branch information
efiop committed Mar 9, 2023
1 parent fe7db32 commit 85b73ff
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 8 deletions.
7 changes: 7 additions & 0 deletions dvc/commands/data_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def run(self):
with_deps=self.args.with_deps,
recursive=self.args.recursive,
run_cache=self.args.run_cache,
tree_only=self.args.tree_only,
)
self.log_summary({"fetched": processed_files_count})
except DvcException:
Expand Down Expand Up @@ -322,6 +323,12 @@ def add_parser(subparsers, _parent_parser):
default=False,
help="Fetch run history for all stages.",
)
fetch_parser.add_argument(
"--tree-only",
action="store_true",
default=False,
help="Only fetch .dir objects.",
)
fetch_parser.set_defaults(func=CmdDataFetch)

# Status
Expand Down
8 changes: 7 additions & 1 deletion dvc/dependency/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def get_used_objs(self, **kwargs) -> Dict[Optional["ObjectDB"], Set["HashInfo"]]
return used

def _get_used_and_obj(
self, obj_only: bool = False, **kwargs
self, obj_only: bool = False, tree_only: bool = False, **kwargs
) -> Tuple[Dict[Optional["ObjectDB"], Set["HashInfo"]], "Meta", "HashFile"]:
from dvc.config import NoRemoteError
from dvc.exceptions import NoOutputOrStageError, PathMissingError
Expand All @@ -126,6 +126,7 @@ def _get_used_and_obj(
force=True,
jobs=kwargs.get("jobs"),
recursive=True,
tree_only=tree_only,
).items():
if odb is None:
odb = repo.cloud.get_remote_odb()
Expand All @@ -152,6 +153,11 @@ def _get_used_and_obj(
self._objs[rev] = obj
self._meta[rev] = meta

if tree_only:
if isinstance(obj, Tree):
used_obj_ids[object_store].add(obj.hash_info)
return used_obj_ids, meta, obj

used_obj_ids[object_store].add(obj.hash_info)
if isinstance(obj, Tree):
used_obj_ids[object_store].update(oid for _, _, oid in obj)
Expand Down
19 changes: 12 additions & 7 deletions dvc/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -1065,7 +1065,7 @@ def _collect_used_dir_cache(
return obj

def get_used_objs( # noqa: C901, PLR0911
self, **kwargs
self, tree_only: bool = False, **kwargs
) -> Dict[Optional["ObjectDB"], Set["HashInfo"]]:
"""Return filtered set of used object IDs for this out."""

Expand All @@ -1076,7 +1076,7 @@ def get_used_objs( # noqa: C901, PLR0911
if self.stage.is_repo_import:
if push:
return {}
return self.get_used_external(**kwargs)
return self.get_used_external(tree_only=tree_only, **kwargs)

if push and not self.can_push:
return {}
Expand All @@ -1100,6 +1100,16 @@ def get_used_objs( # noqa: C901, PLR0911
logger.warning(msg)
return {}

if self.remote:
remote = self.repo.cloud.get_remote_odb(name=self.remote)
else:
remote = None

if tree_only:
if self.is_dir_checksum:
return {remote: [self.hash_info]}
return {}

obj: Optional["HashFile"]
if self.is_dir_checksum:
obj = self._collect_used_dir_cache(**kwargs)
Expand All @@ -1111,11 +1121,6 @@ def get_used_objs( # noqa: C901, PLR0911
if not obj:
return {}

if self.remote:
remote = self.repo.cloud.get_remote_odb(name=self.remote)
else:
remote = None

return {remote: self._named_obj_ids(obj)}

def _named_obj_ids(self, obj):
Expand Down
2 changes: 2 additions & 0 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ def used_objs( # noqa: PLR0913
revs=None,
num=1,
push: bool = False,
tree_only: bool = False,
):
"""Get the stages related to the given target and collect
the `info` of its outputs.
Expand Down Expand Up @@ -515,6 +516,7 @@ def used_objs( # noqa: PLR0913
recursive=recursive,
with_deps=with_deps,
push=push,
tree_only=tree_only,
).items():
used[odb].update(objs)

Expand Down
2 changes: 2 additions & 0 deletions dvc/repo/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def fetch( # noqa: C901, PLR0913
run_cache=False,
revs=None,
odb: Optional["HashFileDB"] = None,
tree_only: bool = False,
) -> int:
"""Download data items from a cloud and imported repositories
Expand Down Expand Up @@ -96,6 +97,7 @@ def fetch( # noqa: C901, PLR0913
recursive=recursive,
revs=revs,
odb=odb,
tree_only=tree_only,
)
result.transferred.update(d)
result.failed.update(f)
Expand Down
2 changes: 2 additions & 0 deletions dvc/repo/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ def used_objs(
recursive: bool = False,
jobs: Optional[int] = None,
push: bool = False,
tree_only: bool = False,
) -> "ObjectContainer":
from collections import defaultdict

Expand All @@ -452,6 +453,7 @@ def used_objs(
jobs=jobs,
filter_info=filter_info,
push=push,
tree_only=tree_only,
).items():
used[odb].update(objs)
return used
Expand Down

0 comments on commit 85b73ff

Please sign in to comment.