-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
import-url/update: add --no-download flag #8024
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
from collections import defaultdict | ||
from contextlib import contextmanager | ||
from functools import wraps | ||
from typing import TYPE_CHECKING, Callable, Optional | ||
from typing import TYPE_CHECKING, Callable, List, Optional | ||
|
||
from funcy import cached_property | ||
|
||
|
@@ -18,6 +18,7 @@ | |
from dvc.fs import FileSystem | ||
from dvc.repo.scm_context import SCMContext | ||
from dvc.scm import Base | ||
from dvc.stage import Stage | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
@@ -444,6 +445,44 @@ def used_objs( | |
|
||
return used | ||
|
||
def partial_imports( | ||
self, | ||
targets=None, | ||
all_branches=False, | ||
all_tags=False, | ||
all_commits=False, | ||
all_experiments=False, | ||
commit_date: Optional[str] = None, | ||
recursive=False, | ||
revs=None, | ||
num=1, | ||
) -> List["Stage"]: | ||
"""Get the stages related to the given target and collect dependencies | ||
which are missing outputs. | ||
|
||
This is useful to retrieve files which have been imported to the repo | ||
using --no-download. | ||
|
||
Returns: | ||
A list of partially imported stages | ||
""" | ||
from itertools import chain | ||
|
||
partial_imports = chain.from_iterable( | ||
self.index.partial_imports(targets, recursive=recursive) | ||
for _ in self.brancher( | ||
Comment on lines
+471
to
+473
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One other thing to note is that this change makes us call brancher more than once on fetch - first to collect regular/cached used objects and then a second time to collect partial imports. This is expensive if we are fetching multiple revs (i.e. the user uses Ideally we want to walk the repo once per revision, and collect everything we need in that one pass. |
||
revs=revs, | ||
all_branches=all_branches, | ||
all_tags=all_tags, | ||
all_commits=all_commits, | ||
all_experiments=all_experiments, | ||
commit_date=commit_date, | ||
num=num, | ||
) | ||
) | ||
|
||
return list(partial_imports) | ||
|
||
@property | ||
def stages(self): # obsolete, only for backward-compatibility | ||
return self.index.stages | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -93,6 +93,18 @@ def fetch( | |
downloaded += d | ||
failed += f | ||
|
||
d, f = _fetch_partial_imports( | ||
self, | ||
targets, | ||
all_branches=all_branches, | ||
all_tags=all_tags, | ||
all_commits=all_commits, | ||
recursive=recursive, | ||
revs=revs, | ||
) | ||
downloaded += d | ||
failed += f | ||
|
||
if failed: | ||
raise DownloadError(failed) | ||
|
||
|
@@ -107,3 +119,25 @@ def _fetch(repo, obj_ids, **kwargs): | |
except FileTransferError as exc: | ||
failed += exc.amount | ||
return downloaded, failed | ||
|
||
|
||
def _fetch_partial_imports(repo, targets, **kwargs): | ||
from dvc.stage.exceptions import DataSourceChanged | ||
|
||
downloaded = 0 | ||
failed = 0 | ||
for stage in repo.partial_imports(targets, **kwargs): | ||
try: | ||
stage.run() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using in this workflow I would not expect
|
||
except DataSourceChanged as exc: | ||
logger.warning(f"{exc}") | ||
failed += 1 | ||
continue | ||
if not any( | ||
kwargs.get(kw, None) | ||
for kw in ("all_branches", "all_tags", "all_commits", "revs") | ||
): | ||
stage.dump() | ||
|
||
downloaded += 1 | ||
return downloaded, failed |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thoughts? Does this help differentiate the options?