Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

list: add support for tracked directories #4108

Merged
merged 2 commits into from
Jun 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions dvc/repo/ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ def _ls(repo, path_info, recursive=None, dvc_only=False):
def onerror(exc):
raise exc

# use our own RepoTree instance instead of repo.repo_tree since we do not
# want fetch/stream enabled for ls
tree = RepoTree(repo)
# use our own RepoTree instance instead of repo.repo_tree since we want to
# fetch directory listings, but don't want to fetch file contents.
tree = RepoTree(repo, stream=True)

ret = {}
try:
Expand Down
56 changes: 32 additions & 24 deletions dvc/repo/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,36 @@ def isfile(self, path):

return not self.isdir(path)

def _walk(self, root, trie, topdown=True):
def _add_dir(self, top, trie, out, download_callback=None, **kwargs):
    """Register the entries of a tracked directory output in ``trie``.

    Does nothing unless this tree has ``fetch`` or ``stream`` enabled.
    Otherwise the directory listing is obtained through
    ``out.get_dir_cache(**kwargs)``. When ``fetch`` is enabled and
    ``out.changed_cache(filter_info=top)`` is true, the cache used under
    ``top`` is pulled via ``self.repo.cloud.pull`` and ``download_callback``
    (if given) is called with the pull result. Finally every listed entry
    is stored in ``trie`` under the parts of ``out.path_info / <relpath>``
    with ``None`` as its value.
    """
    if not (self.fetch or self.stream):
        return

    # pull dir cache if needed
    listing = out.get_dir_cache(**kwargs)

    # pull dir contents if needed
    if self.fetch and out.changed_cache(filter_info=top):
        used = out.get_used_cache(filter_info=top)
        pulled = self.repo.cloud.pull(used, **kwargs)
        if download_callback:
            download_callback(pulled)

    # The platform cannot change while we iterate, so test it only once.
    on_windows = os.name == "nt"
    for item in listing:
        relpath = item[out.remote.tree.PARAM_RELPATH]
        if on_windows:
            # Swap "/" for the native separator before joining the path.
            relpath = relpath.replace("/", os.sep)
        entry_info = out.path_info / relpath
        trie[entry_info.parts] = None

def _walk(self, root, trie, topdown=True, **kwargs):
dirs = set()
files = []

out = trie.get(root.parts)
if out and out.is_dir_checksum:
self._add_dir(root, trie, out, **kwargs)

root_len = len(root.parts)
for key, out in trie.iteritems(prefix=root.parts): # noqa: B301
if key == root.parts:
Expand All @@ -160,9 +186,7 @@ def _walk(self, root, trie, topdown=True):
for dname in dirs:
yield from self._walk(root / dname, trie)

def walk(
self, top, topdown=True, onerror=None, download_callback=None, **kwargs
):
def walk(self, top, topdown=True, onerror=None, **kwargs):
from pygtrie import Trie

assert topdown
Expand All @@ -185,26 +209,10 @@ def walk(
for out in outs:
trie[out.path_info.parts] = out

if out.is_dir_checksum and (self.fetch or self.stream):
# pull dir cache if needed
dir_cache = out.get_dir_cache(**kwargs)

# pull dir contents if needed
if self.fetch:
if out.changed_cache(filter_info=top):
used_cache = out.get_used_cache(filter_info=top)
downloaded = self.repo.cloud.pull(used_cache, **kwargs)
if download_callback:
download_callback(downloaded)

for entry in dir_cache:
entry_relpath = entry[out.remote.tree.PARAM_RELPATH]
if os.name == "nt":
entry_relpath = entry_relpath.replace("/", os.sep)
path_info = out.path_info / entry_relpath
trie[path_info.parts] = None

yield from self._walk(root, trie, topdown=topdown)
if out.is_dir_checksum and root.isin_or_eq(out.path_info):
self._add_dir(top, trie, out, **kwargs)

yield from self._walk(root, trie, topdown=topdown, **kwargs)

def isdvc(self, path, **kwargs):
try:
Expand Down
27 changes: 27 additions & 0 deletions tests/func/test_ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,3 +445,30 @@ def test_ls_shows_pipeline_tracked_outs(tmp_dir, dvc, scm, run_copy):

files = Repo.ls(os.curdir, dvc_only=True)
match_files(files, ((("bar",), True),))


def test_ls_granular(erepo_dir):
    """Check that ``Repo.ls`` lists paths inside a DVC-generated directory.

    A nested directory is created with ``erepo_dir.dvc_gen`` and then
    listed twice: once at ``dir/subdir`` and once at ``dir``.
    """

    def entry(path, isdir=False):
        # Every expected entry shares the same non-out, non-exec flags.
        return {
            "isout": False,
            "isdir": isdir,
            "isexec": False,
            "path": path,
        }

    contents = {
        "dir": {
            "1": "1",
            "2": "2",
            "subdir": {"foo": "foo", "bar": "bar"},
        }
    }
    with erepo_dir.chdir():
        erepo_dir.dvc_gen(contents, commit="create dir")

    repo_url = os.fspath(erepo_dir)

    # Listing a nested directory of the generated tree.
    subdir_entries = Repo.ls(repo_url, os.path.join("dir", "subdir"))
    assert subdir_entries == [entry("bar"), entry("foo")]

    # Listing the generated directory itself.
    dir_entries = Repo.ls(repo_url, "dir")
    assert dir_entries == [
        entry("1"),
        entry("2"),
        entry("subdir", isdir=True),
    ]