diff --git a/dvc/repo/ls.py b/dvc/repo/ls.py index 9344e8b10d..e02d52116a 100644 --- a/dvc/repo/ls.py +++ b/dvc/repo/ls.py @@ -53,9 +53,9 @@ def _ls(repo, path_info, recursive=None, dvc_only=False): def onerror(exc): raise exc - # use our own RepoTree instance instead of repo.repo_tree since we do not - # want fetch/stream enabled for ls - tree = RepoTree(repo) + # use our own RepoTree instance instead of repo.repo_tree since we want to + # fetch directory listings, but don't want to fetch file contents. + tree = RepoTree(repo, stream=True) ret = {} try: diff --git a/dvc/repo/tree.py b/dvc/repo/tree.py index 175d80f95a..23cf58a0fa 100644 --- a/dvc/repo/tree.py +++ b/dvc/repo/tree.py @@ -137,10 +137,36 @@ def isfile(self, path): return not self.isdir(path) - def _walk(self, root, trie, topdown=True): + def _add_dir(self, top, trie, out, download_callback=None, **kwargs): + if not self.fetch and not self.stream: + return + + # pull dir cache if needed + dir_cache = out.get_dir_cache(**kwargs) + + # pull dir contents if needed + if self.fetch: + if out.changed_cache(filter_info=top): + used_cache = out.get_used_cache(filter_info=top) + downloaded = self.repo.cloud.pull(used_cache, **kwargs) + if download_callback: + download_callback(downloaded) + + for entry in dir_cache: + entry_relpath = entry[out.remote.tree.PARAM_RELPATH] + if os.name == "nt": + entry_relpath = entry_relpath.replace("/", os.sep) + path_info = out.path_info / entry_relpath + trie[path_info.parts] = None + + def _walk(self, root, trie, topdown=True, **kwargs): dirs = set() files = [] + out = trie.get(root.parts) + if out and out.is_dir_checksum: + self._add_dir(root, trie, out, **kwargs) + root_len = len(root.parts) for key, out in trie.iteritems(prefix=root.parts): # noqa: B301 if key == root.parts: @@ -160,9 +186,7 @@ def _walk(self, root, trie, topdown=True): for dname in dirs: yield from self._walk(root / dname, trie) - def walk( - self, top, topdown=True, onerror=None, download_callback=None, **kwargs - ): + def walk(self, top, topdown=True, onerror=None, **kwargs): from pygtrie import Trie assert topdown @@ -185,26 +209,10 @@ def walk( for out in outs: trie[out.path_info.parts] = out - if out.is_dir_checksum and (self.fetch or self.stream): - # pull dir cache if needed - dir_cache = out.get_dir_cache(**kwargs) - - # pull dir contents if needed - if self.fetch: - if out.changed_cache(filter_info=top): - used_cache = out.get_used_cache(filter_info=top) - downloaded = self.repo.cloud.pull(used_cache, **kwargs) - if download_callback: - download_callback(downloaded) - - for entry in dir_cache: - entry_relpath = entry[out.remote.tree.PARAM_RELPATH] - if os.name == "nt": - entry_relpath = entry_relpath.replace("/", os.sep) - path_info = out.path_info / entry_relpath - trie[path_info.parts] = None - - yield from self._walk(root, trie, topdown=topdown) + if out.is_dir_checksum and root.isin_or_eq(out.path_info): + self._add_dir(top, trie, out, **kwargs) + + yield from self._walk(root, trie, topdown=topdown, **kwargs) def isdvc(self, path, **kwargs): try: diff --git a/tests/func/test_ls.py b/tests/func/test_ls.py index bdf1c8cc0b..14d57acf15 100644 --- a/tests/func/test_ls.py +++ b/tests/func/test_ls.py @@ -445,3 +445,30 @@ def test_ls_shows_pipeline_tracked_outs(tmp_dir, dvc, scm, run_copy): files = Repo.ls(os.curdir, dvc_only=True) match_files(files, ((("bar",), True),)) + + +def test_ls_granular(erepo_dir): + with erepo_dir.chdir(): + erepo_dir.dvc_gen( + { + "dir": { + "1": "1", + "2": "2", + "subdir": {"foo": "foo", "bar": "bar"}, + } + }, + commit="create dir", + ) + + entries = Repo.ls(os.fspath(erepo_dir), os.path.join("dir", "subdir")) + assert entries == [ + {"isout": False, "isdir": False, "isexec": False, "path": "bar"}, + {"isout": False, "isdir": False, "isexec": False, "path": "foo"}, + ] + + entries = Repo.ls(os.fspath(erepo_dir), "dir") + assert entries == [ + {"isout": False, "isdir": False, "isexec": False, "path": "1"}, + {"isout": False, "isdir": False, "isexec": False, "path": "2"}, + {"isout": False, "isdir": True, "isexec": False, "path": "subdir"}, + ]