Skip to content

Commit

Permalink
feat(iter_submodules): add match_containing mode
Browse files Browse the repository at this point in the history
This alters the `pathspec` evaluation to yield submodules that
*may* have content matching any pathspec, rather than the
pathspec having to match a submodule item directly.

This feature can be useful for implementing submodule recursion
around Git commands that do not support submodule recursion
directly.
  • Loading branch information
mih committed Jun 11, 2024
1 parent 7622267 commit 8025b7b
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 4 deletions.
45 changes: 41 additions & 4 deletions datalad_next/iter_collections/gitworktree.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,26 +245,63 @@ def iter_submodules(
path: Path,
*,
pathspecs: list[str] | GitPathSpecs | None = None,
match_containing: bool = False,
) -> Generator[GitTreeItem, None, None]:
"""Given a path, report all submodules of a repository worktree underneath
This is a thin convenience wrapper around ``iter_gitworktree()``.
With ``match_containing`` set to the default ``False``, this is merely a
convenience wrapper around ``iter_gitworktree()`` that selectively reports
on submodules. With ``match_containing=True`` and ``pathspecs`` given, the
yielded items correspond to submodules where the given ``pathspecs``
*could* match content. This includes submodules that are not available
locally, because no actual matching of pathspecs to submodule content is
performed -- only an evaluation of the submodule item itself.
"""
_pathspecs = GitPathSpecs(pathspecs)
if not _pathspecs:
# force flag to be sensible to simplify internal logic
match_containing = False

for item in iter_gitworktree(
path,
untracked=None,
link_target=False,
fp=False,
recursive='repository',
pathspecs=_pathspecs,
# if we want to match submodules that contain pathspecs matches
# we cannot give the job to Git, it won't report anything,
# but we need to match manually below
pathspecs=None if match_containing else _pathspecs,
):
# exclude non-submodules, or a submodule that was found at
# the root path -- which would indicate that the submodule
# itself is not around, only its record in the parent
if item.gittype == GitTreeItemType.submodule \
and item.name != PurePath('.'):
if item.gittype != GitTreeItemType.submodule \
or item.name == PurePath('.'):
continue

if not match_containing:
yield item
continue

assert pathspecs is not None
# does any pathspec match the "inside" of the current submodule's
# path
# we are using any() here to return as fast as possible.
# theoretically, we could also port all of them and enhance
# GitTreeItem to carry them outside, but we have no idea
# about the outside use case here, and cannot assume the additional
# cost is worth it
if any(
(ps if isinstance(ps, GitPathSpec)
else GitPathSpec.from_pathspec_str(ps)).for_subdir(str(item.name))
for ps in pathspecs
):
yield item
continue

# no match
continue


def _get_item(
Expand Down
16 changes: 16 additions & 0 deletions datalad_next/iter_collections/tests/test_itergitworktree.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,19 @@ def test_iter_submodules(modified_dataset):
res = list(iter_submodules(p, pathspecs=[':(exclude)*/sm_c']))
assert len(res) == len(all_sm) - 1
assert not any(r.name == PurePath('dir_sm', 'sm_c') for r in res)

# test pathspecs matching inside submodules
# baseline, pointing inside a submodule gives no matching results
assert not list(iter_submodules(p, pathspecs=['dir_sm/sm_c/.datalad']))
# we can discover the submodule that could have content that matches
# the pathspec
res = list(iter_submodules(p, pathspecs=['dir_sm/sm_c/.datalad'],
match_containing=True))
assert len(res) == 1
assert res[0].name == PurePath('dir_sm', 'sm_c')
# if we use a wildcard that matches any submodule, we also get all of them
# and this includes the dropped submodule, because iter_submodules()
# makes no assumptions about what this information will be used for
res = list(iter_submodules(p, pathspecs=['*/.datalad'],
match_containing=True))
assert len(res) == len(all_sm)

0 comments on commit 8025b7b

Please sign in to comment.