Skip to content

Commit

Permalink
iter_gitworktree() for processing work tree content
Browse files Browse the repository at this point in the history
The iterator is also integrated with `ls-file-collection` as collection
type `gitworktree`.

Closes datalad#350
Ping datalad#323
  • Loading branch information
mih committed Jun 5, 2023
1 parent b690ca7 commit 0b16d98
Show file tree
Hide file tree
Showing 7 changed files with 426 additions and 14 deletions.
62 changes: 51 additions & 11 deletions datalad_next/commands/ls_file_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@
FileSystemItemType,
compute_multihash_from_fp,
)
from datalad_next.iter_collections.gitworktree import (
GitTreeItemType,
GitWorktreeFileSystemItem,
iter_gitworktree,
)


lgr = getLogger('datalad.local.ls_file_collection')
Expand All @@ -64,6 +69,7 @@
_supported_collection_types = (
'directory',
'tarfile',
'gitworktree',
)


Expand Down Expand Up @@ -104,7 +110,7 @@ def get_collection_iter(self, **kwargs):
hash = kwargs['hash']
iter_fx = None
iter_kwargs = None
if type in ('directory', 'tarfile'):
if type in ('directory', 'tarfile', 'gitworktree'):
if not isinstance(collection, Path):
self.raise_for(
kwargs,
Expand All @@ -118,10 +124,16 @@ def get_collection_iter(self, **kwargs):
item2res = fsitem_to_dict
if type == 'directory':
iter_fx = iter_dir
item2res = fsitem_to_dict
elif type == 'tarfile':
iter_fx = iter_tar
item2res = fsitem_to_dict
elif type == 'gitworktree':
iter_fx = iter_gitworktree
item2res = gitworktreeitem_to_dict
else:
raise RuntimeError('unhandled condition')
raise RuntimeError(
'unhandled collection-type: this is a defect, please report.')
assert iter_fx is not None
return dict(
collection=CollectionSpec(
Expand Down Expand Up @@ -166,6 +178,33 @@ def fsitem_to_dict(item, hash) -> Dict:
return d


def gitworktreeitem_to_dict(item, hash) -> Dict:
    """Turn an item yielded for a `gitworktree` collection into a result dict.

    Parameters
    ----------
    item
      Item to convert. Its ``gittype``, ``gitsha`` and ``name`` attributes
      are read. A ``GitWorktreeFileSystemItem`` is first converted with
      ``fsitem_to_dict()``; any other item only contributes its name (and
      type label, if there is one).
    hash
      Passed on unchanged to ``fsitem_to_dict()``; not used otherwise.

    Returns
    -------
    dict
      Result properties. ``gitsha`` is only included when the item has a
      non-empty one, ``gittype`` only when the item declares a git type.
    """
    # git item types that are reported under a different label
    label_overrides = {
        # the executable bit does not make for a distinct result type
        GitTreeItemType.executablefile: 'file',
        # 'dataset' is the commonly used label at the command API level
        GitTreeItemType.submodule: 'dataset',
    }

    # any type without an override is reported by its enum value
    if item.gittype:
        type_label = label_overrides.get(item.gittype, item.gittype.value)
    else:
        type_label = None
    has_type = type_label is not None

    if isinstance(item, GitWorktreeFileSystemItem):
        record = fsitem_to_dict(item, hash)
    elif has_type:
        # without file system information, the git type doubles as the
        # result type
        record = {'item': item.name, 'type': type_label}
    else:
        record = {'item': item.name}

    if item.gitsha:
        record['gitsha'] = item.gitsha
    if has_type:
        record['gittype'] = type_label
    return record


@build_doc
class LsFileCollection(ValidatedInterface):
"""Report information on files in a collection
Expand Down Expand Up @@ -310,19 +349,20 @@ def custom_result_renderer(res, **kwargs):
'minutes ago', 'min ago').replace(
'seconds ago', 'sec ago')

ui.message('{mode} {size: >6} {uid: >4}:{gid: >4} {hts: >11} {item} ({type})'.format(
# stick with numerical IDs (although less accessible), we cannot
# know in general whether this particular system can map numerical
# IDs to valid target names (think stored name in tarballs)
owner_info = f'{res["uid"]}:{res["gid"]}' if 'uid' in res else ''

ui.message('{mode} {size: >6} {owner: >9} {hts: >11} {item} ({type})'.format(
mode=mode,
size=size,
# stick with numerical IDs (although less accessible), we cannot
# know in general whether this particular system can map numerical
# IDs to valid target names (think stored name in tarballs)
uid=res.get('uid', '-'),
gid=res.get('gid', '-'),
hts=hts,
owner=owner_info,
hts=hts if mtime else '',
item=ac.color_word(
res.get('item', '<missing-item-identifier>'),
res.get('item', '<no-item-identifier>'),
ac.BOLD),
type=ac.color_word(
res.get('type', '<missing-type>'),
res.get('type', '<no-type-information>'),
ac.MAGENTA),
))
14 changes: 14 additions & 0 deletions datalad_next/commands/tests/test_ls_file_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,20 @@ def test_ls_file_collection_directory(tmp_path):
assert len(res) == 0


def test_ls_file_collection_gitworktree(existing_dataset):
    """Smoke test the `gitworktree` collection type on a plain dataset."""
    worktree = existing_dataset.pathobj
    common = {'result_renderer': 'disabled'}

    # plain listing: more than one item, each one carrying a `gitsha`
    plain = ls_file_collection('gitworktree', worktree, **common)
    assert len(plain) > 1
    assert not [rec for rec in plain if 'gitsha' not in rec]

    # hashing must not change the number of items, and must add an
    # md5 hash to every single one of them
    hashed = ls_file_collection(
        'gitworktree', worktree, hash='md5', **common)
    assert len(plain) == len(hashed)
    assert not [rec for rec in hashed if 'hash-md5' not in rec]


def test_ls_file_collection_validator():
val = LsFileCollectionParamValidator()

Expand Down
1 change: 1 addition & 0 deletions datalad_next/iter_collections/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
:toctree: generated
directory
gitworktree
tarfile
zipfile
utils
Expand Down
Loading

0 comments on commit 0b16d98

Please sign in to comment.