diff --git a/datalad_next/iter_collections/gitworktree.py b/datalad_next/iter_collections/gitworktree.py
index 64320417..549cb6ee 100644
--- a/datalad_next/iter_collections/gitworktree.py
+++ b/datalad_next/iter_collections/gitworktree.py
@@ -223,12 +223,14 @@ def iter_gitworktree(
             # report on a pending item, this is not a "higher-stage"
             # report by ls-files
             item = _get_item(path, link_target, fp, *pending_item)
-            if fp and item.type == FileSystemItemType.file:
-                with (Path(path) / item.name).open('rb') as fp:
-                    item.fp = fp
-                    yield item
-            else:
+            fp_src = _get_fp_src(fp, path, item)
+            if fp_src is None:
+                # nothing to open
                 yield item
+            else:
+                with fp_src.open('rb') as active_fp:
+                    item.fp = active_fp
+                    yield item
 
         if ipath is None:
             # this is the trailing `None` record. we are done here
@@ -328,3 +330,46 @@ def _git_ls_files(path, *args):
             keep_ends=False,
         )
     )
+
+
+def _get_fp_src(
+    fp: bool,
+    basepath: Path,
+    item: GitWorktreeItem | GitWorktreeFileSystemItem,
+) -> Path | None:
+    """Return the path to open for an item's file pointer, if any
+
+    Parameters
+    ----------
+    fp: bool
+      Whether a file pointer was requested at all.
+    basepath: Path
+      Directory that ``item.name`` is relative to.
+    item: GitWorktreeItem or GitWorktreeFileSystemItem
+      Item to locate content for.
+
+    Returns
+    -------
+    Path or None
+      ``None`` when no file pointer was requested, when the item is
+      neither a file nor a symlink, or when nothing exists at the
+      item's location (``exists()`` resolves through symlinks, so a
+      dangling symlink also yields ``None``).
+    """
+    if not fp:
+        # no file pointer request, we are done
+        return None
+
+    if item.type not in (FileSystemItemType.file,
+                         FileSystemItemType.symlink):
+        # not a type of item that can be opened
+        return None
+
+    # the code replaced by this helper coerced the base path with
+    # `Path()`; keep doing so to not break callers passing a `str`
+    fp_src_fullpath = Path(basepath) / item.name
+    if not fp_src_fullpath.exists():
+        # nothing there to open (would resolve through a symlink)
+        return None
+
+    return fp_src_fullpath
diff --git a/datalad_next/iter_collections/tests/test_itergitworktree.py b/datalad_next/iter_collections/tests/test_itergitworktree.py
index 1f884ff0..060ee590 100644
--- a/datalad_next/iter_collections/tests/test_itergitworktree.py
+++ b/datalad_next/iter_collections/tests/test_itergitworktree.py
@@ -5,6 +5,8 @@
 
 import pytest
 
+from datalad_next.utils import check_symlink_capability
+
 from datalad_next.tests.utils import rmtree
 
 from ..gitworktree import (
@@ -170,3 +172,68 @@ def test_iter_gitworktree_deadsymlinks(existing_dataset, no_result_rendering):
     # it may take a different form, hence not checking for type
     assert len(all_items) == 1
     assert all_items[0].name == PurePath('file1')
+
+
+def prep_fp_tester(ds):
+    """Populate ``ds`` with ``file_*`` items for file pointer tests
+
+    Returns the number of ``file_*`` items created and the template
+    used to generate their content.
+    """
+    # we expect to process an exact number of files below
+    # 3 annexed files, 1 untracked, 1 in git,
+    # and possibly 1 symlink in git, 1 symlink untracked
+    # we count them up on creation, and then down on test
+    fcount = 0
+
+    content_tmpl = 'content: #ö file_{}'
+    for i in ('annex1', 'annex2', 'annex3'):
+        (ds.pathobj / f'file_{i}').write_text(
+            content_tmpl.format(i), encoding='utf-8')
+        fcount += 1
+    ds.save()
+    ds.drop(
+        ds.pathobj / 'file_annex1',
+        reckless='availability',
+    )
+    # and also add a file to git directly and have one untracked too
+    for i in ('untracked', 'ingit'):
+        (ds.pathobj / f'file_{i}').write_text(
+            content_tmpl.format(i), encoding='utf-8')
+        fcount += 1
+    ds.save('file_ingit', to_git=True)
+    # and add symlinks (untracked and in git)
+    if check_symlink_capability(
+        ds.pathobj / '_dummy', ds.pathobj / '_dummy_target'
+    ):
+        for i in ('symlinkuntracked', 'symlinkingit'):
+            tpath = ds.pathobj / f'target_{i}'
+            lpath = ds.pathobj / f'file_{i}'
+            tpath.write_text(
+                content_tmpl.format(i), encoding='utf-8')
+            lpath.symlink_to(tpath)
+            fcount += 1
+        ds.save('file_symlinkingit', to_git=True)
+    return fcount, content_tmpl
+
+
+def test_iter_gitworktree_basic_fp(existing_dataset, no_result_rendering):
+    """Check item count and ``fp`` content for ``fp=True`` reports"""
+    ds = existing_dataset
+    fcount, content_tmpl = prep_fp_tester(ds)
+
+    for ai in filter(
+        lambda i: i.name.name.startswith('file_'),
+        iter_gitworktree(ds.pathobj, fp=True)
+    ):
+        fcount -= 1
+        if ai.fp:
+            # for annexed files the fp can be an annex pointer file.
+            # in the context of `iter_gitworktree` this is not a
+            # recognized construct
+            assert content_tmpl.format(
+                ai.name.name[5:]) == ai.fp.read().decode() \
+                or ai.name.name.startswith('file_annex')
+        else:
+            assert (ds.pathobj / ai.name).exists() is False
+    assert not fcount