From 18bdd8a8193ed25dce57fe5da2aa4fafecffc0df Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 6 Dec 2023 11:25:51 +0100 Subject: [PATCH 1/2] Let `iter_gitworktree(fp=True)` report on symlinked content This generalizes an approach from https://github.com/datalad/datalad-next/pull/539. It is implemented in a way that enables reuse of the helpers in that PR too. With this change regular files (tracked or untracked) and symlink targets (via the symlink) are also opened, if they actually exist. Closes #553 --- datalad_next/iter_collections/gitworktree.py | 38 ++++++++++-- .../tests/test_itergitworktree.py | 61 +++++++++++++++++++ 2 files changed, 94 insertions(+), 5 deletions(-) diff --git a/datalad_next/iter_collections/gitworktree.py b/datalad_next/iter_collections/gitworktree.py index 64320417..549cb6ee 100644 --- a/datalad_next/iter_collections/gitworktree.py +++ b/datalad_next/iter_collections/gitworktree.py @@ -223,12 +223,14 @@ def iter_gitworktree( # report on a pending item, this is not a "higher-stage" # report by ls-files item = _get_item(path, link_target, fp, *pending_item) - if fp and item.type == FileSystemItemType.file: - with (Path(path) / item.name).open('rb') as fp: - item.fp = fp - yield item - else: + fp_src = _get_fp_src(fp, path, item) + if fp_src is None: + # nothing to open yield item + else: + with fp_src.open('rb') as active_fp: + item.fp = active_fp + yield item if ipath is None: # this is the trailing `None` record. we are done here @@ -328,3 +330,29 @@ def _git_ls_files(path, *args): keep_ends=False, ) ) + + +def _get_fp_src( + fp: bool, + basepath: Path, + item: GitWorktreeItem | GitWorktreeFileSystemItem, +) -> Path | None: + if not fp: + # no file pointer request, we are done + return None + + # if we get here, this is about file pointers... + fp_src = None + if item.type in (FileSystemItemType.file, + FileSystemItemType.symlink): + fp_src = item.name + if fp_src is None: + # nothing to open + return None + + fp_src_fullpath = basepath / fp_src + if not fp_src_fullpath.exists(): + # nothing there to open (would resolve through a symlink) + return None + + return fp_src_fullpath diff --git a/datalad_next/iter_collections/tests/test_itergitworktree.py b/datalad_next/iter_collections/tests/test_itergitworktree.py index 1f884ff0..7cc13f62 100644 --- a/datalad_next/iter_collections/tests/test_itergitworktree.py +++ b/datalad_next/iter_collections/tests/test_itergitworktree.py @@ -5,6 +5,8 @@ import pytest +from datalad_next.utils import check_symlink_capability + from datalad_next.tests.utils import rmtree from ..gitworktree import ( @@ -170,3 +172,62 @@ def test_iter_gitworktree_deadsymlinks(existing_dataset, no_result_rendering): # it may take a different form, hence not checking for type assert len(all_items) == 1 assert all_items[0].name == PurePath('file1') + + +def prep_fp_tester(ds): + # we expect to process an exact number of files below + # 3 annexed files, 1 untracked, 1 in git, + # and possibly 1 symlink in git, 1 symlink untracked + # we count them up on creation, and then down on test + fcount = 0 + + # TODO bring back the umlaut. But waiting for triage + # https://github.com/datalad/datalad-next/pull/539#issuecomment-1842605708 + #content_tmpl = 'content: #ö file_{}\n' + content_tmpl = 'content: # file_{}\n' + for i in ('annex1', 'annex2', 'annex3'): + (ds.pathobj / f'file_{i}').write_text(content_tmpl.format(i)) + fcount += 1 + ds.save() + ds.drop( + ds.pathobj / 'file_annex1', + reckless='availability', + ) + # and also add a file to git directly and a have one untracked too + for i in ('untracked', 'ingit'): + (ds.pathobj / f'file_{i}').write_text(content_tmpl.format(i)) + fcount += 1 + ds.save('file_ingit', to_git=True) + # and add symlinks (untracked and in git) + if check_symlink_capability( + ds.pathobj / '_dummy', ds.pathobj / '_dummy_target' + ): + for i in ('symlinkuntracked', 'symlinkingit'): + tpath = ds.pathobj / f'target_{i}' + lpath = ds.pathobj / f'file_{i}' + tpath.write_text(content_tmpl.format(i)) + lpath.symlink_to(tpath) + fcount += 1 + ds.save('file_symlinkingit', to_git=True) + return fcount, content_tmpl + + +def test_iter_gitworktree_basic_fp(existing_dataset, no_result_rendering): + ds = existing_dataset + fcount, content_tmpl = prep_fp_tester(ds) + + for ai in filter( + lambda i: i.name.name.startswith('file_'), + iter_gitworktree(ds.pathobj, fp=True) + ): + fcount -= 1 + if ai.fp: + # for annexed files the fp can be an annex pointer file. + # in the context of `iter_gitworktree` this is not a + # recognized construct + assert content_tmpl.format( + ai.name.name[5:]) == ai.fp.read().decode() \ + or ai.name.name.startswith('file_annex') + else: + assert (ds.pathobj / ai.name).exists() is False + assert not fcount From 406d0d5f79507281fa7e5eb462e2dc280fca0e05 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Wed, 6 Dec 2023 12:50:08 +0100 Subject: [PATCH 2/2] Make test non-utf system encoding safe Guidelines from https://github.com/datalad/datalad-next/pull/539/commits/319a07dc97f5403fe30a2285a84c47e6ac45e101 --- .../iter_collections/tests/test_itergitworktree.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/datalad_next/iter_collections/tests/test_itergitworktree.py b/datalad_next/iter_collections/tests/test_itergitworktree.py index 7cc13f62..060ee590 100644 --- a/datalad_next/iter_collections/tests/test_itergitworktree.py +++ b/datalad_next/iter_collections/tests/test_itergitworktree.py @@ -181,12 +181,10 @@ def prep_fp_tester(ds): # we count them up on creation, and then down on test fcount = 0 - # TODO bring back the umlaut. But waiting for triage - # https://github.com/datalad/datalad-next/pull/539#issuecomment-1842605708 - #content_tmpl = 'content: #ö file_{}\n' - content_tmpl = 'content: # file_{}\n' + content_tmpl = 'content: #ö file_{}' for i in ('annex1', 'annex2', 'annex3'): - (ds.pathobj / f'file_{i}').write_text(content_tmpl.format(i)) + (ds.pathobj / f'file_{i}').write_text( + content_tmpl.format(i), encoding='utf-8') fcount += 1 ds.save() ds.drop( @@ -195,7 +193,8 @@ def prep_fp_tester(ds): ) # and also add a file to git directly and a have one untracked too for i in ('untracked', 'ingit'): - (ds.pathobj / f'file_{i}').write_text(content_tmpl.format(i)) + (ds.pathobj / f'file_{i}').write_text( + content_tmpl.format(i), encoding='utf-8') fcount += 1 ds.save('file_ingit', to_git=True) # and add symlinks (untracked and in git) @@ -205,7 +204,8 @@ def prep_fp_tester(ds): for i in ('symlinkuntracked', 'symlinkingit'): tpath = ds.pathobj / f'target_{i}' lpath = ds.pathobj / f'file_{i}' - tpath.write_text(content_tmpl.format(i)) + tpath.write_text( + content_tmpl.format(i), encoding='utf-8') lpath.symlink_to(tpath) fcount += 1 ds.save('file_symlinkingit', to_git=True)