From be6329ae9dd39d24d73a78e64fb1bdc45490a6ad Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Fri, 24 May 2024 06:52:21 +0200 Subject: [PATCH 1/3] chore: remove `git ls-files` filter kludge This worked around a problem with Git outputting information on files within the `.git` directory. This was fixed in version 2.25, which is presently the minimum supported version declared by DataLad. This also side-steps a problem of a software bug in the kludge handling that would have led to a crash, if ever triggered. --- datalad_next/iter_collections/gitworktree.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/datalad_next/iter_collections/gitworktree.py b/datalad_next/iter_collections/gitworktree.py index 3ba3e925..16ac2331 100644 --- a/datalad_next/iter_collections/gitworktree.py +++ b/datalad_next/iter_collections/gitworktree.py @@ -23,10 +23,6 @@ decode_bytes, itemize, ) -from datalad_next.utils import external_versions -# Kludge: Filter out paths starting with .git/ to work around -# an `ls-files -o` bug that was fixed in Git 2.25. 
-git_needs_filter_kludge = external_versions['cmd:git'] < '2.25' from .utils import ( FileSystemItem, @@ -294,9 +290,6 @@ def _lsfiles_line2props( items = line.split('\t', maxsplit=1) # check if we cannot possibly have a 'staged' report with mode and gitsha if len(items) < 2: - if git_needs_filter_kludge and line.startswith(".git/"): # pragma nocover - lgr.debug("Filtering out .git/ file: %s", line) - return # not known to Git, but Git always reports POSIX path = PurePosixPath(line) # early exist, we have nothing but the path (untracked) @@ -304,9 +297,6 @@ def _lsfiles_line2props( props = items[0].split(' ') if len(props) != 3: - if git_needs_filter_kludge and line.startswith(".git/"): # pragma nocover - lgr.debug("Filtering out .git/ file: %s", line) - return # not known to Git, but Git always reports POSIX path = PurePosixPath(line) # early exist, we have nothing but the path (untracked) From f4e6c2dccacc05408a8d9b23c1545393aa07b5ee Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Fri, 24 May 2024 07:01:52 +0200 Subject: [PATCH 2/3] rf(type-annotation): apply correction to `iter_gitworktree()` implementation --- datalad_next/iter_collections/gitworktree.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/datalad_next/iter_collections/gitworktree.py b/datalad_next/iter_collections/gitworktree.py index 16ac2331..df253267 100644 --- a/datalad_next/iter_collections/gitworktree.py +++ b/datalad_next/iter_collections/gitworktree.py @@ -145,9 +145,9 @@ def iter_gitworktree( lsfiles_args.extend(lsfiles_untracked_args[untracked]) # helper to handle multi-stage reports by ls-files - pending_item = (None, None) + pending_item: tuple[None | PurePosixPath, None | Dict[str, str]] = (None, None) - reported_dirs = set() + reported_dirs: set[PurePosixPath] = set() _single_dir = recursive == 'no' # we add a "fake" `None` record at the end to avoid a special @@ -165,6 +165,7 @@ def iter_gitworktree( if ipath is None or pending_item[0] not in (None, 
ipath): if ipath is None and pending_item[0] is None: return + assert pending_item[0] is not None # this is the last point where we can still withhold a report. # it is also the point where we can do this with minimal # impact on the rest of the logic. @@ -175,7 +176,7 @@ def iter_gitworktree( # base directory -> ignore # we do reset pending_item here, although this would also # happen below -- it decomplexifies the conditionals - dir_path = pending_item_path_parts[0] + dir_path = PurePosixPath(pending_item_path_parts[0]) if dir_path in reported_dirs: # we only yield each containing dir once, and only once pending_item = (ipath, lsfiles_props) @@ -197,6 +198,7 @@ def iter_gitworktree( pending_item = (ipath, lsfiles_props) continue + assert pending_item[0] is not None # report on a pending item, this is not a "higher-stage" # report by ls-files item = _get_item( @@ -262,7 +264,7 @@ def _get_item( gitsha: str | None = None, ) -> GitWorktreeItem | GitWorktreeFileSystemItem: if isinstance(type, str): - type: GitTreeItemType = _mode_type_map[type] + type = _mode_type_map[type] item = None if link_target or fp: fullpath = basepath / ipath From 07c40c61d01c034fd4e28e8d2a2973bee60377a7 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Fri, 24 May 2024 07:17:14 +0200 Subject: [PATCH 3/3] chore(typos): new codespell update revealed more of them --- datalad_next/commands/tests/test_download.py | 2 +- datalad_next/commands/tests/test_ls_file_collection.py | 2 +- datalad_next/url_operations/base.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datalad_next/commands/tests/test_download.py b/datalad_next/commands/tests/test_download.py index 43d52aae..b4777c13 100644 --- a/datalad_next/commands/tests/test_download.py +++ b/datalad_next/commands/tests/test_download.py @@ -47,7 +47,7 @@ def test_download(tmp_path, http_server, no_result_rendering): return_type='item-or-list') assert (wdir / 'testfile.txt').read_text() == 'test' - # keys for hashes keep 
user-provided captialization + # keys for hashes keep user-provided capitalization assert res['md5'] == '098f6bcd4621d373cade4e832627b4f6' assert res['SHA256'] == \ '9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08' diff --git a/datalad_next/commands/tests/test_ls_file_collection.py b/datalad_next/commands/tests/test_ls_file_collection.py index 6c82988a..ee88418d 100644 --- a/datalad_next/commands/tests/test_ls_file_collection.py +++ b/datalad_next/commands/tests/test_ls_file_collection.py @@ -127,7 +127,7 @@ def test_replace_add_archive_content(sample_tar_xz, existing_dataset, # ignore any non-file, would not have an annex key. # Also ignores hardlinks (they consume no space (size=0), but could be # represented as regular copies of a shared key. however, this - # requires further processing of the metadat records, in order to find + # requires further processing of the metadata records, in order to find # the size of the item that has the same checksum as this one) if r.get('type') == 'file' ] diff --git a/datalad_next/url_operations/base.py b/datalad_next/url_operations/base.py index a7b5d443..2c16c8ca 100644 --- a/datalad_next/url_operations/base.py +++ b/datalad_next/url_operations/base.py @@ -126,7 +126,7 @@ def download(self, may not support automatic credential lookup. hash: list(algorithm_names), optional If given, must be a list of hash algorithm names supported by the - `hashlib` module. A corresponding hash will be computed simultaenous + `hashlib` module. A corresponding hash will be computed simultaneously to the download (without reading the data twice), and included in the return value. timeout: float, optional @@ -196,7 +196,7 @@ def upload(self, may not support automatic credential lookup. hash: list(algorithm_names), optional If given, must be a list of hash algorithm names supported by the - `hashlib` module. A corresponding hash will be computed simultaenous + `hashlib` module. 
A corresponding hash will be computed simultaneously to the upload (without reading the data twice), and included in the return value. timeout: float, optional