Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove old kludge (broken) and fix type annotations #715

Merged
merged 3 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datalad_next/commands/tests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_download(tmp_path, http_server, no_result_rendering):
return_type='item-or-list')

assert (wdir / 'testfile.txt').read_text() == 'test'
# keys for hashes keep user-provided captialization
# keys for hashes keep user-provided capitalization
assert res['md5'] == '098f6bcd4621d373cade4e832627b4f6'
assert res['SHA256'] == \
'9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08'
Expand Down
2 changes: 1 addition & 1 deletion datalad_next/commands/tests/test_ls_file_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def test_replace_add_archive_content(sample_tar_xz, existing_dataset,
# ignore any non-file, would not have an annex key.
# Also ignores hardlinks (they consume no space (size=0), but could be
# represented as regular copies of a shared key. however, this
# requires further processing of the metadat records, in order to find
# requires further processing of the metadata records, in order to find
# the size of the item that has the same checksum as this one)
if r.get('type') == 'file'
]
Expand Down
20 changes: 6 additions & 14 deletions datalad_next/iter_collections/gitworktree.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,6 @@
decode_bytes,
itemize,
)
from datalad_next.utils import external_versions
# Kludge: Filter out paths starting with .git/ to work around
# an `ls-files -o` bug that was fixed in Git 2.25.
git_needs_filter_kludge = external_versions['cmd:git'] < '2.25'

from .utils import (
FileSystemItem,
Expand Down Expand Up @@ -149,9 +145,9 @@ def iter_gitworktree(
lsfiles_args.extend(lsfiles_untracked_args[untracked])

# helper to handle multi-stage reports by ls-files
pending_item = (None, None)
pending_item: tuple[None | PurePosixPath, None | Dict[str, str]] = (None, None)

reported_dirs = set()
reported_dirs: set[PurePosixPath] = set()
_single_dir = recursive == 'no'

# we add a "fake" `None` record at the end to avoid a special
Expand All @@ -169,6 +165,7 @@ def iter_gitworktree(
if ipath is None or pending_item[0] not in (None, ipath):
if ipath is None and pending_item[0] is None:
return
assert pending_item[0] is not None
# this is the last point where we can still withhold a report.
# it is also the point where we can do this with minimal
# impact on the rest of the logic.
Expand All @@ -179,7 +176,7 @@ def iter_gitworktree(
# base directory -> ignore
# we do reset pending_item here, although this would also
# happen below -- it decomplexifies the conditionals
dir_path = pending_item_path_parts[0]
dir_path = PurePosixPath(pending_item_path_parts[0])
if dir_path in reported_dirs:
# we only yield each containing dir once, and only once
pending_item = (ipath, lsfiles_props)
Expand All @@ -201,6 +198,7 @@ def iter_gitworktree(
pending_item = (ipath, lsfiles_props)
continue

assert pending_item[0] is not None
# report on a pending item, this is not a "higher-stage"
# report by ls-files
item = _get_item(
Expand Down Expand Up @@ -266,7 +264,7 @@ def _get_item(
gitsha: str | None = None,
) -> GitWorktreeItem | GitWorktreeFileSystemItem:
if isinstance(type, str):
type: GitTreeItemType = _mode_type_map[type]
type = _mode_type_map[type]
item = None
if link_target or fp:
fullpath = basepath / ipath
Expand Down Expand Up @@ -294,19 +292,13 @@ def _lsfiles_line2props(
items = line.split('\t', maxsplit=1)
# check if we cannot possibly have a 'staged' report with mode and gitsha
if len(items) < 2:
if git_needs_filter_kludge and line.startswith(".git/"): # pragma nocover
lgr.debug("Filtering out .git/ file: %s", line)
return
# not known to Git, but Git always reports POSIX
path = PurePosixPath(line)
# early exit, we have nothing but the path (untracked)
return path, None

props = items[0].split(' ')
if len(props) != 3:
if git_needs_filter_kludge and line.startswith(".git/"): # pragma nocover
lgr.debug("Filtering out .git/ file: %s", line)
return
# not known to Git, but Git always reports POSIX
path = PurePosixPath(line)
# early exit, we have nothing but the path (untracked)
Expand Down
4 changes: 2 additions & 2 deletions datalad_next/url_operations/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def download(self,
may not support automatic credential lookup.
hash: list(algorithm_names), optional
If given, must be a list of hash algorithm names supported by the
`hashlib` module. A corresponding hash will be computed simultaenous
`hashlib` module. A corresponding hash will be computed simultaneously
to the download (without reading the data twice), and included
in the return value.
timeout: float, optional
Expand Down Expand Up @@ -196,7 +196,7 @@ def upload(self,
may not support automatic credential lookup.
hash: list(algorithm_names), optional
If given, must be a list of hash algorithm names supported by the
`hashlib` module. A corresponding hash will be computed simultaenous
`hashlib` module. A corresponding hash will be computed simultaneously
to the upload (without reading the data twice), and included
in the return value.
timeout: float, optional
Expand Down