Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RF: Simplify EnsureDataset and add optional ID check #279

Merged
merged 6 commits into from
Mar 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 31 additions & 20 deletions datalad_next/constraints/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ class EnsureDataset(Constraint):
"""
def __init__(self,
installed: bool | None = None,
purpose: str | None = None):
purpose: str | None = None,
require_id: bool | None = None):
"""
Parameters
----------
Expand All @@ -53,35 +54,54 @@ def __init__(self,
purpose: str, optional
If given, will be used in generated error messages to communicate
why a dataset is required (to exist)
idcheck: bool, option
If given, performs an additional check whether the dataset has a
valid dataset ID.
"""
self._installed = installed
self._purpose = purpose
self._require_id = require_id
super().__init__()

def __call__(self, value) -> DatasetParameter:
# good-enough test to recognize a dataset instance cheaply
if hasattr(value, 'repo') and hasattr(value, 'pathobj'):
if self._installed is not None:
is_installed = value.is_installed()
if self._installed and not is_installed:
# for uniformity with require_dataset() below, use
# this custom exception
raise NoDatasetFound(f'{value} is not installed')
elif not self._installed and is_installed:
raise ValueError(f'{value} already exists locally')
return DatasetParameter(value, value)
ds = value
# anticipate what require_dataset() could handle and fail if we got
# something else
elif not isinstance(value, (str, PurePath, type(None))):
raise TypeError(f"Cannot create Dataset from {type(value)}")
else:
ds = self._require_dataset(value)
assert ds
if self._installed is not None:
is_installed = ds.is_installed()
if self._installed is False and is_installed:
raise ValueError(f'{ds} already exists locally')
if self._installed and not is_installed:
# for uniformity with require_dataset() below, use
# this custom exception
raise NoDatasetFound(f'{ds} is not installed')
if self._require_id and not ds.id:
raise NoDatasetFound(f'{ds} does not have a valid '
f'datalad-id')
return DatasetParameter(value, ds)

def short_description(self) -> str:
return "(path to) {}Dataset".format(
'an existing ' if self._installed is True
else 'a non-existing ' if self._installed is False else 'a ')


def _require_dataset(self, value):
from datalad.distribution.dataset import require_dataset
try:
ds = require_dataset(
value,
check_installed=self._installed is True,
purpose=self._purpose,
)
return ds
except NoDatasetFound:
# mitigation of non-uniform require_dataset() behavior.
# with value == None it does not honor check_installed
Expand All @@ -94,13 +114,4 @@ def __call__(self, value) -> DatasetParameter:
# find a dataset in any parent dir either, so this is
# the best we can do. Installation absence verification
# will happen further down
ds = Dataset(Path.cwd())

if self._installed is False and ds.is_installed():
raise ValueError(f'{ds} already exists locally')
return DatasetParameter(value, ds)

def short_description(self) -> str:
return "(path to) {}Dataset".format(
'an existing ' if self._installed is True
else 'a non-existing ' if self._installed is False else 'a ')
return Dataset(Path.cwd())
10 changes: 10 additions & 0 deletions datalad_next/constraints/tests/test_special_purpose.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,3 +356,13 @@ def test_EnsureDataset(tmp_path):
assert EnsureDataset(
installed=False).short_description() == \
'(path to) a non-existing Dataset'

# smoke test for idcheck:
assert EnsureDataset(require_id=True)(ds).ds == ds
assert EnsureDataset(require_id=False)(ds).ds == ds
# unset the dataset ID to test whether an ID check would raise, but
# bring it back later in case future tests need it
id = ds.config.get('datalad.dataset.id')
ds.config.unset('datalad.dataset.id', scope='branch')
with pytest.raises(NoDatasetFound):
EnsureDataset(require_id=True)(tmp_path)