Skip to content

Commit

Permalink
Merge pull request #279 from adswa/rf-dataset-id
Browse files Browse the repository at this point in the history
RF: Simplify EnsureDataset and add optional ID check
  • Loading branch information
mih authored Mar 3, 2023
2 parents a07b597 + a957e36 commit 318d200
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 20 deletions.
51 changes: 31 additions & 20 deletions datalad_next/constraints/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ class EnsureDataset(Constraint):
"""
def __init__(self,
installed: bool | None = None,
purpose: str | None = None):
purpose: str | None = None,
require_id: bool | None = None):
"""
Parameters
----------
Expand All @@ -53,35 +54,54 @@ def __init__(self,
purpose: str, optional
If given, will be used in generated error messages to communicate
why a dataset is required (to exist)
idcheck: bool, option
If given, performs an additional check whether the dataset has a
valid dataset ID.
"""
self._installed = installed
self._purpose = purpose
self._require_id = require_id
super().__init__()

def __call__(self, value) -> DatasetParameter:
# good-enough test to recognize a dataset instance cheaply
if hasattr(value, 'repo') and hasattr(value, 'pathobj'):
if self._installed is not None:
is_installed = value.is_installed()
if self._installed and not is_installed:
# for uniformity with require_dataset() below, use
# this custom exception
raise NoDatasetFound(f'{value} is not installed')
elif not self._installed and is_installed:
raise ValueError(f'{value} already exists locally')
return DatasetParameter(value, value)
ds = value
# anticipate what require_dataset() could handle and fail if we got
# something else
elif not isinstance(value, (str, PurePath, type(None))):
raise TypeError(f"Cannot create Dataset from {type(value)}")
else:
ds = self._require_dataset(value)
assert ds
if self._installed is not None:
is_installed = ds.is_installed()
if self._installed is False and is_installed:
raise ValueError(f'{ds} already exists locally')
if self._installed and not is_installed:
# for uniformity with require_dataset() below, use
# this custom exception
raise NoDatasetFound(f'{ds} is not installed')
if self._require_id and not ds.id:
raise NoDatasetFound(f'{ds} does not have a valid '
f'datalad-id')
return DatasetParameter(value, ds)

def short_description(self) -> str:
return "(path to) {}Dataset".format(
'an existing ' if self._installed is True
else 'a non-existing ' if self._installed is False else 'a ')


def _require_dataset(self, value):
from datalad.distribution.dataset import require_dataset
try:
ds = require_dataset(
value,
check_installed=self._installed is True,
purpose=self._purpose,
)
return ds
except NoDatasetFound:
# mitigation of non-uniform require_dataset() behavior.
# with value == None it does not honor check_installed
Expand All @@ -94,13 +114,4 @@ def __call__(self, value) -> DatasetParameter:
# find a dataset in any parent dir either, so this is
# the best we can do. Installation absence verification
# will happen further down
ds = Dataset(Path.cwd())

if self._installed is False and ds.is_installed():
raise ValueError(f'{ds} already exists locally')
return DatasetParameter(value, ds)

def short_description(self) -> str:
return "(path to) {}Dataset".format(
'an existing ' if self._installed is True
else 'a non-existing ' if self._installed is False else 'a ')
return Dataset(Path.cwd())
10 changes: 10 additions & 0 deletions datalad_next/constraints/tests/test_special_purpose.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,3 +356,13 @@ def test_EnsureDataset(tmp_path):
assert EnsureDataset(
installed=False).short_description() == \
'(path to) a non-existing Dataset'

# smoke test for idcheck:
assert EnsureDataset(require_id=True)(ds).ds == ds
assert EnsureDataset(require_id=False)(ds).ds == ds
# unset the dataset ID to test whether an ID check would raise, but
# bring it back later in case future tests need it
id = ds.config.get('datalad.dataset.id')
ds.config.unset('datalad.dataset.id', scope='branch')
with pytest.raises(NoDatasetFound):
EnsureDataset(require_id=True)(tmp_path)

0 comments on commit 318d200

Please sign in to comment.