diff --git a/datalad_next/constraints/dataset.py b/datalad_next/constraints/dataset.py index 71b7b819..7d4bb38a 100644 --- a/datalad_next/constraints/dataset.py +++ b/datalad_next/constraints/dataset.py @@ -43,7 +43,8 @@ class EnsureDataset(Constraint): """ def __init__(self, installed: bool | None = None, - purpose: str | None = None): + purpose: str | None = None, + require_id: bool | None = None): """ Parameters ---------- @@ -53,28 +54,46 @@ def __init__(self, purpose: str, optional If given, will be used in generated error messages to communicate why a dataset is required (to exist) + idcheck: bool, option + If given, performs an additional check whether the dataset has a + valid dataset ID. """ self._installed = installed self._purpose = purpose + self._require_id = require_id super().__init__() def __call__(self, value) -> DatasetParameter: # good-enough test to recognize a dataset instance cheaply if hasattr(value, 'repo') and hasattr(value, 'pathobj'): - if self._installed is not None: - is_installed = value.is_installed() - if self._installed and not is_installed: - # for uniformity with require_dataset() below, use - # this custom exception - raise NoDatasetFound(f'{value} is not installed') - elif not self._installed and is_installed: - raise ValueError(f'{value} already exists locally') - return DatasetParameter(value, value) + ds = value # anticipate what require_dataset() could handle and fail if we got # something else elif not isinstance(value, (str, PurePath, type(None))): raise TypeError(f"Cannot create Dataset from {type(value)}") + else: + ds = self._require_dataset(value) + assert ds + if self._installed is not None: + is_installed = ds.is_installed() + if self._installed is False and is_installed: + raise ValueError(f'{ds} already exists locally') + if self._installed and not is_installed: + # for uniformity with require_dataset() below, use + # this custom exception + raise NoDatasetFound(f'{ds} is not installed') + if self._require_id and not ds.id: + raise NoDatasetFound(f'{ds} does not have a valid ' + f'datalad-id') + return DatasetParameter(value, ds) + + def short_description(self) -> str: + return "(path to) {}Dataset".format( + 'an existing ' if self._installed is True + else 'a non-existing ' if self._installed is False else 'a ') + + def _require_dataset(self, value): from datalad.distribution.dataset import require_dataset try: ds = require_dataset( @@ -82,6 +101,7 @@ def __call__(self, value) -> DatasetParameter: check_installed=self._installed is True, purpose=self._purpose, ) + return ds except NoDatasetFound: # mitigation of non-uniform require_dataset() behavior. # with value == None it does not honor check_installed @@ -94,13 +114,4 @@ def __call__(self, value) -> DatasetParameter: # find a dataset in any parent dir either, so this is # the best we can do. Installation absence verification # will happen further down - ds = Dataset(Path.cwd()) - - if self._installed is False and ds.is_installed(): - raise ValueError(f'{ds} already exists locally') - return DatasetParameter(value, ds) - - def short_description(self) -> str: - return "(path to) {}Dataset".format( - 'an existing ' if self._installed is True - else 'a non-existing ' if self._installed is False else 'a ') + return Dataset(Path.cwd()) diff --git a/datalad_next/constraints/tests/test_special_purpose.py b/datalad_next/constraints/tests/test_special_purpose.py index 72a005ac..ec9b3080 100644 --- a/datalad_next/constraints/tests/test_special_purpose.py +++ b/datalad_next/constraints/tests/test_special_purpose.py @@ -356,3 +356,13 @@ def test_EnsureDataset(tmp_path): assert EnsureDataset( installed=False).short_description() == \ '(path to) a non-existing Dataset' + + # smoke test for idcheck: + assert EnsureDataset(require_id=True)(ds).ds == ds + assert EnsureDataset(require_id=False)(ds).ds == ds + # unset the dataset ID to test whether an ID check would raise, but + # bring it back later in case future tests need it + id = ds.config.get('datalad.dataset.id') + ds.config.unset('datalad.dataset.id', scope='branch') + with pytest.raises(NoDatasetFound): + EnsureDataset(require_id=True)(tmp_path)