datalad · mih · Mar 3, 2023 · Mar 2, 2023 · Mar 2, 2023 · Mar 3, 2023
@@ -43,7 +43,8 @@ class EnsureDataset(Constraint):
     """
     def __init__(self,
                  installed: bool | None = None,
-                 purpose: str | None = None):
+                 purpose: str | None = None,
+                 require_id: bool | None = None):
         """
         Parameters
         ----------
@@ -53,35 +54,54 @@ def __init__(self,
         purpose: str, optional
           If given, will be used in generated error messages to communicate
           why a dataset is required (to exist)
+        idcheck: bool, option
+          If given, performs an additional check whether the dataset has a
+          valid dataset ID.
         """
         self._installed = installed
         self._purpose = purpose
+        self._require_id = require_id
         super().__init__()
 
     def __call__(self, value) -> DatasetParameter:
         # good-enough test to recognize a dataset instance cheaply
         if hasattr(value, 'repo') and hasattr(value, 'pathobj'):
-            if self._installed is not None:
-                is_installed = value.is_installed()
-                if self._installed and not is_installed:
-                    # for uniformity with require_dataset() below, use
-                    # this custom exception
-                    raise NoDatasetFound(f'{value} is not installed')
-                elif not self._installed and is_installed:
-                    raise ValueError(f'{value} already exists locally')
-            return DatasetParameter(value, value)
+            ds = value
         # anticipate what require_dataset() could handle and fail if we got
         # something else
         elif not isinstance(value, (str, PurePath, type(None))):
             raise TypeError(f"Cannot create Dataset from {type(value)}")
+        else:
+            ds = self._require_dataset(value)
+        assert ds
+        if self._installed is not None:
+            is_installed = ds.is_installed()
+            if self._installed is False and is_installed:
+                raise ValueError(f'{ds} already exists locally')
+            if self._installed and not is_installed:
+                # for uniformity with require_dataset() below, use
+                # this custom exception
+                raise NoDatasetFound(f'{ds} is not installed')
+        if self._require_id and not ds.id:
+            raise NoDatasetFound(f'{ds} does not have a valid '
+                                 f'datalad-id')
+        return DatasetParameter(value, ds)
+
+    def short_description(self) -> str:
+        return "(path to) {}Dataset".format(
+            'an existing ' if self._installed is True
+            else 'a non-existing ' if self._installed is False else 'a ')
+
 
+    def _require_dataset(self, value):
         from datalad.distribution.dataset import require_dataset
         try:
             ds = require_dataset(
                 value,
                 check_installed=self._installed is True,
                 purpose=self._purpose,
             )
+            return ds
         except NoDatasetFound:
             # mitigation of non-uniform require_dataset() behavior.
             # with value == None it does not honor check_installed
@@ -94,13 +114,4 @@ def __call__(self, value) -> DatasetParameter:
                 # find a dataset in any parent dir either, so this is
                 # the best we can do. Installation absence verification
                 # will happen further down
-                ds = Dataset(Path.cwd())
-
-        if self._installed is False and ds.is_installed():
-            raise ValueError(f'{ds} already exists locally')
-        return DatasetParameter(value, ds)
-
-    def short_description(self) -> str:
-        return "(path to) {}Dataset".format(
-            'an existing ' if self._installed is True
-            else 'a non-existing ' if self._installed is False else 'a ')
+                return Dataset(Path.cwd())
@@ -356,3 +356,13 @@ def test_EnsureDataset(tmp_path):
     assert EnsureDataset(
         installed=False).short_description() == \
         '(path to) a non-existing Dataset'
+
+    # smoke test for idcheck:
+    assert EnsureDataset(require_id=True)(ds).ds == ds
+    assert EnsureDataset(require_id=False)(ds).ds == ds
+    # unset the dataset ID to test whether an ID check would raise, but
+    # bring it back later in case future tests need it
+    id = ds.config.get('datalad.dataset.id')
+    ds.config.unset('datalad.dataset.id', scope='branch')
+    with pytest.raises(NoDatasetFound):
+        EnsureDataset(require_id=True)(tmp_path)