diff --git a/docker/irods_client/tests/test_data_ops.py b/docker/irods_client/tests/test_data_ops.py index ebc3cde0..0f0131b8 100644 --- a/docker/irods_client/tests/test_data_ops.py +++ b/docker/irods_client/tests/test_data_ops.py @@ -135,7 +135,7 @@ def test_meta_archive(session, testdata, tmpdir): sync(session, testdata, ipath) assert len(list(ipath.meta)) == 0 meta_list = [ - (ipath, ("root", "true", None)), + (ipath, ("root", "true", "")), (ipath / "more_data", ("more_data", "false", "kg")), (ipath / "more_data" / "polarbear.txt", ("is_polar", "true", "bool")), ] diff --git a/docker/irods_client/tests/test_meta.py b/docker/irods_client/tests/test_meta.py index 0fdfc4ac..880e7e93 100644 --- a/docker/irods_client/tests/test_meta.py +++ b/docker/irods_client/tests/test_meta.py @@ -5,7 +5,7 @@ from pytest import mark from ibridges.data_operations import Operations -from ibridges.meta import MetaData +from ibridges.meta import MetaData, MetaDataItem from ibridges.path import IrodsPath @@ -23,7 +23,7 @@ def test_meta(item_name, request): assert len(meta) == 1 assert list(meta)[0].name == "x" assert list(meta)[0].value == "y" - assert list(meta)[0].units is None + assert list(meta)[0].units == "" assert "x" in meta assert ("x", "y") in meta assert "y" not in meta @@ -66,6 +66,7 @@ def test_meta(item_name, request): assert "x" in meta assert ("y", "z") not in meta assert ("y", "x") in meta + meta.clear() @mark.parametrize("item_name", ["collection", "dataobject"]) def test_metadata_todict(item_name, request): @@ -108,3 +109,136 @@ def test_metadata_export(item_name, request, session, tmpdir): with open(tmp_file, "r", encoding="utf-8"): new_meta_dict = json.load(tmp_file) assert isinstance(new_meta_dict, dict) + +@mark.parametrize("item_name", ["collection", "dataobject"]) +def test_metadata_getitem(item_name, request): + item = request.getfixturevalue(item_name) + meta = MetaData(item) + meta.clear() + + assert len(meta) == 0 + meta.add("some_key", "some_value", 
"some_units") + assert isinstance(meta["some_key"], MetaDataItem) + meta.add("some_key", "some_value", None) + meta.add("some_key", "other_value", "some_units") + meta.add("other_key", "third_value", "other_units") + with pytest.raises(ValueError): + meta["some_key"] + with pytest.raises(ValueError): + meta["some_key", "some_value"] + assert isinstance(meta["some_key", "some_value", "some_units"], MetaDataItem) + assert tuple(meta["other_key"]) == ("other_key", "third_value", "other_units") + with pytest.raises(KeyError): + meta["unknown"] + with pytest.raises(KeyError): + meta["some_key", "unknown"] + with pytest.raises(KeyError): + meta["some_key", "some_value", "unknown"] + meta.clear() + + +@mark.parametrize("item_name", ["collection", "dataobject"]) +def test_metadata_setitem(item_name, request): + item = request.getfixturevalue(item_name) + meta = MetaData(item) + meta.clear() + + meta.add("some_key", "some_value", "some_units") + meta["some_key"] = ("some_key", "new_value", "new_units") + meta["some_key"] = ("some_key", "new_value") + + with pytest.raises(TypeError): + meta["some_key"] = "new_value" + + with pytest.raises(ValueError): + meta["some_key"] = ("some_key", "new_value") + + +@mark.parametrize("item_name", ["collection", "dataobject"]) +def test_metadata_rename(item_name, request, session): + item = request.getfixturevalue(item_name) + meta = MetaData(item) + meta.clear() + + + meta.add("some_key", "some_value", "some_units") + meta["some_key"].key = "new_key" + assert ("new_key", "some_value", "some_units") in meta + assert len(meta) == 1 + + meta["new_key"].value = "new_value" + assert ("new_key", "new_value", "some_units") in meta + assert len(meta) == 1 + + meta["new_key"].units = "new_units" + assert ("new_key", "new_value", "new_units") in meta + assert len(meta) == 1 + + meta.add("new_key", "new_value", "other_units") + with pytest.raises(ValueError): + meta["new_key", "new_value", "other_units"].units = "new_units" + assert len(meta) == 2 + 
meta["new_key", "new_value", "other_units"].remove() + + meta.add("new_key", "other_value", "new_units") + with pytest.raises(ValueError): + meta["new_key", "other_value", "new_units"].value = "new_value" + assert len(meta) == 2 + meta["new_key", "other_value", "new_units"].remove() + + meta.add("other_key", "new_value", "new_units") + with pytest.raises(ValueError): + meta["other_key", "new_value", "new_units"].key = "new_key" + assert len(meta) == 2 + + with pytest.raises(ValueError): + meta["other_key"].key = "org_something" + assert len(meta) == 2 + assert "other_key" in meta + + meta.clear() + + +@mark.parametrize("item_name", ["collection", "dataobject"]) +def test_metadata_findall(item_name, request, session): + item = request.getfixturevalue(item_name) + meta = MetaData(item) + meta.clear() + + + meta.add("some_key", "some_value", "some_units") + meta.add("some_key", "some_value", None) + meta.add("some_key", "other_value", "some_units") + meta.add("other_key", "third_value", "other_units") + + assert len(meta.find_all()) == 4 + assert len(meta.find_all(key="some_key")) == 3 + assert isinstance(meta.find_all(key="some_key")[0], MetaDataItem) + assert len(meta.find_all(key="?")) == 0 + assert len(meta.find_all(value="some_value")) == 2 + assert len(meta.find_all(units="some_units")) == 2 + + +@mark.parametrize("item_name", ["collection", "dataobject"]) +def test_metadata_errors(item_name, request, session): + item = request.getfixturevalue(item_name) + meta = MetaData(item) + meta.clear() + + with pytest.raises(ValueError): + meta.add("", "some_value") + with pytest.raises(TypeError): + meta.add(None, "some_value") + with pytest.raises(TypeError): + meta.add(10, "some_value") + + with pytest.raises(ValueError): + meta.add("key", "") + with pytest.raises(TypeError): + meta.add("key", None) + with pytest.raises(TypeError): + meta.add("key", 10) + + with pytest.raises(TypeError): + meta.add("key", "value", 10) + diff --git a/docs/source/irods_search.rst 
b/docs/source/irods_search.rst index d6d7a84d..e5a90072 100644 --- a/docs/source/irods_search.rst +++ b/docs/source/irods_search.rst @@ -2,7 +2,7 @@ iRODS Search ============ `iBridges` offers an easy way to search for data. You can pass a combination of path, metadata, -item type and checksum. The output will be a list of :class:`ibridges.path.IrodsPath`, which contain information where to find the item on the iRODS server. +item type and checksum. The output will be a list of :class:`ibridges.path.CachedIrodsPath`, which contain information where to find the item on the iRODS server. .. note:: @@ -29,6 +29,11 @@ To find all subcollections and dataobjects in a collection use the `%` as wildca search_data(session, path_pattern="subcoll/%") +.. note:: + + The output of a search is a :class:`ibridges.path.CachedIrodsPath`. It contains the information about the data object or collection at the time of the search. + This information is not refetched from the server, i.e. the size of the path will always remain the size at the time of the search. + Search data by metadata ----------------------- @@ -66,7 +71,7 @@ A query with metadata will look like: # and one metadata entry that has value=="value", but they do not have to be # for the same entry as in the above. search_data(session, metadata=[MetaSearch(key="key"), MetaSearch(value="value")]) - + Use the `%` as a wild card again to match any combination of characters. diff --git a/docs/source/metadata.rst b/docs/source/metadata.rst index 14a2b3ce..6186c72d 100644 --- a/docs/source/metadata.rst +++ b/docs/source/metadata.rst @@ -6,8 +6,8 @@ Metadata iRODS offers metadata as key, value, units triplets. The type of the keys, values and units is always a string. Below we show how to create a :doc:`Metadata ` object from a data object or collection. -The Metadata object --------------------- +The MetaData class +------------------ .. 
code-block:: python @@ -17,8 +17,23 @@ The Metadata object session = interactive_auth() meta = IrodsPath(session, "~", "collection_or_dataobject").meta + # Show all metadata entries with print. + print(meta) + With the object :code:`meta` we can now access and manipulate the metadata of the data object. +The MetaDataItem class +---------------------- + +As explained above, the metadata of a collection or dataobject can have multiple entries. You can iterate over +these entries as follows: + +.. code-block:: python + + for item in meta: + print(item.key, item.value, item.units) + + Add metadata ------------ To add metadata, you always need to provide a key and a value, the units are optional and can be left out. @@ -26,8 +41,6 @@ To add metadata, you always need to provide a key and a value, the units are opt .. code-block:: python meta.add('NewKey', 'NewValue', 'NewUnit') - print(meta) - .. note:: You can have several metadata entries with the same key but different values and units, @@ -46,6 +59,51 @@ same key first. This mirrors the implementation of the `iCommands Operations: """Upload a local directory or file to iRODS. @@ -62,6 +63,8 @@ def upload( Whether to do a dry run before uploading the files/folders. metadata: If not None, it should point to a file that contains the metadata for the upload. + kwargs: + Extra arguments for executing the upload operation, e.g. progress_bar = False. Returns ------- @@ -121,7 +124,7 @@ def upload( if metadata is not None: ops.add_meta_upload(idest_path, metadata) if not dry_run: - ops.execute(session, ignore_err=ignore_err) + ops.execute(session, ignore_err=ignore_err, **kwargs) return ops @@ -136,6 +139,7 @@ def download( options: Optional[dict] = None, dry_run: bool = False, metadata: Union[None, str, Path] = None, + **kwargs ) -> Operations: """Download a collection or data object to the local filesystem. @@ -165,6 +169,8 @@ def download( metadata: If not None, the path to store the metadata to in JSON format. 
It is recommended to use the .json suffix. + kwargs: + Extra arguments for executing the download operation, e.g. progress_bar = False. Returns ------- @@ -229,7 +235,7 @@ def download( ops.resc_name = resc_name ops.options = options if not dry_run: - ops.execute(session, ignore_err=ignore_err) + ops.execute(session, ignore_err=ignore_err, **kwargs) return ops @@ -272,6 +278,7 @@ def sync( resc_name: str = "", options: Optional[dict] = None, metadata: Union[None, str, Path] = None, + **kwargs ) -> Operations: """Synchronize data between local and remote copies. @@ -313,6 +320,8 @@ def sync( More options for the download/upload metadata: If not None, the location to get the metadata from or store it to. + kwargs: + Extra arguments for executing the sync operation, e.g. progress_bar = False. Returns @@ -352,7 +361,7 @@ def sync( ops.resc_name = resc_name ops.options = options if not dry_run: - ops.execute(session, ignore_err=ignore_err) + ops.execute(session, ignore_err=ignore_err, **kwargs) return ops diff --git a/ibridges/executor.py b/ibridges/executor.py index f1d33e43..ba83ca2a 100644 --- a/ibridges/executor.py +++ b/ibridges/executor.py @@ -147,7 +147,8 @@ def add_create_coll(self, new_col: IrodsPath): """ self.create_collection.add(str(new_col)) - def execute(self, session: Session, ignore_err: bool = False): + def execute(self, session: Session, ignore_err: bool = False, + progress_bar: bool = True): """Execute all added operations. This also creates a progress bar to see the status updates. @@ -159,11 +160,14 @@ def execute(self, session: Session, ignore_err: bool = False): ignore_err, optional Whether to ignore errors when encountered, by default False Note that not all errors will be ignored. + progress_bar + Whether to turn on the progress bar. The progress bar will be disabled + if the total download + upload size is 0 regardless. 
""" up_sizes = [lpath.stat().st_size for lpath, _ in self.upload] down_sizes = [ipath.size for ipath, _ in self.download] - disable = len(up_sizes) + len(down_sizes) == 0 + disable = len(up_sizes) + len(down_sizes) == 0 or not progress_bar pbar = tqdm( total=sum(up_sizes) + sum(down_sizes), unit="B", diff --git a/ibridges/icat_columns.py b/ibridges/icat_columns.py index 586b211b..be45418e 100644 --- a/ibridges/icat_columns.py +++ b/ibridges/icat_columns.py @@ -10,6 +10,7 @@ DATA_PATH = imodels.DataObject.path DATA_ID = imodels.DataObject.id DATA_CHECKSUM = imodels.DataObject.checksum +DATA_SIZE = imodels.DataObject.size META_COLL_ATTR_NAME = imodels.CollectionMeta.name META_COLL_ATTR_VALUE = imodels.CollectionMeta.value META_COLL_ATTR_UNITS = imodels.CollectionMeta.units diff --git a/ibridges/interactive.py b/ibridges/interactive.py index 204562b7..25d96a6c 100644 --- a/ibridges/interactive.py +++ b/ibridges/interactive.py @@ -61,7 +61,7 @@ def interactive_auth( n_tries = 0 success = False while not success and n_tries < 3: - if sys.stdin.isatty(): + if sys.stdin.isatty() or 'ipykernel' in sys.modules: password = getpass('Your iRODS password: ') else: print('Your iRODS password: ') diff --git a/ibridges/meta.py b/ibridges/meta.py index 2873cd32..a0fa3c10 100644 --- a/ibridges/meta.py +++ b/ibridges/meta.py @@ -11,6 +11,18 @@ import irods.meta +def _parse_tuple(key, value, units = ""): + if key == "": + raise ValueError("Key cannot be of size zero.") + if not isinstance(key, (str, bytes)): + raise TypeError(f"Key should have type str or bytes-like, not {type(key)}.") + if value == "": + raise ValueError("Value cannot be of size zero.") + if not isinstance(value, (str, bytes)): + raise TypeError(f"Value should have type str or bytes-like, not {type(value)}.") + if not isinstance(units, (str, bytes, type(None))): + raise TypeError(f"Key should have type str, bytes-like or None, not {type(units)}.") + class MetaData: """iRODS metadata operations. 
@@ -48,7 +60,7 @@ class MetaData: def __init__( self, item: Union[irods.data_object.iRODSDataObject, irods.collection.iRODSCollection], - blacklist: str = r"^org_*", + blacklist: Optional[str] = r"^org_[\s\S]+", ): """Initialize the metadata object.""" self.item = item @@ -56,15 +68,14 @@ def __init__( def __iter__(self) -> Iterator: """Iterate over all metadata key/value/units triplets.""" - if self.blacklist is None: - yield from self.item.metadata.items() - return for meta in self.item.metadata.items(): - if self.blacklist and re.match(self.blacklist, meta.name) is None: - yield meta + if not self.blacklist or re.match(self.blacklist, meta.name) is None: + yield MetaDataItem(self, meta) else: - warnings.warn(f"Ignoring metadata entry with value {meta.name}, because it matches " - f"the blacklist {self.blacklist}.") + warnings.warn( + f"Ignoring metadata entry with key {meta.name}, because it matches " + f"the blacklist {self.blacklist}." + ) def __len__(self) -> int: """Get the number of non-blacklisted metadata entries.""" @@ -87,18 +98,9 @@ def __contains__(self, val: Union[str, Sequence]) -> bool: True """ - if isinstance(val, str): - val = [val] - all_attrs = ["name", "value", "units"][: len(val)] - for meta in self: - n_same = 0 - for i_attr, attr in enumerate(all_attrs): - if getattr(meta, attr) == val[i_attr] or val[i_attr] is None: - n_same += 1 - else: - break - if n_same == len(val): - return True + search_pattern = _pad_search_pattern(val) + if len(self.find_all(*search_pattern)) > 0: + return True return False def __repr__(self) -> str: @@ -108,14 +110,85 @@ def __repr__(self) -> str: def __str__(self) -> str: """Return a string showing all metadata entries.""" # Sort the list of items name -> value -> units, where None is the lowest - meta_list = list(self) - meta_list = sorted(meta_list, key=lambda m: (m.units is None, m.units)) - meta_list = sorted(meta_list, key=lambda m: (m.value is None, m.value)) - meta_list = sorted(meta_list, key=lambda m: 
(m.name is None, m.name)) - return "\n".join(f" - {{name: {meta.name}, value: {meta.value}, units: {meta.units}}}" - for meta in meta_list) - - def add(self, key: str, value: str, units: Optional[str] = None): + meta_list = sorted(list(self)) + return "\n".join(f" - {meta}" for meta in meta_list) + + def find_all(self, key=..., value=..., units=...): + """Find all metadata entries belonging to the data object/collection. + + Wildcards can be used by leaving the key/value/units at default. + """ + all_items = [] + for meta_item in self: + if meta_item.matches(key, value, units): + all_items.append(meta_item) + return all_items + + def __getitem__(self, key: Union[str, Sequence[Union[str, None]]]) -> MetaDataItem: + """Access the metadata like a dictionary of tuples. + + Parameters + ---------- + key + The key to get all metadata for. + + Raises + ------ + KeyError + If the key does not exist. + + + Examples + -------- + >>> meta["some_key"] + ("some_key", "some_value", "some_units") + >>> meta["some_key", "some_value"] + >>> meta["some_key", "some_value", "some_units"] + + """ + search_pattern = _pad_search_pattern(key) + all_items = self.find_all(*search_pattern) + if len(all_items) == 0: + raise KeyError(f"Cannot find metadata item with key '{key}'.") + if len(all_items) > 1: + raise ValueError( + f"Found multiple items with key '{key}', specify value and " + "units as well, for example: meta[key, value, units]." + ) + return all_items[0] + + def __setitem__(self, key: Union[str, Sequence[Union[str, None]]], other: Sequence[str]): + """Set metadata items like a dictionary of tuples. + + Parameters + ---------- + key + The key to get the metadata for. + other + Key, value, units to set the metadata item to. Units is optional. + + Raises + ------ + TypeError: + If the other parameter is a string. + ValueError: + If the item already exists. 
+ + Examples + -------- + >>> meta["key"] = ("key", "new_value", "new_units") + >>> meta["key"] = ("new_key", "old_value") + + """ + if isinstance(other, str): + raise TypeError( + "Cannot set the metadata item to a single string value. " + f'Use meta[{key}].key = "{other}" to change only the key ' + "for example." + ) + self[key].update(*other) + + def add(self, key: str, value: str, units: Optional[str] = ""): """Add metadata to an item. This will never overwrite an existing entry. If the triplet already exists @@ -146,17 +219,22 @@ def add(self, key: str, value: str, units: Optional[str] = None): >>> meta.add("Mass", "10", "kg") """ + _parse_tuple(key, value, units) try: if (key, value, units) in self: raise ValueError("ADD META: Metadata already present") if self.blacklist: - if re.match(self.blacklist, key): - raise ValueError(f"ADD META: Key must not start with {self.blacklist}.") + try: + if re.match(self.blacklist, key): + raise ValueError(f"ADD META: Key must not start with {self.blacklist}.") + except TypeError as error: + raise TypeError( + f"Key {key} must be of type string, found {type(key)}") from error self.item.metadata.add(key, value, units) except irods.exception.CAT_NO_ACCESS_PERMISSION as error: raise PermissionError("UPDATE META: no permissions") from error - def set(self, key: str, value: str, units: Optional[str] = None): + def set(self, key: str, value: str, units: Optional[str] = ""): """Set the metadata entry. If the metadata entry already exists, then all metadata entries with @@ -187,8 +265,11 @@ def set(self, key: str, value: str, units: Optional[str] = None): self.delete(key) self.add(key, value, units) - def delete(self, key: str, value: Union[None, str] = ..., # type: ignore - units: Union[None, str] = ...): # type: ignore + def delete( + self, + key: str, + value: Union[None, str] = ..., # type: ignore + units: Union[None, str] = ...,): # type: ignore """Delete a metadata entry of an item. 
Parameters @@ -219,24 +300,14 @@ def delete(self, key: str, value: Union[None, str] = ..., # type: ignore >>> meta.delete("mass") """ - try: - if value is ... or units is ...: - all_metas = self.item.metadata.get_all(key) - for meta in all_metas: - if value is ... or value == meta.value and units is ... or units == meta.units: - self.item.metadata.remove(meta) - else: - self.item.metadata.remove(key, value, units) - except irods.exception.CAT_SUCCESS_BUT_WITH_NO_INFO as error: + all_meta_items = self.find_all(key, value, units) + if len(all_meta_items) == 0: raise KeyError( - f"Cannot delete metadata with key '{key}', value '{value}'" - f" and units '{units}' since it does not exist." - ) from error - except irods.exception.CAT_NO_ACCESS_PERMISSION as error: - raise ValueError( - f"Cannot delete metadata due to insufficient permission " - f"for path '{self.item.path}'." - ) from error + f"Cannot delete items with key='{key}', value='{value}' and units='{units}', " + "since no metadata entries exist with those values." + ) + for meta_item in all_meta_items: + meta_item.remove() def clear(self): """Delete all metadata entries belonging to the item. @@ -294,9 +365,9 @@ def to_dict(self, keys: Optional[list] = None) -> dict: if isinstance(self.item, irods.data_object.iRODSDataObject): meta_dict["checksum"] = self.item.checksum if keys is None: - meta_dict["metadata"] = [(m.name, m.value, m.units) for m in self] + meta_dict["metadata"] = [tuple(m) for m in self] else: - meta_dict["metadata"] = [(m.name, m.value, m.units) for m in self if m.name in keys] + meta_dict["metadata"] = [tuple(m) for m in self if m.key in keys] return meta_dict def from_dict(self, meta_dict: dict): @@ -327,3 +398,192 @@ def from_dict(self, meta_dict: dict): self.add(*meta_tuple) except ValueError: pass + + +class MetaDataItem: + """Interface for metadata entries. + + This is a substitute of the python-irodsclient iRODSMeta object. 
+ It implements setting the key/value/units, allows for sorting and can + remove itself. + + This class is generally created by the MetaData class, not directly + created by the user. + + Parameters + ---------- + ibridges_meta: + A MetaData object that the MetaDataItem is part of. + prc_meta: + A PRC iRODSMeta object that points to the entry. + + """ + + def __init__(self, ibridges_meta: MetaData, prc_meta: irods.iRODSMeta): + """Initialize the MetaDataItem object.""" + self._ibridges_meta = ibridges_meta + self._prc_meta: irods.iRODSMeta = prc_meta + + @property + def key(self) -> str: + """Return the key of the metadata item.""" + return self._prc_meta.name + + @key.setter + def key(self, new_key: str): + if new_key == self._prc_meta.name: + return + new_item_values = [new_key, self._prc_meta.value, self._prc_meta.units] + self.update(*new_item_values) + + @property + def value(self) -> Optional[str]: + """Return the value of the metadata item.""" + return self._prc_meta.value + + @value.setter + def value(self, new_value: Optional[str]): + if new_value == self._prc_meta.value: + return + new_item_values = [self._prc_meta.name, new_value, self._prc_meta.units] + self.update(*new_item_values) + + @property + def units(self) -> str: + """Return the units of the metadata item.""" + return "" if self._prc_meta.units is None else self._prc_meta.units + + @units.setter + def units(self, new_units: Optional[str]): + if new_units == self._prc_meta.units: + return + new_item_values = [self._prc_meta.name, self._prc_meta.value, new_units] + self.update(*new_item_values) + + def __repr__(self) -> str: + """Representation of the MetaDataItem.""" + return f"" + + def __str__(self) -> str: + """User readable representation of MetaDataItem.""" + return f"(key: '{self.key}', value: '{self.value}', units: '{self.units}')" + + def __iter__(self) -> Iterator[Optional[str]]: + """Allow iteration over key, value, units.""" + yield self.key + yield self.value + yield self.units + + 
def update(self, new_key: str, new_value: str, new_units: Optional[str] = ""): + """Update the metadata item changing the key/value/units. + + Parameters + ---------- + new_key: + New key to set the metadata item to. + new_value: + New value to set the metadata item to. + new_units: + New units to set the metadata item to, optional. + + Raises + ------ + ValueError: + If the operation could not be completed because of permission error. + Or if the new to be created item already exists. + + """ + new_item_key = (new_key, new_value, new_units) + try: + _new_item = self._ibridges_meta[new_item_key] + except KeyError: + self._ibridges_meta.add(*new_item_key) + try: + self._ibridges_meta.item.metadata.remove(self._prc_meta) + # If we get an error, roll back the added metadata + except irods.exception.CAT_NO_ACCESS_PERMISSION as error: + self._ibridges_meta.delete(*new_item_key) + raise ValueError( + f"Cannot rename metadata due to insufficient permission " + f"for path '{self.item.path}'." + ) from error + self._prc_meta = self._ibridges_meta[new_item_key]._prc_meta # pylint: disable=protected-access + else: + raise ValueError( + f"Cannot change key/value/units to '{new_item_key}' metadata item " + "already exists." + ) + + def __getattribute__(self, attr: str): + """Add name attribute and check if the metadata item is already removed.""" + if attr == "name": + return self.__getattribute__("key") + if attr == "_prc_meta" and super().__getattribute__(attr) is None: + raise KeyError("Cannot remove metadata item: it has already been removed.") + return super().__getattribute__(attr) + + def remove(self): + """Remove the metadata item.""" + try: + self._ibridges_meta.item.metadata.remove(self._prc_meta) + except irods.exception.CAT_SUCCESS_BUT_WITH_NO_INFO as error: + raise KeyError( + f"Cannot delete metadata with key '{self.key}', value '{self.value}'" + f" and units '{self.units}' since it does not exist." 
+ ) from error + except irods.exception.CAT_NO_ACCESS_PERMISSION as error: + raise ValueError( + f"Cannot delete metadata due to insufficient permission " + f"for path '{self.item.path}'." + ) from error + self._prc_meta = None + + def __lt__(self, other: MetaDataItem) -> bool: + """Compare two metadata items for sorting mainly.""" + if not isinstance(other, MetaDataItem): + raise TypeError(f"Comparison between MetaDataItem and {type(other)} not supported.") + comp_key = _comp_str_none(self.key, other.key) + if comp_key is not None: + return comp_key + comp_value = _comp_str_none(self.value, other.value) + if comp_value is not None: + return comp_value + comp_units = _comp_str_none(self.units, other.units) + if comp_units is not True: + return False + return True + + def matches(self, key, value, units): + """See whether the metadata item matches the key,value,units pattern.""" + units = "" if units is None else units + if key is not ... and key != self.key: + return False + if value is not ... and value != self.value: + return False + if units is not ... and units != self.units: + return False + return True + + +def _comp_str_none(obj: Optional[str], other: Optional[str]) -> Optional[bool]: + if obj is None and other is not None: + return True + if obj is not None and other is None: + return False + if str(obj) == str(other): + return None + return str(obj) < str(other) + + +def _pad_search_pattern(search_pattern) -> tuple: + if isinstance(search_pattern, str): + padded_pattern = (search_pattern, ..., ...) + elif len(search_pattern) == 1: + padded_pattern = (*search_pattern, ..., ...) # type: ignore + elif len(search_pattern) == 2: + padded_pattern = (*search_pattern, ...) 
# type: ignore + elif len(search_pattern) > 3: + raise ValueError("Too many arguments for '[]', use key, value, units.") + else: + padded_pattern = tuple(search_pattern) # type: ignore + return padded_pattern diff --git a/ibridges/path.py b/ibridges/path.py index 98265d47..784301ea 100644 --- a/ibridges/path.py +++ b/ibridges/path.py @@ -620,6 +620,10 @@ def checksum(self) -> str: return super().checksum return self._checksum + def __repr__(self) -> str: + """Representation of the CachedIrodsPath object in line with a Path object.""" + return f"CachedIrodsPath({', '.join(self._path.parts)})" + def dataobject_exists(self) -> bool: """See IrodsPath.""" return self._is_dataobj diff --git a/ibridges/search.py b/ibridges/search.py index be4f6bbb..e4a92859 100644 --- a/ibridges/search.py +++ b/ibridges/search.py @@ -3,15 +3,15 @@ from __future__ import annotations from collections import namedtuple -from typing import Optional, Union +from typing import List, Optional, Union from ibridges import icat_columns as icat -from ibridges.path import IrodsPath +from ibridges.path import CachedIrodsPath, IrodsPath from ibridges.session import Session META_COLS = { "collection": (icat.META_COLL_ATTR_NAME, icat.META_COLL_ATTR_VALUE, icat.META_COLL_ATTR_UNITS), - "data_object": (icat.META_DATA_ATTR_NAME, icat.META_DATA_ATTR_VALUE, icat.META_DATA_ATTR_UNITS) + "data_object": (icat.META_DATA_ATTR_NAME, icat.META_DATA_ATTR_VALUE, icat.META_DATA_ATTR_UNITS), } @@ -28,8 +28,9 @@ class MetaSearch(namedtuple("MetaSearch", ["key", "value", "units"], defaults=[. def __new__(cls, key=..., value=..., units=...): """Create a new MetaSearch object.""" if key is ... and value is ... and units is ...: - raise ValueError("Cannot create metasearch without specifying either key, value or " - "units.") + raise ValueError( + "Cannot create metasearch without specifying either key, value or units." + ) key = "%" if key is ... else key value = "%" if value is ... else value units = "%" if units is ... 
else units @@ -43,8 +44,8 @@ def search_data( # pylint: disable=too-many-branches checksum: Optional[str] = None, metadata: Union[None, MetaSearch, list[MetaSearch], list[tuple]] = None, item_type: Optional[str] = None, - case_sensitive: bool = False -) -> list[IrodsPath]: + case_sensitive: bool = False, +) -> list[CachedIrodsPath]: """Search for collections, data objects and metadata. By default all accessible collections and data objects are returned. @@ -83,11 +84,9 @@ def search_data( # pylint: disable=too-many-branches Returns ------- - List of dictionaries with keys: - COLL_NAME (absolute path of the collection), - DATA_NAME (name of the data object), - D_DATA_CHECKSUM (checksum of the data object) - The latter two keys are only present of the found item is a data object. + List of CachedIrodsPaths. + The CachedIrodsPaths for data objects contain the size and the checksum + found in the search. Examples -------- @@ -156,23 +155,29 @@ def search_data( # pylint: disable=too-many-branches queries.append((coll_query, "collection")) if item_type != "collection": # create the query for data objects; we need the collection name, the data name and checksum - data_query = session.irods_session.query(icat.COLL_NAME, - icat.DATA_NAME, - icat.DATA_CHECKSUM, - case_sensitive=case_sensitive) + data_query = session.irods_session.query( + icat.COLL_NAME, + icat.DATA_NAME, + icat.DATA_CHECKSUM, + icat.DATA_SIZE, + case_sensitive=case_sensitive, + ) data_query = data_query.filter(icat.LIKE(icat.COLL_NAME, _postfix_wildcard(path))) queries.append((data_query, "data_object")) - data_name_query = session.irods_session.query(icat.COLL_NAME, icat.DATA_NAME, - icat.DATA_CHECKSUM, - case_sensitive=case_sensitive) + data_name_query = session.irods_session.query( + icat.COLL_NAME, + icat.DATA_NAME, + icat.DATA_CHECKSUM, + icat.DATA_SIZE, + case_sensitive=case_sensitive, + ) data_name_query.filter(icat.LIKE(icat.COLL_NAME, f"{path}")) queries.append((data_name_query, "data_object")) if 
path_pattern is not None: _path_filter(path_pattern, queries) - for mf in metadata: _meta_filter(mf, queries) @@ -184,23 +189,28 @@ def search_data( # pylint: disable=too-many-branches query_results.extend(list(q[0])) # gather results, data_query and data_name_query can contain the same results - results = [ - dict(s) for s in set(frozenset(d.items()) - for d in query_results) - ] + results = [dict(s) for s in set(frozenset(d.items()) for d in query_results)] for item in results: if isinstance(item, dict): - new_keys = [k.icat_key for k in item.keys()] - for n_key, o_key in zip(new_keys, item.keys()): - item[n_key] = item.pop(o_key) + for meta_key in list(item.keys()): + item[meta_key.icat_key] = item.pop(meta_key) # Convert the results to IrodsPath objects. - ipath_results = [] + ipath_results: List[CachedIrodsPath] = [] for res in results: if "DATA_NAME" in res: - ipath_results.append(IrodsPath(session, res["COLL_NAME"], res["DATA_NAME"])) + ipath_results.append( + CachedIrodsPath( + session, + res["DATA_SIZE"], + True, + res["D_DATA_CHECKSUM"], + res["COLL_NAME"], + res["DATA_NAME"], + ) + ) else: - ipath_results.append(IrodsPath(session, res["COLL_NAME"])) + ipath_results.append(CachedIrodsPath(session, None, False, None, res["COLL_NAME"])) return ipath_results @@ -209,11 +219,13 @@ def _prefix_wildcard(pattern): return pattern return f"%/{pattern}" + def _postfix_wildcard(path): if str(path).endswith("/"): return f"{path}%" return f"{path}/%" + def _path_filter(path_pattern, queries): for q, q_type in queries: if q_type == "collection": @@ -233,6 +245,7 @@ def _meta_filter(metadata, queries): for i_elem, elem in enumerate(MetaSearch(*metadata)): q.filter(icat.LIKE(META_COLS[q_type][i_elem], elem)) + def _checksum_filter(checksum, queries): for q, q_type in queries: if q_type == "data_object": diff --git a/tutorials/04-Metadata.ipynb b/tutorials/04-Metadata.ipynb index b8752841..75ea7f8c 100644 --- a/tutorials/04-Metadata.ipynb +++ b/tutorials/04-Metadata.ipynb 
@@ -22,14 +22,6 @@ "Technically, iRODS offers metadata as key-value-units triple. Let's investigate this:" ] }, - { - "cell_type": "markdown", - "id": "bde731e4", - "metadata": {}, - "source": [ - "## Add metadata to data objects" - ] - }, { "cell_type": "markdown", "id": "784fdac1", @@ -46,15 +38,26 @@ "outputs": [], "source": [ "from ibridges.interactive import interactive_auth\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", "session = interactive_auth()" ] }, + { + "cell_type": "markdown", + "id": "bde731e4", + "metadata": {}, + "source": [ + "## Add metadata to an `IrodsPath`" + ] + }, { "cell_type": "markdown", "id": "eb0524a2", "metadata": {}, "source": [ - "Make sure we have our *demo* collection and file available:" + "Make sure we have our *demo* collection and object available:" ] }, { @@ -69,9 +72,9 @@ "irods_path = IrodsPath(session, '~')\n", "print(\"Current working location:\", irods_path)\n", "irods_coll_path = irods_path.joinpath('demo')\n", - "print(\"New collection name:\", irods_coll_path)\n", - "coll = IrodsPath.create_collection(session, irods_coll_path)\n", - "print(\"New collection is created:\", irods_coll_path.collection_exists())" + "irods_obj_path = irods_coll_path / 'demofile.txt'\n", + "print(\"Demo collection name:\", irods_coll_path, \"exists: \", irods_coll_path.collection_exists())\n", + "print(\"Demo object name\", irods_obj_path, \"exists: \", irods_obj_path.dataobject_exists())" ] }, { @@ -79,7 +82,7 @@ "id": "81094d42", "metadata": {}, "source": [ - "Now we can retrieve a data object and insect its metadata." + "We can retrieve the metadata associated with the data object from its `IrodsPath`, for convenience we will store it in the variable `obj_meta`. 
The `obj_meta` is no longer an `IrodsPath` but of type `MetaData`:" ] }, { @@ -89,11 +92,9 @@ "metadata": {}, "outputs": [], "source": [ - "from ibridges.path import IrodsPath\n", - "\n", - "irods_coll_path = IrodsPath(session, '~').joinpath('demo')\n", - "obj = irods_coll_path.joinpath('demofile.txt')\n", - "print(obj.meta)" + "print(irods_obj_path.meta)\n", + "obj_meta = irods_obj_path.meta\n", + "print(type(obj_meta))" ] }, { @@ -101,8 +102,11 @@ "id": "7d9a578b", "metadata": {}, "source": [ - "Most probably you will see no metadata in the above cell. **Note, that system metadata and user-defined metadata are two different entities in a data object!**\n", - "With the command `MetaData(obj)` we only retrieve the user-defined metadata.\n", + "Most probably you will see no metadata in the output of the above cell. \n", + "\n", + "**Note, that system metadata and user-defined metadata are two different entities in a data object!**\n", + "\n", + "With the command `IrodsPath.meta` we only retrieve the user-defined metadata.\n", "\n", "" ] @@ -122,8 +126,8 @@ "metadata": {}, "outputs": [], "source": [ - "obj.meta.add('Key', 'Value', 'Units')\n", - "print(obj.meta)" + "obj_meta.add('Key', 'Value', 'Units')\n", + "print(obj_meta)" ] }, { @@ -141,8 +145,8 @@ "metadata": {}, "outputs": [], "source": [ - "obj.meta.add('Author', 'Christine')\n", - "print(obj.meta)" + "obj_meta.add('Author', 'Christine')\n", + "print(obj_meta)" ] }, { @@ -160,8 +164,8 @@ "metadata": {}, "outputs": [], "source": [ - "obj.meta.add('Author', 'Raoul')\n", - "print(obj.meta)" + "obj_meta.add('Author', 'Alice')\n", + "print(obj_meta)" ] }, { @@ -169,7 +173,7 @@ "id": "116c0f10", "metadata": {}, "source": [ - "You see, that keys in **iRODS metadata keys can have different values**. That is different from python dictionaries where one key can only have one value. **How then to overwrite a value?**" + "You see, that in **iRODS metadata keys can have different values**. 
That is different from python dictionaries where one key can only have one value. **How then to overwrite a value?**" ] }, { @@ -185,7 +189,9 @@ "id": "cb5b7fef", "metadata": {}, "source": [ - "If you wish to *overwrite* a value, you will first have to remove the old metadata and subsequently add a new metadata entry. **NOTE, that all entries with the key will be deleted.** If you want to be more specific you will need to give the value and the units." + "If you wish to *overwrite* a key, value or units, you will first have to retrieve the respective metadata item. You can retrieve an item by providing the key. If you have several items with the same key you will have to provide the value too and sometimes also the units.\n", + "\n", + "The syntax looks like accessing a dictionary. Let's have a look at how to retrieve the author metadata:" ] }, { @@ -195,31 +201,82 @@ "metadata": {}, "outputs": [], "source": [ - "obj.meta.delete('Author')\n", - "print(obj.meta)" + "obj_meta[\"Author\"]" + ] + }, + { + "cell_type": "markdown", + "id": "98ce293a-6d1b-440e-9138-1711fa39a7ad", + "metadata": {}, + "source": [ + "*iBridges* complains that there are several metadata items with the key `Author`. 
Let's have a look at all of those:" ] }, { "cell_type": "code", "execution_count": null, - "id": "3348ce2d", + "id": "0c34602b-8b03-4b86-9385-3a4995a845dd", "metadata": {}, "outputs": [], "source": [ - "obj.meta.add('Author', 'Raoul')\n", - "obj.meta.add('Author', 'Christine')\n", - "print(obj.meta)" + "print(obj_meta.find_all('Author'))" + ] + }, + { + "cell_type": "markdown", + "id": "0534ef86-0cfb-47d8-ac79-41804f77fd30", + "metadata": {}, + "source": [ + "Now we can retrieve the one where the author is `Christine`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e0c836e-da25-432a-ae43-c426e0a25dc0", + "metadata": {}, + "outputs": [], + "source": [ + "meta_item = obj_meta['Author', 'Christine']\n", + "print(meta_item)" + ] + }, + { + "cell_type": "markdown", + "id": "6c8cdae5-874c-4d2f-b5a5-cc3ccde27337", + "metadata": {}, + "source": [ + "And we can change the value of exactly that metadata item:" ] }, { "cell_type": "code", "execution_count": null, - "id": "6c0eaf42", + "id": "caf1db88-df17-450c-80e2-d2b188366fcb", "metadata": {}, "outputs": [], "source": [ - "obj.meta.delete('Author', 'Christine')\n", - "print(obj.meta)" + "print(meta_item)\n", + "meta_item.value = \"AnotherAuthor\"\n", + "print(meta_item)" + ] + }, + { + "cell_type": "markdown", + "id": "a3688ac9-b0b4-4cfd-b663-9da59fff9a17", + "metadata": {}, + "source": [ + "**Important**: What happens if we would change the metadata item to one which is already present in the metadata of the object? Changing `AnotherAuthor` to `Alice` would create an identical metadata item in the list of all metadata of that object. 
Let's try it out:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23d158e5-3152-4b87-b17f-906e5977a1bf", + "metadata": {}, + "outputs": [], + "source": [ + "meta_item.value = 'Alice'" ] }, { @@ -227,7 +284,7 @@ "id": "e7f47a59", "metadata": {}, "source": [ - "You can also set all existing values to **one** new value:" + "Of course you can also alter the `key` and the `units` of a metadata item:" ] }, { @@ -237,8 +294,11 @@ "metadata": {}, "outputs": [], "source": [ - "obj.meta.set('Author', 'Maarten')\n", - "print(obj.meta)" + "print(\"Changing: \", meta_item)\n", + "meta_item.key = 'Key'\n", + "print(\"Overwriting the key:\", meta_item)\n", + "meta_item.units = 'MyUnits'\n", + "print(\"Overwriting the units:\", meta_item)" ] }, { @@ -246,79 +306,133 @@ "id": "1af4d99f", "metadata": {}, "source": [ - "iRODS metadata also has a an entry called `units`. The same principles that we showed above, i.e. having the same key-value pair with several units, deleting and setting values, apply to units." + "### Setting metadata" + ] + }, + { + "cell_type": "markdown", + "id": "5dbc03e9-3b30-4b0d-9b7c-f3be1403bf75", + "metadata": {}, + "source": [ + "Another way to set a metadata key to a new value and units is the `set` function." 
] }, { "cell_type": "code", "execution_count": null, - "id": "6c8ee90c", + "id": "2815fab7-143c-4be8-ae33-557e1ea43efa", "metadata": {}, "outputs": [], "source": [ - "obj.meta.add('key', 'value', 'units1')\n", - "obj.meta.add('key', 'value', 'units2')\n", - "print(obj.meta)" + "print(obj_meta)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d8029d7-14f3-458c-927e-db892c6909cd", + "metadata": {}, + "outputs": [], + "source": [ + "obj_meta.set('Author', 'person')\n", + "print(obj_meta)" + ] + }, + { + "cell_type": "markdown", + "id": "2b8d7001-2143-4b43-9856-0ac0abb2d392", + "metadata": {}, + "source": [ + "**Note**, that if there are several entries with the same key, they will all be removed and replaced with the one new metadata item:" ] }, { "cell_type": "code", "execution_count": null, - "id": "d39bf4eb", + "id": "f7cd1403-31d9-406f-b20a-32362b3d256e", "metadata": {}, "outputs": [], "source": [ - "obj.meta.set('key', 'value', 'units3')\n", - "print(obj.meta)" + "obj_meta.set('Key', 'OtherValue')\n", + "print(obj_meta)" + ] + }, + { + "cell_type": "markdown", + "id": "922e8c11-44d9-4d9e-90fe-b488aa8ce2d5", + "metadata": {}, + "source": [ + "## Deleting metadata" ] }, { "cell_type": "code", "execution_count": null, - "id": "97b93549", + "id": "0ea21f51-42ad-40e4-af92-2227415d5f72", "metadata": {}, "outputs": [], "source": [ - "obj.meta.delete('key', 'value')\n", - "print(obj.meta)" + "obj_meta.add('Author', 'Christine')\n", + "print(obj_meta)" ] }, { "cell_type": "markdown", - "id": "c3f54857", + "id": "ad30990d-5e77-4d52-b22b-80d57585351e", + "metadata": {}, + "source": [ + "### Deleting a single metadata item\n", + "\n", + "To delete a single metadata item you will have to be again specific with your key, value and units information to identify the correct metadata item. 
To delete all metadata with the key `Key` we can simply use:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16ecef1c-6f31-46bd-bebd-0a917abc014c", "metadata": {}, + "outputs": [], "source": [ - "## Add metadata to collections" + "obj_meta.delete('Key')\n", + "print(obj_meta)" ] }, { "cell_type": "markdown", - "id": "51438d57", + "id": "d12e796e-b7d7-4c0e-ae40-c3c71fac96e8", "metadata": {}, "source": [ - "The same functionality we saw above, we can use for collections:" + "The same command on the metadata with the key `Author` would delete all of the entries:" ] }, { "cell_type": "code", "execution_count": null, - "id": "65fb35a5", + "id": "788e6dd1-0ae7-489d-9010-346eea3766fb", "metadata": {}, "outputs": [], "source": [ - "coll = irods_coll_path\n", - "print(coll.meta)" + "obj_meta.delete('Author')\n", + "print(obj_meta)" + ] + }, + { + "cell_type": "markdown", + "id": "d69a6999-a40a-4dbd-861e-c38649c365d1", + "metadata": {}, + "source": [ + "If you want to clear the whole metadata, use:" ] }, { "cell_type": "code", "execution_count": null, - "id": "2af31459", + "id": "34e6c170-acf1-4aee-a869-d8521b4613d0", "metadata": {}, "outputs": [], "source": [ - "coll.meta.add('TypeOfCollection', 'Results')\n", - "print(coll.meta)" + "obj_meta.clear()\n", + "print(obj_meta)" ] }, { @@ -334,7 +448,7 @@ "id": "cb48bb74", "metadata": {}, "source": [ - "iRODS metadata can help you keeping an overview while you are working with data and maybe many files which have relations to each other. There are ontologies which define keywords and links between keywords like the **[prov-o Ontology](https://www.w3.org/TR/prov-o/#prov-o-at-a-glance)**.\n", + "iRODS metadata can help you keeping an overview while you are working with data and many files which have relations to each other. 
There are ontologies which define keywords and links between keywords like the **[prov-o Ontology](https://www.w3.org/TR/prov-o/#prov-o-at-a-glance)**.\n", "\n", "Let's see how we can annotate our test data, so that we know that it is test data." ] @@ -347,11 +461,12 @@ "outputs": [], "source": [ "from datetime import datetime\n", - "coll.meta.add('prov:wasGeneratedBy', 'Christine')\n", - "coll.meta.add('CollectionType', 'testcollection')\n", - "obj.meta.add('prov:SoftwareAgent', 'iRODS jupyter Tutorial')\n", - "obj.meta.add('prov:wasGeneratedBy', 'Maarten')\n", - "obj.meta.add('DataType', 'testdata')" + "coll_meta = irods_coll_path.meta\n", + "coll_meta.add('prov:wasGeneratedBy', 'Christine')\n", + "coll_meta.add('CollectionType', 'testcollection')\n", + "obj_meta.add('prov:SoftwareAgent', 'iRODS jupyter Tutorial')\n", + "obj_meta.add('prov:wasGeneratedBy', 'Maarten')\n", + "obj_meta.add('DataType', 'testdata')" ] }, { @@ -369,9 +484,9 @@ "metadata": {}, "outputs": [], "source": [ - "print(coll.meta)\n", + "print(coll_meta)\n", "print()\n", - "print(obj.meta)" + "print(obj_meta)" ] }, { @@ -409,8 +524,8 @@ "id": "ea7129f8", "metadata": {}, "source": [ - "The output is a list of IrodsPath's indicating the locations of the data objects and collections.\n", - "If no `path` is provided, *ibridges* will automatically fall back on your `home`." + "The output is a list of `CachedIrodsPath` objects indicating the locations of the data objects and collections.\n", + "If the parameter `path` is not provided, *ibridges* will automatically fall back on your `home`." ] }, { @@ -610,7 +725,7 @@ "id": "05958fd7", "metadata": {}, "source": [ - "#### 3. Find all collections and data called `demo` that on the 5th layer of the collection tree" + "#### 3. 
Find all collections and data called `demo` on the 5th layer of the collection tree" ] }, { @@ -667,7 +782,15 @@ "id": "946e355c", "metadata": {}, "source": [ - "Now that we have the search results we can use the `IrodsPath` to download them or to fetch more information:" + "Now that we have the search results we can use the `CachedIrodsPath` to download them or to fetch more information." + ] + }, + { + "cell_type": "markdown", + "id": "629673a8-1b03-47f0-8f69-d62bc54d98e8", + "metadata": {}, + "source": [ + "**Note: the `CachedIrodsPath` contains cached information, e.g. the checksum and size, as they were at the time of the search.**" ] }, { @@ -677,11 +800,32 @@ "metadata": {}, "outputs": [], "source": [ + "print(type(result[0]))\n", "print(result[0].size)\n", + "print(result[0].checksum)\n", "print(result[0].collection_exists())\n", "print(result[0].dataobject_exists())" ] }, + { + "cell_type": "markdown", + "id": "7b6d7062-4e24-4fe8-bb7a-9b22ce0280ea", + "metadata": {}, + "source": [ + "In case you need to be sure about the current size or checksum, you will have to cast the path again to an `IrodsPath`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "134d5ad1-52b8-4a35-871c-6ad77e641ed1", + "metadata": {}, + "outputs": [], + "source": [ + "ipath = IrodsPath(session, result[0])\n", + "type(ipath)" + ] + }, { "cell_type": "markdown", "id": "4072812e", @@ -770,7 +914,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.12.4" } }, "nbformat": 4,