From 47d1a697054cdb37f3d794f120ac795096b5a6ab Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Tue, 27 Apr 2021 16:27:17 +0200 Subject: [PATCH 01/26] First stab at HDF mixin --- pyiron_base/generic/has_hdf.py | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 pyiron_base/generic/has_hdf.py diff --git a/pyiron_base/generic/has_hdf.py b/pyiron_base/generic/has_hdf.py new file mode 100644 index 000000000..9284fa4de --- /dev/null +++ b/pyiron_base/generic/has_hdf.py @@ -0,0 +1,43 @@ + +from abc import ABC, abstractmethod + +class WithHDF: + + def __init__(self, hdf, group_name=None): + self._hdf = hdf + self._group_name = group_name + + def __enter__(self): + if self._group_name is not None: + self._hdf = self._hdf.open(self._group_name) + + return self._hdf + + def __exit__(self, *args): + if self._group_name is not None: + self._hdf.close() + +class HasHDF(ABC): + + @abstractmethod + def _from_hdf(hdf): + pass + + @abstractmethod + def _to_hdf(hdf): + pass + + def from_hdf(self, hdf, group_name=None): + with WithHDF(hdf, group_name) as hdf: + self._from_hdf(hdf) + + def to_hdf(self, hdf, group_name=None): + with WithHDF(hdf, group_name) as hdf: + self._to_hdf(hdf) + + def rewrite_hdf(self, hdf, group_name=None): + with WithHDF(hdf, group_name) as hdf: + obj = self.__class__() + obj.from_hdf(hdf) + hdf.remove(...) 
+ obj.to_hdf(hdf) From ecf6ff344c4bcda309d9743507b46e453b5e88b8 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Mon, 3 May 2021 14:37:29 +0200 Subject: [PATCH 02/26] Turn WithHDF into a slots object --- pyiron_base/generic/has_hdf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyiron_base/generic/has_hdf.py b/pyiron_base/generic/has_hdf.py index 9284fa4de..0340f8738 100644 --- a/pyiron_base/generic/has_hdf.py +++ b/pyiron_base/generic/has_hdf.py @@ -2,6 +2,7 @@ from abc import ABC, abstractmethod class WithHDF: + __slots__ = ("_hdf", "_group_name") def __init__(self, hdf, group_name=None): self._hdf = hdf From 51bb0501cb172f2f419509c11e2535416348469a Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Mon, 3 May 2021 14:37:37 +0200 Subject: [PATCH 03/26] Add _type_to_hdf to interface --- pyiron_base/generic/has_hdf.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyiron_base/generic/has_hdf.py b/pyiron_base/generic/has_hdf.py index 0340f8738..5260605d6 100644 --- a/pyiron_base/generic/has_hdf.py +++ b/pyiron_base/generic/has_hdf.py @@ -24,6 +24,10 @@ class HasHDF(ABC): def _from_hdf(hdf): pass + @abstractmethod + def _type_to_hdf(hdf): + pass + @abstractmethod def _to_hdf(hdf): pass @@ -34,6 +38,7 @@ def from_hdf(self, hdf, group_name=None): def to_hdf(self, hdf, group_name=None): with WithHDF(hdf, group_name) as hdf: + self._type_to_hdf(hdf) self._to_hdf(hdf) def rewrite_hdf(self, hdf, group_name=None): From 1e31f578b371b360005ba287d1a75a3d7d9959eb Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 6 Jun 2021 10:32:07 +0200 Subject: [PATCH 04/26] Implement _type_to_hdf --- pyiron_base/generic/has_hdf.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pyiron_base/generic/has_hdf.py b/pyiron_base/generic/has_hdf.py index 5260605d6..35edf5438 100644 --- a/pyiron_base/generic/has_hdf.py +++ b/pyiron_base/generic/has_hdf.py @@ -20,18 +20,23 @@ def __exit__(self, *args): class HasHDF(ABC): - @abstractmethod - def 
_from_hdf(hdf): - pass + __version__ = "0.1.0" + __hdf_version__ = "0.1.0" @abstractmethod - def _type_to_hdf(hdf): + def _from_hdf(hdf): pass @abstractmethod def _to_hdf(hdf): pass + def _type_to_hdf(hdf): + hdf["NAME"] = self.__class__.__name__ + hdf["TYPE"] = str(type(self)) + hdf["VERSION"] = self.__version__ + hdf["HDF_VERSION"] = self.__hdf_version__ + def from_hdf(self, hdf, group_name=None): with WithHDF(hdf, group_name) as hdf: self._from_hdf(hdf) From abfd073c4020030cc4ffc7d6a70eca24465d2858 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 6 Jun 2021 10:34:45 +0200 Subject: [PATCH 05/26] Pass HDF version to _from_hdf --- pyiron_base/generic/has_hdf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyiron_base/generic/has_hdf.py b/pyiron_base/generic/has_hdf.py index 35edf5438..4f64193a6 100644 --- a/pyiron_base/generic/has_hdf.py +++ b/pyiron_base/generic/has_hdf.py @@ -24,7 +24,7 @@ class HasHDF(ABC): __hdf_version__ = "0.1.0" @abstractmethod - def _from_hdf(hdf): + def _from_hdf(hdf, version=None): pass @abstractmethod @@ -39,7 +39,8 @@ def _type_to_hdf(hdf): def from_hdf(self, hdf, group_name=None): with WithHDF(hdf, group_name) as hdf: - self._from_hdf(hdf) + version = hdf.get("HDF_VERSION", "0.1.0") + self._from_hdf(hdf, version=version) def to_hdf(self, hdf, group_name=None): with WithHDF(hdf, group_name) as hdf: From a8b42a31c76009c4e560a52332babb1ea06ee14c Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 6 Jun 2021 10:43:12 +0200 Subject: [PATCH 06/26] Move HasHDF to pyiron_base.interfaces Adds a method to allow sub classes to specify an HDF group name that they want to be written into. Will be used if to_hdf() is not given a group_name. 
Pass self in abstractmethods --- pyiron_base/{generic => interfaces}/has_hdf.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) rename pyiron_base/{generic => interfaces}/has_hdf.py (85%) diff --git a/pyiron_base/generic/has_hdf.py b/pyiron_base/interfaces/has_hdf.py similarity index 85% rename from pyiron_base/generic/has_hdf.py rename to pyiron_base/interfaces/has_hdf.py index 4f64193a6..cb72b324d 100644 --- a/pyiron_base/generic/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -24,20 +24,25 @@ class HasHDF(ABC): __hdf_version__ = "0.1.0" @abstractmethod - def _from_hdf(hdf, version=None): + def _from_hdf(self, hdf, version=None): pass @abstractmethod - def _to_hdf(hdf): + def _to_hdf(self, hdf): pass - def _type_to_hdf(hdf): + @abstractmethod + def _get_group_name(self): + pass + + def _type_to_hdf(self, hdf): hdf["NAME"] = self.__class__.__name__ hdf["TYPE"] = str(type(self)) hdf["VERSION"] = self.__version__ hdf["HDF_VERSION"] = self.__hdf_version__ def from_hdf(self, hdf, group_name=None): + group_name = group_name or self._get_group_name() with WithHDF(hdf, group_name) as hdf: version = hdf.get("HDF_VERSION", "0.1.0") self._from_hdf(hdf, version=version) From 3aedebe1415f8f4096c72f71c96737453c92a77d Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 6 Jun 2021 11:03:12 +0200 Subject: [PATCH 07/26] Rename get_group_name method --- pyiron_base/interfaces/has_hdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index cb72b324d..8577936c6 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -32,7 +32,7 @@ def _to_hdf(self, hdf): pass @abstractmethod - def _get_group_name(self): + def _get_hdf_group_name(self): pass def _type_to_hdf(self, hdf): From 61cce4fb501e8a9b08174e63b51a64e5ee90b5f0 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 6 Jun 2021 11:03:29 +0200 Subject: [PATCH 08/26] Check if HDF group is 
empty before writing --- pyiron_base/interfaces/has_hdf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index 8577936c6..fba6d233f 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -49,6 +49,8 @@ def from_hdf(self, hdf, group_name=None): def to_hdf(self, hdf, group_name=None): with WithHDF(hdf, group_name) as hdf: + if len(hdf.list_dirs()) > 0 and group_name is None: + raise ValueError("HDF group must be empty when group_name is not set.") self._type_to_hdf(hdf) self._to_hdf(hdf) From 14e13198f280c47a1dc271eeb92680be14406b66 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 6 Jun 2021 11:04:59 +0200 Subject: [PATCH 09/26] Use _get_hdf_group_name in to_hdf --- pyiron_base/interfaces/has_hdf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index fba6d233f..18b1b3278 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -42,12 +42,13 @@ def _type_to_hdf(self, hdf): hdf["HDF_VERSION"] = self.__hdf_version__ def from_hdf(self, hdf, group_name=None): - group_name = group_name or self._get_group_name() + group_name = group_name or self._get_hdf_group_name() with WithHDF(hdf, group_name) as hdf: version = hdf.get("HDF_VERSION", "0.1.0") self._from_hdf(hdf, version=version) def to_hdf(self, hdf, group_name=None): + group_name = group_name or self._get_hdf_group_name() with WithHDF(hdf, group_name) as hdf: if len(hdf.list_dirs()) > 0 and group_name is None: raise ValueError("HDF group must be empty when group_name is not set.") From 94bc4d5d78e4fc7b7ec8a8b166cb9028829fe6f0 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Sun, 6 Jun 2021 11:06:09 +0200 Subject: [PATCH 10/26] Add HasHDF to DataContainer --- pyiron_base/generic/datacontainer.py | 63 +++------------------------- 1 file changed, 6 insertions(+), 57 deletions(-) diff 
--git a/pyiron_base/generic/datacontainer.py b/pyiron_base/generic/datacontainer.py index 6d967c96c..421b89a2b 100644 --- a/pyiron_base/generic/datacontainer.py +++ b/pyiron_base/generic/datacontainer.py @@ -15,7 +15,7 @@ from pyiron_base.generic.fileio import read, write from pyiron_base.generic.hdfstub import HDFStub -from pyiron_base.interfaces.has_groups import HasGroups +from pyiron_base.interfaces.has_groups import HasGroups, HasHDF __author__ = "Marvin Poul" __copyright__ = ( @@ -50,7 +50,7 @@ def _normalize(key): return key -class DataContainer(MutableMapping, HasGroups): +class DataContainer(MutableMapping, HasGroups, HasHDF): """ Mutable sequence with optional keys. @@ -677,30 +677,10 @@ def copy(self): """ return copy.deepcopy(self) - def to_hdf(self, hdf, group_name=None): - """ - Store the DataContainer in an HDF5 file. If ``group_name`` or - *self.table_name* are not `None`, create a sub group in hdf prior to - writing if not save directly to hdf. group_name overrides - self.table_name if both are not None. - - Args: - hdf (ProjectHDFio): HDF5 group object - group_name (str, optional): HDF5 subgroup name, overrides - self.table_name - """ - - group_name = group_name or self.table_name - if group_name: - hdf = hdf.create_group(group_name) - elif len(hdf.list_dirs()) > 0: - raise ValueError( - "HDF group must be empty when neither table_name nor " - "group_name are set." 
- ) - + def _get_hdf_group_name(self): + return self.table_name - self._type_to_hdf(hdf) + def _to_hdf(self, hdf): hdf["READ_ONLY"] = self.read_only for i, (k, v) in enumerate(self.items()): if isinstance(k, str) and "__index_" in k: @@ -720,38 +700,7 @@ def to_hdf(self, hdf, group_name=None): raise TypeError("Error saving {} (key {}): DataContainer doesn't support saving elements " "of type \"{}\" to HDF!".format(v, k, type(v))) from None - def _type_to_hdf(self, hdf): - """ - Internal helper function to save type and version in hdf root - - Args: - hdf (ProjectHDFio): HDF5 group object - """ - hdf["NAME"] = self.__class__.__name__ - hdf["TYPE"] = str(type(self)) - hdf["VERSION"] = self.__version__ - hdf["HDF_VERSION"] = self.__hdf_version__ - hdf["OBJECT"] = "DataContainer" - - def from_hdf(self, hdf, group_name=None): - """ - Restore the DataContainer from an HDF5 file. If group_name or - self.table_name are not None, open a sub group in hdf prior to reading - if not read directly from hdf. group_name overrides self.table_name if - both are not None. 
- - Args: - hdf (ProjectHDFio): HDF5 group object - group_name (str, optional): HDF5 subgroup name, overrides - self.table_name - """ - - group_name = group_name or self.table_name - if group_name: - hdf = hdf.open(group_name) - - version = hdf.get("HDF_VERSION", "0.1.0") - + def _from_hdf(self, hdf, version=None): self.clear() if version == "0.1.0": From 52b5bbbba76d6b35670c64601eee64e42a4aa62f Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Mon, 7 Jun 2021 01:16:58 +0200 Subject: [PATCH 11/26] Keep old OBJECT field in type information It's unused in all our code, but removing should involve discussion --- pyiron_base/interfaces/has_hdf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index 18b1b3278..185e4f664 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -38,6 +38,7 @@ def _get_hdf_group_name(self): def _type_to_hdf(self, hdf): hdf["NAME"] = self.__class__.__name__ hdf["TYPE"] = str(type(self)) + hdf["OBJECT"] = hdf["NAME"] # unused alias hdf["VERSION"] = self.__version__ hdf["HDF_VERSION"] = self.__hdf_version__ From 9a143596758d53a3676f76e809d2d82ed439aaf8 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 26 Aug 2021 15:33:09 +0200 Subject: [PATCH 12/26] Save __version__ only if defined --- pyiron_base/interfaces/has_hdf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index 185e4f664..f97a148e9 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -39,7 +39,8 @@ def _type_to_hdf(self, hdf): hdf["NAME"] = self.__class__.__name__ hdf["TYPE"] = str(type(self)) hdf["OBJECT"] = hdf["NAME"] # unused alias - hdf["VERSION"] = self.__version__ + if hasattr(self, "__version__"): + hdf["VERSION"] = self.__version__ hdf["HDF_VERSION"] = self.__hdf_version__ def from_hdf(self, hdf, group_name=None): From 
0f7cc6abe587c333015a67dc03824be5be9b6acf Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 26 Aug 2021 15:35:44 +0200 Subject: [PATCH 13/26] Implement rewrite_hdf --- pyiron_base/interfaces/has_hdf.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index f97a148e9..0af356224 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -59,7 +59,6 @@ def to_hdf(self, hdf, group_name=None): def rewrite_hdf(self, hdf, group_name=None): with WithHDF(hdf, group_name) as hdf: - obj = self.__class__() - obj.from_hdf(hdf) - hdf.remove(...) + obj = hdf.to_object() + hdf.remove_group() obj.to_hdf(hdf) From 0b8c9c47abe06dbeb0792c27191b056a68eaff1d Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 26 Aug 2021 15:36:23 +0200 Subject: [PATCH 14/26] Write type info after calling _to_hdf This is so the objects implementation cannot muck it up accidentally. --- pyiron_base/interfaces/has_hdf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index 0af356224..2dc41c73c 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -1,4 +1,3 @@ - from abc import ABC, abstractmethod class WithHDF: @@ -54,8 +53,8 @@ def to_hdf(self, hdf, group_name=None): with WithHDF(hdf, group_name) as hdf: if len(hdf.list_dirs()) > 0 and group_name is None: raise ValueError("HDF group must be empty when group_name is not set.") - self._type_to_hdf(hdf) self._to_hdf(hdf) + self._type_to_hdf(hdf) def rewrite_hdf(self, hdf, group_name=None): with WithHDF(hdf, group_name) as hdf: From 25757bc047cf0fb457c75e71517d8250134df60f Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 26 Aug 2021 15:52:40 +0200 Subject: [PATCH 15/26] Fix import --- pyiron_base/generic/datacontainer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/pyiron_base/generic/datacontainer.py b/pyiron_base/generic/datacontainer.py index 421b89a2b..fa18e21c4 100644 --- a/pyiron_base/generic/datacontainer.py +++ b/pyiron_base/generic/datacontainer.py @@ -15,7 +15,8 @@ from pyiron_base.generic.fileio import read, write from pyiron_base.generic.hdfstub import HDFStub -from pyiron_base.interfaces.has_groups import HasGroups, HasHDF +from pyiron_base.interfaces.has_groups import HasGroups +from pyiron_base.interfaces.has_hdf import HasHDF __author__ = "Marvin Poul" __copyright__ = ( From 6e52c70b74ca0b62b7d115fbd4f6887f98f31d47 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Mon, 30 Aug 2021 14:12:07 +0200 Subject: [PATCH 16/26] Make WithHDF private --- pyiron_base/interfaces/has_hdf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index 2dc41c73c..00a11bd2a 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod -class WithHDF: +class _WithHDF: __slots__ = ("_hdf", "_group_name") def __init__(self, hdf, group_name=None): @@ -44,20 +44,20 @@ def _type_to_hdf(self, hdf): def from_hdf(self, hdf, group_name=None): group_name = group_name or self._get_hdf_group_name() - with WithHDF(hdf, group_name) as hdf: + with _WithHDF(hdf, group_name) as hdf: version = hdf.get("HDF_VERSION", "0.1.0") self._from_hdf(hdf, version=version) def to_hdf(self, hdf, group_name=None): group_name = group_name or self._get_hdf_group_name() - with WithHDF(hdf, group_name) as hdf: + with _WithHDF(hdf, group_name) as hdf: if len(hdf.list_dirs()) > 0 and group_name is None: raise ValueError("HDF group must be empty when group_name is not set.") self._to_hdf(hdf) self._type_to_hdf(hdf) def rewrite_hdf(self, hdf, group_name=None): - with WithHDF(hdf, group_name) as hdf: + with _WithHDF(hdf, group_name) as hdf: obj = hdf.to_object() hdf.remove_group() obj.to_hdf(hdf) From 
74072d6bad4e8d49d9a3e98dbb7141eed0aa4388 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Tue, 31 Aug 2021 11:32:05 +0200 Subject: [PATCH 17/26] Add from_hdf_args to Mixin --- pyiron_base/interfaces/has_hdf.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index 00a11bd2a..2e53519ca 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -34,6 +34,19 @@ def _to_hdf(self, hdf): def _get_hdf_group_name(self): pass + @classmethod + def from_hdf_args(cls, hdf): + """ + Read arguments for instance creation from HDF5 file. + + Args: + hdf (ProjectHDFio): HDF5 group object + + Returns: + dict: arguments that can be **kwarg-passed to cls(). + """ + return {} + def _type_to_hdf(self, hdf): hdf["NAME"] = self.__class__.__name__ hdf["TYPE"] = str(type(self)) From 0da4c88ab5361a336804f49c36dae171bfcb040b Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Tue, 31 Aug 2021 11:34:04 +0200 Subject: [PATCH 18/26] Add copyright blurb --- pyiron_base/interfaces/has_hdf.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index 2e53519ca..b55e4c8d9 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -1,5 +1,21 @@ +# coding: utf-8 +# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department +# Distributed under the terms of "New BSD License", see the LICENSE file. 
+"""Interface for classes to serialize to HDF5.""" + from abc import ABC, abstractmethod +__author__ = "Marvin Poul" +__copyright__ = ( + "Copyright 2021, Max-Planck-Institut für Eisenforschung GmbH - " + "Computational Materials Design (CM) Department" +) +__version__ = "1.0" +__maintainer__ = "Marvin Poul" +__email__ = "poul@mpie.de" +__status__ = "production" +__date__ = "Sep 1, 2021" + class _WithHDF: __slots__ = ("_hdf", "_group_name") From b95fb39f12ace82f2959573c4f55dd29227ae506 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Wed, 15 Sep 2021 10:04:07 +0200 Subject: [PATCH 19/26] Don't define version by default Some objects only have an HDF version unlike jobs that also have a code version. --- pyiron_base/interfaces/has_hdf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index b55e4c8d9..97635119b 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -35,7 +35,6 @@ def __exit__(self, *args): class HasHDF(ABC): - __version__ = "0.1.0" __hdf_version__ = "0.1.0" @abstractmethod From 0a8821115101832f4fbea779862033716b2af0b4 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Wed, 15 Sep 2021 10:04:50 +0200 Subject: [PATCH 20/26] Explicitly check for None --- pyiron_base/interfaces/has_hdf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index 97635119b..7b920a33f 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -71,13 +71,13 @@ def _type_to_hdf(self, hdf): hdf["HDF_VERSION"] = self.__hdf_version__ def from_hdf(self, hdf, group_name=None): - group_name = group_name or self._get_hdf_group_name() + group_name = group_name if group_name is not None else self._get_hdf_group_name() with _WithHDF(hdf, group_name) as hdf: version = hdf.get("HDF_VERSION", "0.1.0") self._from_hdf(hdf, version=version) def to_hdf(self, hdf, 
group_name=None): - group_name = group_name or self._get_hdf_group_name() + group_name = group_name if group_name is not None else self._get_hdf_group_name() with _WithHDF(hdf, group_name) as hdf: if len(hdf.list_dirs()) > 0 and group_name is None: raise ValueError("HDF group must be empty when group_name is not set.") From af26ef905ee20cfb09d8b45518cd5852fc988d98 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Wed, 15 Sep 2021 10:05:13 +0200 Subject: [PATCH 21/26] Copy hdf object in HDFStubs Not copying caused an error in lazy DataContainers, because with the new HasHDF inheritance it would: 1. hdf.open(...) and pass that to DataContainer._from_hdf 2. this method would then pass references to the HDF object to the HDFStub 3. HasHDF.from_hdf then calls hdf.close(), this modifies the hdf object passed to DataContainer in place, changing where it points to in the file 4. At some point HDFStub.load() would be called, but since the place where hdf points to changed under its feet HDFStub.load() then reaches into the wrong location in the file --- pyiron_base/generic/hdfstub.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyiron_base/generic/hdfstub.py b/pyiron_base/generic/hdfstub.py index 8124f4044..f875cb052 100644 --- a/pyiron_base/generic/hdfstub.py +++ b/pyiron_base/generic/hdfstub.py @@ -60,11 +60,14 @@ def __init__(self, hdf, group_name): """ Create new stub. + The given hdf object is copied, so that calls to its :meth:`ProjectHDFio.open` and :meth:`.ProjectHDFio.close` + between this initialization and later calls to :meth:`.load` do not change the location this stub is pointing at. 
+ Args: hdf (:class:`.ProjectHDFio`): hdf object to load from group_name (str): node or group name to load from the hdf object """ - self._hdf = hdf + self._hdf = hdf.copy() self._group_name = group_name @classmethod From 82a18a06d722c5261b64b929b75f3458c4896304 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 16 Sep 2021 10:25:12 +0200 Subject: [PATCH 22/26] Add type annotations --- pyiron_base/interfaces/has_hdf.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index 7b920a33f..dc26cf2c3 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -3,6 +3,8 @@ # Distributed under the terms of "New BSD License", see the LICENSE file. """Interface for classes to serialize to HDF5.""" +from pyiron_base.generic.hdfio import ProjectHDFio + from abc import ABC, abstractmethod __author__ = "Marvin Poul" @@ -34,23 +36,26 @@ def __exit__(self, *args): self._hdf.close() class HasHDF(ABC): + """ + Mixin class for objects that can write themselves to HDF. + """ __hdf_version__ = "0.1.0" @abstractmethod - def _from_hdf(self, hdf, version=None): + def _from_hdf(self, hdf: ProjectHDFio, version: str=None): pass @abstractmethod - def _to_hdf(self, hdf): + def _to_hdf(self, hdf: ProjectHDFio): pass @abstractmethod - def _get_hdf_group_name(self): + def _get_hdf_group_name(self) -> str: pass @classmethod - def from_hdf_args(cls, hdf): + def from_hdf_args(cls, hdf: ProjectHDFio): """ Read arguments for instance creation from HDF5 file. 
@@ -62,7 +67,7 @@ def from_hdf_args(cls, hdf): """ return {} - def _type_to_hdf(self, hdf): + def _type_to_hdf(self, hdf: ProjectHDFio): hdf["NAME"] = self.__class__.__name__ hdf["TYPE"] = str(type(self)) hdf["OBJECT"] = hdf["NAME"] # unused alias @@ -70,13 +75,13 @@ def _type_to_hdf(self, hdf): hdf["VERSION"] = self.__version__ hdf["HDF_VERSION"] = self.__hdf_version__ - def from_hdf(self, hdf, group_name=None): + def from_hdf(self, hdf: ProjectHDFio, group_name: str=None): group_name = group_name if group_name is not None else self._get_hdf_group_name() with _WithHDF(hdf, group_name) as hdf: version = hdf.get("HDF_VERSION", "0.1.0") self._from_hdf(hdf, version=version) - def to_hdf(self, hdf, group_name=None): + def to_hdf(self, hdf: ProjectHDFio, group_name: str=None): group_name = group_name if group_name is not None else self._get_hdf_group_name() with _WithHDF(hdf, group_name) as hdf: if len(hdf.list_dirs()) > 0 and group_name is None: @@ -84,7 +89,7 @@ def to_hdf(self, hdf, group_name=None): self._to_hdf(hdf) self._type_to_hdf(hdf) - def rewrite_hdf(self, hdf, group_name=None): + def rewrite_hdf(self, hdf: ProjectHDFio, group_name: str=None): with _WithHDF(hdf, group_name) as hdf: obj = hdf.to_object() hdf.remove_group() From 7ed52a67adb449ed67ddfe5dffb8d83ad2af68ba Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 16 Sep 2021 11:16:38 +0200 Subject: [PATCH 23/26] Add docstring and usage example --- pyiron_base/interfaces/has_hdf.py | 66 +++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index dc26cf2c3..2dd417c31 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -38,6 +38,72 @@ def __exit__(self, *args): class HasHDF(ABC): """ Mixin class for objects that can write themselves to HDF. + + Subclasses must implement :meth:`._from_hdf`, :meth:`._to_hdf` and :meth:`_get_hdf_group_name`. 
They may implement + :meth:`.from_hdf_args`. + + :meth:`from_hdf` and :meth:`to_hdf` shall respect the given `group_name` in the following way. If either the + argument or the method :meth:`_get_hdf_group_name` returns not `None` they shall create a new subgroup in the given + HDF object and then call :meth:`_from_hdf` or :meth:`_to_hdf` with this subgroup and afterwards call + :meth:`ProjectHDFio.close` on it. If both are `None` it shall pass the given HDF object unchanged. + + Subclasses that need to read special arguments from HDF before an instance can be created, can overwrite + :meth:`.from_hdf_args` and return the arguments in a `dict` that can be **kwargs-passed to the `__init__` of the + subclass. When loading an object with :class:`ProjectHDFio.to_object` this method is called internally, used to + create an instance on which then :meth:`.from_hdf` is called. + + Here's a toy class that enables writting `list`s to HDF. + + >>> class HDFList(list, HasHDF): + ... def _from_hdf(self, hdf, version=None): + ... values = [] + ... for n in hdf.list_nodes(): + ... if not n.startswith("__index_"): continue + ... values.append((int(n.split("__index_")[1]), hdf[n])) + ... values = sorted(values, key=lambda e: e[0]) + ... self.clear() + ... self.extend(list(zip(*values))[1]) + ... def _to_hdf(self, hdf): + ... for i, v in enumerate(self): + ... hdf[f"__index_{i}"] = v + ... def _get_hdf_group_name(self): + ... return "list" + + We can use this simply like any other list, but also call the new HDF methods on it after we get an HDF object. + + >>> l = HDFList([1,2,3,4]) + >>> from pyiron_base import Project + >>> pr = Project('test_foo') + >>> hdf = pr.create_hdf(pr.path, 'list') + + Since we return "list" in :meth:`._get_hdf_group_name` by default our list gets written into a group of the same + name. 
+ + >>> l.to_hdf(hdf) + >>> hdf + {'groups': ['list'], 'nodes': []} + >>> hdf['list'] + {'groups': [], 'nodes': ['HDF_VERSION', 'NAME', 'OBJECT', 'TYPE', '__index_0', '__index_1', '__index_2', '__index_3']} + + (Since this is a docstring, actually calling :meth:`ProjectHDFio.to_object()` won't work, so we'll simulate it.) + + >>> lcopy = HDFList() + >>> lcopy.from_hdf(hdf) + >>> lcopy + [1, 2, 3, 4] + + We can also override the target group name by passing it + >>> l.to_hdf(hdf, "my_group") + >>> hdf + {'groups': ['list', 'my_group'], 'nodes': []} + + >>> hdf.remove_file() + >>> pr.remove(enable=True) + + .. document private methods + .. automethod _from_hdf + .. automethod _to_hdf + .. automethod _get_hdf_group_name """ __hdf_version__ = "0.1.0" From 27ff660dd592b9c849ef6621b7352d735392760b Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 16 Sep 2021 11:22:22 +0200 Subject: [PATCH 24/26] More docstrings --- pyiron_base/interfaces/has_hdf.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index 2dd417c31..418b76afe 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -121,7 +121,7 @@ def _get_hdf_group_name(self) -> str: pass @classmethod - def from_hdf_args(cls, hdf: ProjectHDFio): + def from_hdf_args(cls, hdf: ProjectHDFio) -> dict: """ Read arguments for instance creation from HDF5 file. @@ -142,12 +142,30 @@ def _type_to_hdf(self, hdf: ProjectHDFio): hdf["HDF_VERSION"] = self.__hdf_version__ def from_hdf(self, hdf: ProjectHDFio, group_name: str=None): + """ + Read object from HDF. + + If group_name is given descend into subgroup in hdf first. 
+ + Args: + hdf (:class:`.ProjectHDFio`): HDF group to read from + group_name (str, optional): name of subgroup + """ group_name = group_name if group_name is not None else self._get_hdf_group_name() with _WithHDF(hdf, group_name) as hdf: version = hdf.get("HDF_VERSION", "0.1.0") self._from_hdf(hdf, version=version) def to_hdf(self, hdf: ProjectHDFio, group_name: str=None): + """ + Write object to HDF. + + If group_name is given create a subgroup in hdf first. + + Args: + hdf (:class:`.ProjectHDFio`): HDF group to write to + group_name (str, optional): name of subgroup + """ group_name = group_name if group_name is not None else self._get_hdf_group_name() with _WithHDF(hdf, group_name) as hdf: if len(hdf.list_dirs()) > 0 and group_name is None: @@ -156,6 +174,15 @@ def to_hdf(self, hdf: ProjectHDFio, group_name: str=None): self._type_to_hdf(hdf) def rewrite_hdf(self, hdf: ProjectHDFio, group_name: str=None): + """ + Update the HDF representation. + + If an object is read from an older format, this will remove the old data and rewrite it in the newest format. + + Args: + hdf (:class:`.ProjectHDFio`): HDF group to read/write + group_name (str, optional): name of subgroup + """ with _WithHDF(hdf, group_name) as hdf: obj = hdf.to_object() hdf.remove_group() From bb3b746e13b2b7e027f0a6499aaad331140dde65 Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 16 Sep 2021 11:25:32 +0200 Subject: [PATCH 25/26] Talk about version --- pyiron_base/interfaces/has_hdf.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyiron_base/interfaces/has_hdf.py b/pyiron_base/interfaces/has_hdf.py index 418b76afe..969fd36c2 100644 --- a/pyiron_base/interfaces/has_hdf.py +++ b/pyiron_base/interfaces/has_hdf.py @@ -52,6 +52,10 @@ class HasHDF(ABC): subclass. When loading an object with :class:`ProjectHDFio.to_object` this method is called internally, used to create an instance on which then :meth:`.from_hdf` is called. 
+ Subclasses may specify an :attr:`__hdf_version__` to signal changes in the layout of the data in HDF. + :meth:`.from_hdf` will read this value and pass it verbatim to the subclasses :meth:`._from_hdf`. No semantics are + imposed on this value, but it is usually a three digit version number. + Here's a toy class that enables writting `list`s to HDF. >>> class HDFList(list, HasHDF): @@ -177,7 +181,7 @@ def rewrite_hdf(self, hdf: ProjectHDFio, group_name: str=None): """ Update the HDF representation. - If an object is read from an older format, this will remove the old data and rewrite it in the newest format. + If an object is read from an older layout, this will remove the old data and rewrite it in the newest layout. Args: hdf (:class:`.ProjectHDFio`): HDF group to read/write From 66464e8b059cf9d591c8c8353479e26d79295a9b Mon Sep 17 00:00:00 2001 From: Marvin Poul Date: Thu, 16 Sep 2021 11:44:21 +0200 Subject: [PATCH 26/26] Run doctests --- tests/interfaces/__init__.py | 0 tests/interfaces/test_hashdf.py | 8 ++++++++ 2 files changed, 8 insertions(+) create mode 100644 tests/interfaces/__init__.py create mode 100644 tests/interfaces/test_hashdf.py diff --git a/tests/interfaces/__init__.py b/tests/interfaces/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/interfaces/test_hashdf.py b/tests/interfaces/test_hashdf.py new file mode 100644 index 000000000..107584380 --- /dev/null +++ b/tests/interfaces/test_hashdf.py @@ -0,0 +1,8 @@ +import pyiron_base.interfaces.has_hdf +from pyiron_base._tests import PyironTestCase + +class TestHasHDF(PyironTestCase): + + @property + def docstring_module(self): + return pyiron_base.interfaces.has_hdf