From 2f0381e58328e4b2d684315fa5721a6de9945512 Mon Sep 17 00:00:00 2001 From: Yuxuan Zhuang Date: Fri, 21 Aug 2020 20:31:42 +0200 Subject: [PATCH] Serialization of AtomGroup and remove core.universe._ANCHOR_UNIVERSES (PR #2893) * AtomGroup now are pickled/unpickled without looking for its anchored Universe * removed core.universe._ANCHOR_UNIVERSES (originally introduced with #293) and related machinery to keep global state. No global state is kept any more. * update docs for atomgroup pickle * update CHANGELOG * update tests --- package/CHANGELOG | 5 +- package/MDAnalysis/__init__.py | 5 - package/MDAnalysis/core/groups.py | 72 ++++++++++-- package/MDAnalysis/core/universe.py | 74 ++---------- .../MDAnalysisTests/utils/test_persistence.py | 110 ++++++------------ 5 files changed, 106 insertions(+), 160 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 81bee146f7d..1df27904a8e 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -47,11 +47,12 @@ Fixes empty AtomGroup (Issue #2848) * Fixed the DMSParser, allowing the creation of multiple segids sharing residues with identical resids (Issue #1387, PR #2872) - * H5MD files are now pickleable with H5PYPicklable (Issue #2890, PR #2894) + * H5MD files are now picklable with H5PYPicklable (Issue #2890, PR #2894) * Fixed Janin analysis residue filtering (include CYSH) (Issue #2898) * libmdaxdr and libdcd classes in their last frame can now be pickled (Issue #2878, PR #2911) - + * AtomGroup now are pickled/unpickled without looking for its anchored + Universe (PR #2893) Enhancements * Refactored analysis.helanal into analysis.helix_analysis diff --git a/package/MDAnalysis/__init__.py b/package/MDAnalysis/__init__.py index be589ac162a..49307f2df84 100644 --- a/package/MDAnalysis/__init__.py +++ b/package/MDAnalysis/__init__.py @@ -181,11 +181,6 @@ _TOPOLOGY_ATTRNAMES = {} # {lower case name w/o _ : name} -# Storing anchor universes for unpickling groups -import weakref -_ANCHOR_UNIVERSES = weakref.WeakValueDictionary() -del weakref - # custom exceptions and warnings from .exceptions import ( SelectionError, NoDataError, ApplicationError, SelectionWarning, diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index ddcf9b792ee..31f6da06f71 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -96,7 +96,7 @@ import os import warnings -from .. import (_ANCHOR_UNIVERSES, _CONVERTERS, +from .. import (_CONVERTERS, _TOPOLOGY_ATTRS, _TOPOLOGY_TRANSPLANTS, _TOPOLOGY_ATTRNAMES) from ..lib import util from ..lib.util import cached, warn_if_not_unique, unique_int_1d @@ -110,16 +110,7 @@ from ._get_readers import get_writer_for, get_converter_for -def _unpickle(uhash, ix): - try: - u = _ANCHOR_UNIVERSES[uhash] - except KeyError: - # doesn't provide as nice an error message as before as only hash of universe is stored - # maybe if we pickled the filename too we could do better... - errmsg = (f"Couldn't find a suitable Universe to unpickle AtomGroup " - f"onto with Universe hash '{uhash}'. Availble hashes: " - f"{', '.join([str(k) for k in _ANCHOR_UNIVERSES.keys()])}") - raise RuntimeError(errmsg) from None +def _unpickle(u, ix): return u.atoms[ix] @@ -2252,21 +2243,78 @@ class AtomGroup(GroupBase): :class:`AtomGroup` instances are always bound to a :class:`MDAnalysis.core.universe.Universe`. They cannot exist in isolation. + During serialization, :class:`AtomGroup` will be pickled with its bound + :class:`MDAnalysis.core.universe.Universe` which means after unpickling, + a new :class:`MDAnalysis.core.universe.Universe` will be created and + be attached by the new :class:`AtomGroup`. If the Universe is serialized + with its :class:`AtomGroup`, they will still be bound together afterwards: + + .. code-block:: python + + >>> u = mda.Universe(PSF, DCD) + >>> g = u.atoms + + >>> g_pickled = pickle.loads(pickle.dumps(g)) + >>> print("g_pickled.universe is u: ", u is g_pickled.universe) + g_pickled.universe is u: False + + >>> g_pickled, u_pickled = pickle.load(pickle.dumps(g, u)) + >>> print("g_pickled.universe is u_pickled: ", + >>> u_pickle is g_pickled.universe) + g_pickled.universe is u_pickled: True + + If multiple :class:`AtomGroup` are bound to the same + :class:`MDAnalysis.core.universe.Universe`, they will bound to the same one + after serialization: + + .. code-block:: python + + >>> u = mda.Universe(PSF, DCD) + >>> g = u.atoms + >>> h = u.atoms + + >>> g_pickled = pickle.loads(pickle.dumps(g)) + >>> h_pickled = pickle.loads(pickle.dumps(h)) + >>> print("g_pickled.universe is h_pickled.universe : ", + >>> g_pickled.universe is h_pickled.universe) + g_pickled.universe is h_pickled.universe: False + + >>> g_pickled, h_pickled = pickle.load(pickle.dumps(g, h)) + >>> print("g_pickled.universe is h_pickled.universe: ", + >>> g_pickle.universe is h_pickled.universe) + g_pickled.universe is h_pickled.universe: True + + The aforementioned two cases are useful for implementation of parallel + analysis base classes. First, you always get an independent + :class:`MDAnalysis.core.universe.Universe` + in the new process; you don't have to worry about detaching and reattaching + Universe with :class:`AtomGroup`. It also means the state of the + new pickled AtomGroup will not be changed with the old Universe, + So either the Universe has to pickled together with the AtomGroup + (e.g. as a tuple, or as attributes of the object to be pickled), or the + implicit new Universe (`AtomGroup.Universe`) needs to be used. + Second, When multiple AtomGroup need to be pickled, they will recognize if + they belong to the same Univese or not. + Also keep in mind that they need to be pickled together. See Also -------- :class:`MDAnalysis.core.universe.Universe` + .. deprecated:: 0.16.2 *Instant selectors* of :class:`AtomGroup` will be removed in the 1.0 release. .. versionchanged:: 1.0.0 Removed instant selectors, use select_atoms('name ...') to select atoms by name. + .. versionchanged:: 2.0.0 + :class:`AtomGroup` can always be pickled with or without its universe, + instead of failing when not finding its anchored universe. """ def __reduce__(self): - return (_unpickle, (self.universe.anchor_name, self.ix)) + return (_unpickle, (self.universe, self.ix)) def __getattr__(self, attr): # special-case timestep info diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 390acb8e65e..cbf8041359a 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -83,7 +83,7 @@ os.fork = _os_dot_fork del _os_dot_fork -from .. import _ANCHOR_UNIVERSES, _TOPOLOGY_ATTRS, _PARSERS +from .. import _TOPOLOGY_ATTRS, _PARSERS from ..exceptions import NoDataError from ..lib import util from ..lib.log import ProgressBar @@ -273,17 +273,6 @@ class Universe(object): vdwradii: dict, ``None``, default ``None`` For use with *guess_bonds*. Supply a dict giving a vdwradii for each atom type which are used in guessing bonds. - is_anchor: bool, default ``True`` - When unpickling instances of - :class:`MDAnalysis.core.groups.AtomGroup` existing Universes are - searched for one where to anchor those atoms. Set to ``False`` to - prevent this Universe from being considered. - anchor_name: str, ``None``, default ``None`` - Setting to other than ``None`` will cause - :class:`MDAnalysis.core.groups.AtomGroup` instances pickled from the - Universe to only unpickle if a compatible Universe with matching - *anchor_name* is found. Even if *anchor_name* is set *is_anchor* will - still be honored when unpickling. transformations: function or list, ``None``, default ``None`` Provide a list of transformations that you wish to apply to the trajectory upon reading. Transformations can be found in @@ -318,14 +307,14 @@ class Universe(object): .. versionchanged:: 2.0.0 Universe now can be (un)pickled. - ``topology``, ``trajectory`` and ``anchor_name`` are reserved + ``topology`` and ``trajectory`` are reserved upon unpickle. """ # Py3 TODO # def __init__(self, topology=None, *coordinates, all_coordinates=False, # format=None, topology_format=None, transformations=None, -# guess_bonds=False, vdwradii=None, anchor_name=None, -# is_anchor=True, in_memory=False, in_memory_step=1, +# guess_bonds=False, vdwradii=None, +# in_memory=False, in_memory_step=1, # **kwargs): def __init__(self, *args, **kwargs): topology = args[0] if args else None @@ -336,15 +325,11 @@ def __init__(self, *args, **kwargs): transformations = kwargs.pop('transformations', None) guess_bonds = kwargs.pop('guess_bonds', False) vdwradii = kwargs.pop('vdwradii', None) - anchor_name = kwargs.pop('anchor_name', None) - is_anchor = kwargs.pop('is_anchor', True) in_memory = kwargs.pop('in_memory', False) in_memory_step = kwargs.pop('in_memory_step', 1) self._trajectory = None # managed attribute holding Reader self._cache = {} - self._anchor_name = anchor_name - self.is_anchor = is_anchor self.atoms = None self.residues = None self.segments = None @@ -354,8 +339,6 @@ def __init__(self, *args, **kwargs): 'transformations': transformations, 'guess_bonds': guess_bonds, 'vdwradii': vdwradii, - 'anchor_name': anchor_name, - 'is_anchor': is_anchor, 'in_memory': in_memory, 'in_memory_step': in_memory_step, 'format': format, @@ -696,45 +679,6 @@ def impropers(self): """Improper dihedral angles between atoms""" return self.atoms.impropers - @property - def anchor_name(self): - # hash used for anchoring. - # Try and use anchor_name, else use (and store) uuid - if self._anchor_name is not None: - return self._anchor_name - else: - try: - return self._anchor_uuid - except AttributeError: - # store this so we can later recall it if needed - self._anchor_uuid = str(uuid.uuid4()) - return self._anchor_uuid - - @anchor_name.setter - def anchor_name(self, name): - self.remove_anchor() # clear any old anchor - self._anchor_name = str(name) if not name is None else name - self.make_anchor() # add anchor again - - @property - def is_anchor(self): - """Is this Universe an anchoring for unpickling AtomGroups""" - return self.anchor_name in _ANCHOR_UNIVERSES - - @is_anchor.setter - def is_anchor(self, new): - if new: - self.make_anchor() - else: - self.remove_anchor() - - def remove_anchor(self): - """Remove this Universe from the possible anchor list for unpickling""" - _ANCHOR_UNIVERSES.pop(self.anchor_name, None) - - def make_anchor(self): - _ANCHOR_UNIVERSES[self.anchor_name] = self - def __repr__(self): # return "".format( # n_atoms=len(self.atoms), @@ -745,17 +689,13 @@ def __repr__(self): def __getstate__(self): # Universe's two "legs" of topology and traj both serialise themselves - # the only other state held in Universe is anchor name? - return self.anchor_name, self._topology, self._trajectory + return self._topology, self._trajectory def __setstate__(self, args): - self._anchor_name = args[0] - self.make_anchor() - - self._topology = args[1] + self._topology = args[0] _generate_from_topology(self) - self._trajectory = args[2] + self._trajectory = args[1] # Properties @property diff --git a/testsuite/MDAnalysisTests/utils/test_persistence.py b/testsuite/MDAnalysisTests/utils/test_persistence.py index e5a3679d2da..4fda0de2efe 100644 --- a/testsuite/MDAnalysisTests/utils/test_persistence.py +++ b/testsuite/MDAnalysisTests/utils/test_persistence.py @@ -44,11 +44,6 @@ class TestAtomGroupPickle(object): def universe(): return mda.Universe(PDB_small, PDB_small, PDB_small) - @staticmethod - @pytest.fixture() - def universe_n(): - return mda.Universe(PDB_small, PDB_small, PDB_small, anchor_name="test1") - @staticmethod @pytest.fixture() def ag(universe): @@ -56,8 +51,8 @@ def ag(universe): @staticmethod @pytest.fixture() - def ag_n(universe_n): - return universe_n.atoms[:10] + def ag_2(universe): + return universe.atoms[10:20] @staticmethod @pytest.fixture() @@ -66,79 +61,24 @@ def pickle_str(ag): @staticmethod @pytest.fixture() - def pickle_str_n(ag_n): - return pickle.dumps(ag_n, protocol=pickle.HIGHEST_PROTOCOL) + def pickle_str_two_ag(ag, ag_2): + return pickle.dumps((ag, ag_2), protocol=pickle.HIGHEST_PROTOCOL) + + @staticmethod + @pytest.fixture() + def pickle_str_ag_with_universe_f(ag, universe): + return pickle.dumps((universe, ag), protocol=pickle.HIGHEST_PROTOCOL) + + @staticmethod + @pytest.fixture() + def pickle_str_ag_with_universe(ag, universe): + return pickle.dumps((ag, universe), protocol=pickle.HIGHEST_PROTOCOL) def test_unpickle(self, pickle_str, ag, universe): """Test that an AtomGroup can be unpickled (Issue 293)""" newag = pickle.loads(pickle_str) # Can unpickle assert_equal(ag.indices, newag.indices) - assert newag.universe is universe, "Unpickled AtomGroup on wrong Universe." - - def test_unpickle_named(self, pickle_str_n, ag_n, universe_n): - """Test that an AtomGroup can be unpickled (Issue 293)""" - newag = pickle.loads(pickle_str_n) - # Can unpickle - assert_equal(ag_n.indices, newag.indices) - assert newag.universe is universe_n, "Unpickled AtomGroup on wrong Universe." - - def test_unpickle_missing(self): - universe = mda.Universe(PDB_small, PDB_small, PDB_small) - universe_n = mda.Universe(PDB_small, PDB_small, PDB_small, - anchor_name="test1") - ag = universe.atoms[:20] # prototypical AtomGroup - ag_n = universe_n.atoms[:10] - pickle_str_n = pickle.dumps(ag_n, protocol=pickle.HIGHEST_PROTOCOL) - # Kill AtomGroup and Universe - del ag_n - del universe_n - # and make sure they're very dead - gc.collect() - # we shouldn't be able to unpickle - # assert_raises(RuntimeError, pickle.loads, pickle_str_n) - with pytest.raises(RuntimeError): - pickle.loads(pickle_str_n) - - def test_unpickle_noanchor(self, universe, pickle_str): - # Shouldn't unpickle if the universe is removed from the anchors - universe.remove_anchor() - # In the complex (parallel) testing environment there's the risk of - # other compatible Universes being available for anchoring even after - # this one is expressly removed. - # assert_raises(RuntimeError, pickle.loads, pickle_str) - with pytest.raises(RuntimeError): - pickle.loads(pickle_str) - # If this fails to raise an exception either: - # 1-the anchoring Universe failed to remove_anchor or 2-another - # Universe with the same characteristics was created for testing and is - # being used as anchor." - - def test_unpickle_reanchor(self, universe, pickle_str, ag): - # universe is removed from the anchors - universe.remove_anchor() - # now it goes back into the anchor list again - universe.make_anchor() - newag = pickle.loads(pickle_str) - assert_equal(ag.indices, newag.indices) - assert newag.universe is universe, "Unpickled AtomGroup on wrong Universe." - - def test_unpickle_wrongname(self, universe_n, pickle_str_n): - # we change the universe's anchor_name - universe_n.anchor_name = "test2" - # shouldn't unpickle if no name matches, even if there's a compatible - # universe in the unnamed anchor list. - with pytest.raises(RuntimeError): - pickle.loads(pickle_str_n) - - def test_unpickle_rename(self, universe_n, universe, pickle_str_n, ag_n): - # we change universe_n's anchor_name - universe_n.anchor_name = "test2" - # and make universe a named anchor - universe.anchor_name = "test1" - newag = pickle.loads(pickle_str_n) - assert_equal(ag_n.indices, newag.indices) - assert newag.universe is universe, "Unpickled AtomGroup on wrong Universe." def test_pickle_unpickle_empty(self, universe): """Test that an empty AtomGroup can be pickled/unpickled (Issue 293)""" @@ -147,6 +87,28 @@ def test_pickle_unpickle_empty(self, universe): newag = pickle.loads(pickle_str) assert len(newag) == 0 + def test_unpickle_two_ag(self, pickle_str_two_ag): + newag, newag2 = pickle.loads(pickle_str_two_ag) + assert newag.universe is newag2.universe, ( + "Two AtomGroups are unpickled to two different Universes" + ) + + def test_unpickle_ag_with_universe_f(self, + pickle_str_ag_with_universe_f): + newu, newag = pickle.loads(pickle_str_ag_with_universe_f) + assert newag.universe is newu, ( + "AtomGroup is not unpickled to the bound Universe" + "when Universe is pickled first" + ) + + def test_unpickle_ag_with_universe(self, + pickle_str_ag_with_universe): + newag, newu = pickle.loads(pickle_str_ag_with_universe) + assert newag.universe is newu, ( + "AtomGroup is not unpickled to the bound Universe" + "when AtomGroup is pickled first" + ) + class TestPicklingUpdatingAtomGroups(object):