Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serialization of AtomGroup #2893

Merged
merged 13 commits into from
Aug 21, 2020
5 changes: 3 additions & 2 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,12 @@ Fixes
empty AtomGroup (Issue #2848)
* Fixed the DMSParser, allowing the creation of multiple segids sharing
residues with identical resids (Issue #1387, PR #2872)
* H5MD files are now pickleable with H5PYPicklable (Issue #2890, PR #2894)
* H5MD files are now picklable with H5PYPicklable (Issue #2890, PR #2894)
* Fixed Janin analysis residue filtering (include CYSH) (Issue #2898)
* libmdaxdr and libdcd classes in their last frame can now be pickled
(Issue #2878, PR #2911)

* AtomGroup now are pickled/unpickled without looking for its anchored
Universe (PR #2893)

Enhancements
* Refactored analysis.helanal into analysis.helix_analysis
Expand Down
5 changes: 0 additions & 5 deletions package/MDAnalysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,6 @@
_TOPOLOGY_ATTRNAMES = {} # {lower case name w/o _ : name}


# Storing anchor universes for unpickling groups
import weakref
_ANCHOR_UNIVERSES = weakref.WeakValueDictionary()
del weakref

# custom exceptions and warnings
from .exceptions import (
SelectionError, NoDataError, ApplicationError, SelectionWarning,
Expand Down
72 changes: 60 additions & 12 deletions package/MDAnalysis/core/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
import os
import warnings

from .. import (_ANCHOR_UNIVERSES, _CONVERTERS,
from .. import (_CONVERTERS,
_TOPOLOGY_ATTRS, _TOPOLOGY_TRANSPLANTS, _TOPOLOGY_ATTRNAMES)
from ..lib import util
from ..lib.util import cached, warn_if_not_unique, unique_int_1d
Expand All @@ -110,16 +110,7 @@
from ._get_readers import get_writer_for, get_converter_for


def _unpickle(uhash, ix):
try:
u = _ANCHOR_UNIVERSES[uhash]
except KeyError:
# doesn't provide as nice an error message as before as only hash of universe is stored
# maybe if we pickled the filename too we could do better...
errmsg = (f"Couldn't find a suitable Universe to unpickle AtomGroup "
f"onto with Universe hash '{uhash}'. Availble hashes: "
f"{', '.join([str(k) for k in _ANCHOR_UNIVERSES.keys()])}")
raise RuntimeError(errmsg) from None
def _unpickle(u, ix):
return u.atoms[ix]


Expand Down Expand Up @@ -2252,21 +2243,78 @@ class AtomGroup(GroupBase):
:class:`AtomGroup` instances are always bound to a
:class:`MDAnalysis.core.universe.Universe`. They cannot exist in isolation.

During serialization, :class:`AtomGroup` will be pickled with its bound
:class:`MDAnalysis.core.universe.Universe` which means after unpickling,
a new :class:`MDAnalysis.core.universe.Universe` will be created and
be attached by the new :class:`AtomGroup`. If the Universe is serialized
with its :class:`AtomGroup`, they will still be bound together afterwards:

.. code-block:: python

>>> u = mda.Universe(PSF, DCD)
>>> g = u.atoms

>>> g_pickled = pickle.loads(pickle.dumps(g))
>>> print("g_pickled.universe is u: ", u is g_pickled.universe)
g_pickled.universe is u: False

>>> g_pickled, u_pickled = pickle.load(pickle.dumps(g, u))
>>> print("g_pickled.universe is u_pickled: ",
>>> u_pickle is g_pickled.universe)
g_pickled.universe is u_pickled: True

If multiple :class:`AtomGroup` are bound to the same
:class:`MDAnalysis.core.universe.Universe`, they will bound to the same one
after serialization:

.. code-block:: python

>>> u = mda.Universe(PSF, DCD)
>>> g = u.atoms
>>> h = u.atoms

>>> g_pickled = pickle.loads(pickle.dumps(g))
>>> h_pickled = pickle.loads(pickle.dumps(h))
>>> print("g_pickled.universe is h_pickled.universe : ",
>>> g_pickled.universe is h_pickled.universe)
g_pickled.universe is h_pickled.universe: False

>>> g_pickled, h_pickled = pickle.load(pickle.dumps(g, h))
>>> print("g_pickled.universe is h_pickled.universe: ",
>>> g_pickle.universe is h_pickled.universe)
g_pickled.universe is h_pickled.universe: True

The aforementioned two cases are useful for implementation of parallel
analysis base classes. First, you always get an independent
:class:`MDAnalysis.core.universe.Universe`
in the new process; you don't have to worry about detaching and reattaching
Universe with :class:`AtomGroup`. It also means the state of the
new pickled AtomGroup will not be changed with the old Universe,
So either the Universe has to pickled together with the AtomGroup
(e.g. as a tuple, or as attributes of the object to be pickled), or the
implicit new Universe (`AtomGroup.Universe`) needs to be used.
Second, When multiple AtomGroup need to be pickled, they will recognize if
they belong to the same Univese or not.
Also keep in mind that they need to be pickled together.

See Also
--------
:class:`MDAnalysis.core.universe.Universe`


.. deprecated:: 0.16.2
*Instant selectors* of :class:`AtomGroup` will be removed in the 1.0
release.
.. versionchanged:: 1.0.0
Removed instant selectors, use select_atoms('name ...') to select
atoms by name.
.. versionchanged:: 2.0.0
:class:`AtomGroup` can always be pickled with or without its universe,
instead of failing when not finding its anchored universe.
"""

def __reduce__(self):
return (_unpickle, (self.universe.anchor_name, self.ix))
return (_unpickle, (self.universe, self.ix))

def __getattr__(self, attr):
# special-case timestep info
Expand Down
74 changes: 7 additions & 67 deletions package/MDAnalysis/core/universe.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
os.fork = _os_dot_fork
del _os_dot_fork

from .. import _ANCHOR_UNIVERSES, _TOPOLOGY_ATTRS, _PARSERS
from .. import _TOPOLOGY_ATTRS, _PARSERS
from ..exceptions import NoDataError
from ..lib import util
from ..lib.log import ProgressBar
Expand Down Expand Up @@ -273,17 +273,6 @@ class Universe(object):
vdwradii: dict, ``None``, default ``None``
For use with *guess_bonds*. Supply a dict giving a vdwradii for each
atom type which are used in guessing bonds.
is_anchor: bool, default ``True``
When unpickling instances of
:class:`MDAnalysis.core.groups.AtomGroup` existing Universes are
searched for one where to anchor those atoms. Set to ``False`` to
prevent this Universe from being considered.
anchor_name: str, ``None``, default ``None``
Setting to other than ``None`` will cause
:class:`MDAnalysis.core.groups.AtomGroup` instances pickled from the
Universe to only unpickle if a compatible Universe with matching
*anchor_name* is found. Even if *anchor_name* is set *is_anchor* will
still be honored when unpickling.
transformations: function or list, ``None``, default ``None``
Provide a list of transformations that you wish to apply to the
trajectory upon reading. Transformations can be found in
Expand Down Expand Up @@ -318,14 +307,14 @@ class Universe(object):

.. versionchanged:: 2.0.0
Universe now can be (un)pickled.
``topology``, ``trajectory`` and ``anchor_name`` are reserved
``topology`` and ``trajectory`` are reserved
upon unpickle.
"""
# Py3 TODO
# def __init__(self, topology=None, *coordinates, all_coordinates=False,
# format=None, topology_format=None, transformations=None,
# guess_bonds=False, vdwradii=None, anchor_name=None,
# is_anchor=True, in_memory=False, in_memory_step=1,
# guess_bonds=False, vdwradii=None,
# in_memory=False, in_memory_step=1,
# **kwargs):
def __init__(self, *args, **kwargs):
topology = args[0] if args else None
Expand All @@ -336,15 +325,11 @@ def __init__(self, *args, **kwargs):
transformations = kwargs.pop('transformations', None)
guess_bonds = kwargs.pop('guess_bonds', False)
vdwradii = kwargs.pop('vdwradii', None)
anchor_name = kwargs.pop('anchor_name', None)
is_anchor = kwargs.pop('is_anchor', True)
in_memory = kwargs.pop('in_memory', False)
in_memory_step = kwargs.pop('in_memory_step', 1)

self._trajectory = None # managed attribute holding Reader
self._cache = {}
self._anchor_name = anchor_name
self.is_anchor = is_anchor
self.atoms = None
self.residues = None
self.segments = None
Expand All @@ -354,8 +339,6 @@ def __init__(self, *args, **kwargs):
'transformations': transformations,
'guess_bonds': guess_bonds,
'vdwradii': vdwradii,
'anchor_name': anchor_name,
'is_anchor': is_anchor,
'in_memory': in_memory,
'in_memory_step': in_memory_step,
'format': format,
Expand Down Expand Up @@ -696,45 +679,6 @@ def impropers(self):
"""Improper dihedral angles between atoms"""
return self.atoms.impropers

@property
def anchor_name(self):
# hash used for anchoring.
# Try and use anchor_name, else use (and store) uuid
if self._anchor_name is not None:
return self._anchor_name
else:
try:
return self._anchor_uuid
except AttributeError:
# store this so we can later recall it if needed
self._anchor_uuid = str(uuid.uuid4())
return self._anchor_uuid

@anchor_name.setter
def anchor_name(self, name):
self.remove_anchor() # clear any old anchor
self._anchor_name = str(name) if not name is None else name
self.make_anchor() # add anchor again

@property
def is_anchor(self):
"""Is this Universe an anchoring for unpickling AtomGroups"""
return self.anchor_name in _ANCHOR_UNIVERSES

@is_anchor.setter
def is_anchor(self, new):
if new:
self.make_anchor()
else:
self.remove_anchor()

def remove_anchor(self):
"""Remove this Universe from the possible anchor list for unpickling"""
_ANCHOR_UNIVERSES.pop(self.anchor_name, None)

def make_anchor(self):
_ANCHOR_UNIVERSES[self.anchor_name] = self

def __repr__(self):
# return "<Universe with {n_atoms} atoms{bonds}>".format(
# n_atoms=len(self.atoms),
Expand All @@ -745,17 +689,13 @@ def __repr__(self):

def __getstate__(self):
# Universe's two "legs" of topology and traj both serialise themselves
# the only other state held in Universe is anchor name?
return self.anchor_name, self._topology, self._trajectory
return self._topology, self._trajectory

def __setstate__(self, args):
self._anchor_name = args[0]
self.make_anchor()

self._topology = args[1]
self._topology = args[0]
_generate_from_topology(self)

self._trajectory = args[2]
self._trajectory = args[1]

# Properties
@property
Expand Down
Loading