From 574cd1f2de2dc5a31b720ba31e321ebae346892b Mon Sep 17 00:00:00 2001 From: David Dotson Date: Wed, 20 Apr 2016 23:10:25 -0700 Subject: [PATCH] TopologyAttr registration; prototype Topology json serialization We now register TopologyAttrs using the same metaclassing mechanism as used for Parsers, Readers, and Writers. This is necessary in order to deserialize Topology objects that have been serialized to JSON. An existing Topology can now be serialized to JSON with: ```python top.to_json('topology.json') ``` and a Topology can be generated from its JSON form with: ```python top = Topology.from_json('topology.json') ``` Individual TopologyAttrs must define how to serialize themselves and to deserialize their data to make an instance of themselves for this to work. Addresses #643. --- package/MDAnalysis/core/__init__.py | 2 + package/MDAnalysis/core/groups.py | 95 ++++++++++++++++++++++++ package/MDAnalysis/core/topology.py | 63 ++++++++++++++++ package/MDAnalysis/core/topologyattrs.py | 45 ++++++++++- package/MDAnalysis/core/universe.py | 7 +- 5 files changed, 209 insertions(+), 3 deletions(-) diff --git a/package/MDAnalysis/core/__init__.py b/package/MDAnalysis/core/__init__.py index 8587891c615..ec290a78ee5 100644 --- a/package/MDAnalysis/core/__init__.py +++ b/package/MDAnalysis/core/__init__.py @@ -86,6 +86,8 @@ __all__ = ['AtomGroup', 'Selection', 'Timeseries'] +# Registry of TopologyAttrs +_TOPOLOGYATTRS = {} # set up flags for core routines (more convoluted than strictly necessary but should # be clean to add more flags if needed) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index 6971f53e547..23fed89203b 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -7,6 +7,7 @@ import itertools from ..lib import mdamath +from ..lib import util from . import selection from . import flags from . import levels @@ -1050,6 +1051,100 @@ def improper(self): "improper only makes sense for a group with exactly 4 atoms") return topologyobjects.ImproperDihedral(self._ix, self.universe) +# TODO: possibly refactor; copied directly from old AG + @property + def ts(self): + """Returns a Timestep that contains only the group's coordinates. + + Returns + ------- + A :class:`~MDAnalysis.coordinates.base.Timestep` instance, + which can be passed to a trajectory writer. + + If the returned timestep is modified the modifications + will not be reflected in the base timestep. Likewise, + when the underlying timestep changes (either by loading a + new frame or by setting new positions by hand) the returned + timestep will not reflect those changes. + + """ + return self.universe.trajectory.ts.copy_slice(self.indices) + +# TODO: refactor; copied directly from old AG + def write(self, filename=None, format="PDB", + filenamefmt="%(trjname)s_%(frame)d", **kwargs): + """Write AtomGroup to a file. + + AtomGroup.write(filename[,format]) + + :Keywords: + *filename* + ``None``: create TRJNAME_FRAME.FORMAT from filenamefmt [``None``] + *format* + PDB, CRD, GRO, VMD (tcl), PyMol (pml), Gromacs (ndx) CHARMM (str) + Jmol (spt); case-insensitive and can also be supplied as the + filename extension [PDB] + *filenamefmt* + format string for default filename; use substitution tokens + 'trjname' and 'frame' ["%(trjname)s_%(frame)d"] + *bonds* + how to handle bond information, especially relevant for PDBs; + default is ``"conect"``. + + * ``"conect"``: write only the CONECT records defined in the original + file + + * ``"all"``: write out all bonds, both the original defined and those + guessed by MDAnalysis + + * ``None``: do not write out bonds + + .. versionchanged:: 0.9.0 + Merged with write_selection. This method can now write both + selections out. + """ + import MDAnalysis.coordinates + import MDAnalysis.selections + + # check that AtomGroup actually has any atoms (Issue #434) + if len(self.atoms) == 0: + raise IndexError("Cannot write an AtomGroup with 0 atoms") + + trj = self.universe.trajectory # unified trajectory API + frame = trj.ts.frame + + if trj.n_frames == 1: kwargs.setdefault("multiframe", False) + + if filename is None: + trjname, ext = os.path.splitext(os.path.basename(trj.filename)) + filename = filenamefmt % vars() + filename = util.filename(filename, ext=format.lower(), keep=True) + + # From the following blocks, one must pass. + # Both can't pass as the extensions don't overlap. + try: + writer = MDAnalysis.coordinates.writer(filename, **kwargs) + except TypeError: + # might be selections format + coords = False + else: + coords = True + + try: + SelectionWriter = MDAnalysis.selections.get_writer(filename, format) + except (TypeError, NotImplementedError): + selection = False + else: + writer = SelectionWriter(filename, **kwargs) + selection = True + + if not (coords or selection): + raise ValueError("No writer found for format: {0}".format(filename)) + else: + writer.write(self.atoms) + if coords: # only these writers have a close method + writer.close() + class ResidueGroup(object): level = 'residue' diff --git a/package/MDAnalysis/core/topology.py b/package/MDAnalysis/core/topology.py index b47d10524ac..b32fcdcb045 100644 --- a/package/MDAnalysis/core/topology.py +++ b/package/MDAnalysis/core/topology.py @@ -18,6 +18,8 @@ =================================================================== """ +import json + import numpy as np from ..lib.mdamath import one_to_many_pointers @@ -348,3 +350,64 @@ def add_TopologyAttr(self, topologyattr): self.attrs.append(topologyattr) topologyattr.top = self self.__setattr__(topologyattr.attrname, topologyattr) + + def to_json(self, filename): + """Write out Topology to JSON form. + + Parameters + ---------- + filename : str + File to write. + + """ + jstruct = {'n_atoms': self.n_atoms, + 'n_residues': self.n_residues, + 'n_segments': self.n_segments, + 'atoms->residues': self.tt.AR.tolist(), + 'residues->segments': self.tt.RS.tolist(), + 'topattrs': dict()} + + # TODO: add warning that could not serialize some topology attributes + for topattr in self.attrs: + + # these don't hold any data of their own + if isinstance(topattr, (Atomindices, Resindices, Segindices)): + continue + + try: + jstruct['topattrs'][topattr.attrname] = topattr._to_json() + except AttributeError: + pass + + # write out + with open(filename, 'w') as f: + json.dump(jstruct, f) + + @classmethod + def from_json(cls, filename): + """Generate Topology from JSON form. + + """ + from . import _TOPOLOGYATTRS + + with open(filename, 'r') as f: + jstruct = json.load(f) + + attrs = [] + for attrname, values in jstruct['topattrs'].items(): + # TODO: add warning that could not deserialize a topology attribute + try: + topattr = _TOPOLOGYATTRS[attrname] + except KeyError: + pass + + attrs.append(topattr._from_json(values)) + + top = cls(n_atoms=jstruct['n_atoms'], + n_res=jstruct['n_residues'], + n_seg=jstruct['n_segments'], + attrs=attrs, + atom_resindex=np.array(jstruct['atoms->residues']), + residue_segindex=np.array(jstruct['residues->segments'])) + + return top diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index 7cdbf58c478..6b868638734 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -20,6 +20,7 @@ Common TopologyAttrs used by most topology parsers. """ +import six from six.moves import zip from collections import defaultdict import itertools @@ -32,8 +33,22 @@ from . import selection from . import flags +from . import _TOPOLOGYATTRS -class TopologyAttr(object): + +class _TopologyAttrmeta(type): + # Auto register upon class creation + def __init__(cls, name, bases, classdict): + type.__init__(type, name, bases, classdict) + try: + attrname = classdict['attrname'] + except KeyError: + pass + else: + _TOPOLOGYATTRS[attrname] = cls + + +class TopologyAttr(six.with_metaclass(_TopologyAttrmeta)): """Base class for Topology attributes. .. note:: This class is intended to be subclassed, and mostly amounts to a @@ -234,6 +249,13 @@ class AtomAttr(TopologyAttr): singular = 'atomattr' target_levels = ['atom'] + def _to_json(self): + return self.values.tolist() + + @classmethod + def _from_json(cls, values): + return cls(np.array(values)) + def get_atoms(self, ag): return self.values[ag._ix] @@ -864,6 +886,13 @@ class ResidueAttr(TopologyAttr): target_levels = ['residue'] per_object = 'residue' + def _to_json(self): + return self.values.tolist() + + @classmethod + def _from_json(cls, values): + return cls(np.array(values)) + def get_atoms(self, ag): rix = self.top.tt.atoms2residues(ag._ix) return self.values[rix] @@ -959,6 +988,13 @@ class SegmentAttr(TopologyAttr): target_levels = ['segment'] per_object = 'segment' + def _to_json(self): + return self.values.tolist() + + @classmethod + def _from_json(cls, values): + return cls(np.array(values)) + def get_atoms(self, ag): six = self.top.tt.atoms2segments(ag._ix) return self.values[six] @@ -1045,6 +1081,13 @@ def __init__(self, values, types=None): def __len__(self): return len(self._bondDict) + def _to_json(self): + return {'values': self.values, 'types': self.types} + + @classmethod + def _from_json(cls, values): + return cls(values=values['values'], types=values['types']) + @property @cached('bd') def _bondDict(self): diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index ee127a18a84..6cc912be37d 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -134,6 +134,9 @@ def __init__(self, *args, **kwargs): if isinstance(args[0], Topology): self._topology = args[0] self.filename = None + + if len(coordinatefile) == 0: + coordinatefile = None else: self.filename = args[0] topology_format = kwargs.pop('topology_format', None) @@ -154,8 +157,8 @@ def __init__(self, *args, **kwargs): # or if file is known as a topology & coordinate file, use that if fmt is None: fmt = util.guess_format(self.filename) - if (fmt in MDAnalysis.coordinates._trajectory_readers - and fmt in MDAnalysis.topology._topology_parsers): + if (fmt in MDAnalysis.coordinates._READERS + and fmt in MDAnalysis.topology._PARSERS): coordinatefile = self.filename if len(coordinatefile) == 0: coordinatefile = None