From f00ee28e0143ab41c75880af2c6eb8c3f0761429 Mon Sep 17 00:00:00 2001 From: Luthaf Date: Wed, 15 May 2019 22:15:41 +0200 Subject: [PATCH] Convert MDAnalysis topology to chemfiles when writing --- package/MDAnalysis/coordinates/chemfiles.py | 125 +++++++++++++++--- .../coordinates/test_chemfiles.py | 74 ++++++++++- 2 files changed, 179 insertions(+), 20 deletions(-) diff --git a/package/MDAnalysis/coordinates/chemfiles.py b/package/MDAnalysis/coordinates/chemfiles.py index 83864a4168e..39794598f90 100644 --- a/package/MDAnalysis/coordinates/chemfiles.py +++ b/package/MDAnalysis/coordinates/chemfiles.py @@ -27,11 +27,6 @@ provides C++ implementation of multiple formats readers and writers, the full list if available `here `_. -This module only contains the coordinate reader and writer, which means that the -topology read by chemfiles is ignored when reading a file, and no topology is -written either. This can be surprising when using formats where topology and -coordinates are not separated, such as XYZ or PDB. - .. _chemfiles: https://chemfiles.org .. _formats: http://chemfiles.org/chemfiles/latest/formats.html @@ -47,11 +42,12 @@ from __future__ import print_function, unicode_literals import numpy as np +from six import string_types from . import base, core import chemfiles -from chemfiles import Trajectory, Frame, Atom, UnitCell, Residue +from chemfiles import Trajectory, Frame, Atom, UnitCell, Residue, Topology assert chemfiles.__version__.startswith("0.9.") @@ -90,7 +86,6 @@ def __init__(self, filename, chemfiles_format="", **kwargs): self._open() self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs) self.next() - print(kwargs) def _open(self): self._closed = False @@ -137,7 +132,7 @@ def _read_next_timestep(self, ts=None): self.ts = ts frame = self._file.read_step(self._step) self._frame_to_ts(frame, ts) - ts.frame = self._step + ts.frame = frame.step self._step += 1 return ts @@ -145,8 +140,8 @@ def _frame_to_ts(self, frame, ts): """convert a chemfiles frame to a :class:`TimeStep`""" if len(frame.atoms) != self.n_atoms: raise IOError( - "The number of atom changed in the trajectory. This is not " - "supported by MDAnalysis." + "The number of atom changed in the trajectory. " + "This is not supported by MDAnalysis." ) ts.dimensions[:] = frame.cell.lengths + frame.cell.angles @@ -155,12 +150,6 @@ def _frame_to_ts(self, frame, ts): ts.has_velocities = True ts.velocities[:] = frame.velocities[:] - @property - def dimensions(self): - """unitcell dimensions (*A*, *B*, *C*, *alpha*, *beta*, *gamma*) - """ - return self.ts.dimensions - def Writer(self, filename, n_atoms=None, **kwargs): """Return writer for trajectory format""" if n_atoms is None: @@ -185,7 +174,7 @@ class ChemfilesWriter(base.WriterBase): multiframe = True units = {'time': 'fs', 'length': 'Angstrom'} - def __init__(self, filename, n_atoms=0, mode="w", chemfiles_format="", **kwargs): + def __init__(self, filename, n_atoms=0, mode="w", chemfiles_format="", topology=None, **kwargs): """ Parameters ---------- @@ -202,6 +191,10 @@ def __init__(self, filename, n_atoms=0, mode="w", chemfiles_format="", **kwargs) use the given format name instead of guessing from the extension. The `list of supported formats `_ and the associated names is available in chemfiles documentation. + topology : Universe or AtomGroup (optional) + use the topology from this :class:`~MDAnalysis.core.groups.AtomGroup` + or :class:`~MDAnalysis.core.universe.Universe` to write all the + timesteps to the file **kwargs : dict General writer arguments. @@ -214,6 +207,12 @@ def __init__(self, filename, n_atoms=0, mode="w", chemfiles_format="", **kwargs) raise IOError("Expected 'a' or 'w' as mode in ChemfilesWriter") self._file = Trajectory(self.filename, mode, chemfiles_format) self._closed = False + if topology is not None: + if isinstance(topology, string_types): + self._file.set_topology(topology) + else: + topology = self._topology_to_chemfiles(topology, n_atoms) + self._file.set_topology(topology) def close(self): """Close the trajectory file and finalize the writing""" @@ -221,6 +220,42 @@ def close(self): self._file.close() self._closed = True + def write(self, obj): + """Write object `obj` at current trajectory frame to file. + + Topology for the output is taken from the `obj` or default + to the value of the `topology` keyword supplied to the + :class:`ChemfilesWriter` constructor. + + Parameters + ---------- + obj : Universe or AtomGroup + The :class:`~MDAnalysis.core.groups.AtomGroup` or + :class:`~MDAnalysis.core.universe.Universe` to write. + """ + if hasattr(obj, "atoms"): + if hasattr(obj, 'universe'): + # For AtomGroup and children (Residue, ResidueGroup, Segment) + ts_full = obj.universe.trajectory.ts + if ts_full.n_atoms == obj.atoms.n_atoms: + ts = ts_full + else: + # Only populate a time step with the selected atoms. + ts = ts_full.copy_slice(atoms.indices) + elif hasattr(obj, 'trajectory'): + # For Universe only --- get everything + ts = obj.trajectory.ts + else: + if isinstance(obj, base.Timestep): + ts = obj + topology = None + else: + raise TypeError("No Timestep found in obj argument") + + frame = self._timestep_to_chemfiles(ts) + frame.topology = self._topology_to_chemfiles(obj, len(frame.atoms)) + self._file.write(frame) + def write_next_timestep(self, ts): """Write timestep object into trajectory. @@ -228,10 +263,10 @@ def write_next_timestep(self, ts): ---------- ts: TimeStep """ - frame = self._ts_to_frame(ts) + frame = self._timestep_to_chemfiles(ts) self._file.write(frame) - def _ts_to_frame(self, ts): + def _timestep_to_chemfiles(self, ts): """ Convert a Timestep to a chemfiles Frame """ @@ -244,3 +279,55 @@ def _ts_to_frame(self, ts): frame.velocities[:] = ts.velocities[:] frame.cell = UnitCell(*ts.dimensions) return frame + + def _topology_to_chemfiles(self, obj, n_atoms): + """ + Convert an AtomGroup or an Universe to a chemfiles Topology + """ + topology = Topology() + if not hasattr(obj, "atoms"): + # use an empty topology + topology.resize(n_atoms) + return topology + + # (1) add all atoms to the topology + residues = {} + for atom in obj.atoms: + name = getattr(atom, 'name', "") + type = getattr(atom, 'type', name) + chemfiles_atom = Atom(name, type) + + if hasattr(atom, 'altLoc'): + chemfiles_atom["altloc"] = str(atom.altLoc) + + if hasattr(atom, 'segid'): + # TODO: what is the type of atom.segid? + chemfiles_atom["segid"] = atom.segid + + if hasattr(atom, 'resid'): + resname = getattr(atom, 'resname', "") + if atom.resid not in residues.keys(): + residues[atom.resid] = Residue(resname, atom.resid) + residue = residues[atom.resid] + + atom_idx = len(topology.atoms) + residue.atoms.append(atom_idx) + + if hasattr(atom, "record_type"): + # set corresponding chemfiles residue property + if atom.record_type == "ATOM": + residue["is_standard_pdb"] = True + else: + residue["is_standard_pdb"] = False + + topology.atoms.append(chemfiles_atom) + + # (2) add residues to the topology + for residue in residues.values(): + topology.residues.append(residue) + + # (3) add bonds to the topology + for bond in getattr(obj, "bonds", []): + topology.add_bond(bond.atoms[0].ix, bond.atoms[1].ix) + + return topology diff --git a/testsuite/MDAnalysisTests/coordinates/test_chemfiles.py b/testsuite/MDAnalysisTests/coordinates/test_chemfiles.py index 458dc7fd324..e94e6063454 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_chemfiles.py +++ b/testsuite/MDAnalysisTests/coordinates/test_chemfiles.py @@ -29,7 +29,7 @@ from MDAnalysis.coordinates.chemfiles import ChemfilesReader, ChemfilesWriter -from MDAnalysisTests import datafiles +from MDAnalysisTests import datafiles, tempdir from MDAnalysisTests.coordinates.base import ( MultiframeReaderTest, BaseWriterTest, BaseReference ) @@ -78,3 +78,75 @@ def test_read_chemfiles_format(self): for ts in u.trajectory: self.assertEqual(ts.n_atoms, 18364) + + def test_changing_system_size(self): + outfile = tempdir.TempDir().name + "chemfiles-changing-size.xyz" + with open(outfile, "w") as fd: + fd.write(VARYING_XYZ) + + u = mda.Universe(outfile, format="chemfiles", topology_format="XYZ") + + with self.assertRaises(IOError): + u.trajectory._read_next_timestep() + + def test_wrong_open_mode(self): + with self.assertRaises(IOError): + _ = ChemfilesWriter("", mode="r") + + def check_topology(self, reference, file): + u = mda.Universe(reference) + atoms = set([ + (atom.name, atom.type, atom.record_type) + for atom in u.atoms + ]) + bonds = set([ + (bond.atoms[0].ix, bond.atoms[1].ix) + for bond in u.bonds + ]) + + check = mda.Universe(file) + np.testing.assert_equal( + u.trajectory.ts.positions, + check.trajectory.ts.positions + ) + + for atom in check.atoms: + self.assertIn((atom.name, atom.type, atom.record_type), atoms) + + for bond in check.bonds: + self.assertIn((bond.atoms[0].ix, bond.atoms[1].ix), bonds) + + def test_write_topology(self): + u = mda.Universe(datafiles.CONECT) + outfile = tempdir.TempDir().name + "chemfiles-write-topology.pdb" + with ChemfilesWriter(outfile) as writer: + writer.write(u) + self.check_topology(datafiles.CONECT, outfile) + + # Manually setting the topology when creating the ChemfilesWriter + # (1) from an object + with ChemfilesWriter(outfile, topology=u) as writer: + writer.write_next_timestep(u.trajectory.ts) + self.check_topology(datafiles.CONECT, outfile) + + # (2) from a file + with ChemfilesWriter(outfile, topology=datafiles.CONECT) as writer: + writer.write_next_timestep(u.trajectory.ts) + # FIXME: this does not work, since chemfiles also insert the bonds + # which are implicit in PDB format (bewteen standard residues), while + # MDAnalysis only read the explicit CONNECT records. + + # self.check_topology(datafiles.CONECT, outfile) + + +VARYING_XYZ = """2 + +A 0 0 0 +A 0 0 0 +4 + +A 0 0 0 +A 0 0 0 +A 0 0 0 +A 0 0 0 +"""