Skip to content

Commit

Permalink
Convert MDAnalysis topology to chemfiles when writing
Browse files Browse the repository at this point in the history
  • Loading branch information
Luthaf authored and Guillaume Fraux committed Jul 5, 2019
1 parent 942acc0 commit f00ee28
Show file tree
Hide file tree
Showing 2 changed files with 179 additions and 20 deletions.
125 changes: 106 additions & 19 deletions package/MDAnalysis/coordinates/chemfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@
provides C++ implementation of multiple formats readers and writers, the full
list if available `here <formats>`_.
This module only contains the coordinate reader and writer, which means that the
topology read by chemfiles is ignored when reading a file, and no topology is
written either. This can be surprising when using formats where topology and
coordinates are not separated, such as XYZ or PDB.
.. _chemfiles: https://chemfiles.org
.. _formats: http://chemfiles.org/chemfiles/latest/formats.html
Expand All @@ -47,11 +42,12 @@
from __future__ import print_function, unicode_literals

import numpy as np
from six import string_types

from . import base, core

import chemfiles
from chemfiles import Trajectory, Frame, Atom, UnitCell, Residue
from chemfiles import Trajectory, Frame, Atom, UnitCell, Residue, Topology


assert chemfiles.__version__.startswith("0.9.")
Expand Down Expand Up @@ -90,7 +86,6 @@ def __init__(self, filename, chemfiles_format="", **kwargs):
self._open()
self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs)
self.next()
print(kwargs)

def _open(self):
self._closed = False
Expand Down Expand Up @@ -137,16 +132,16 @@ def _read_next_timestep(self, ts=None):
self.ts = ts
frame = self._file.read_step(self._step)
self._frame_to_ts(frame, ts)
ts.frame = self._step
ts.frame = frame.step
self._step += 1
return ts

def _frame_to_ts(self, frame, ts):
"""convert a chemfiles frame to a :class:`TimeStep`"""
if len(frame.atoms) != self.n_atoms:
raise IOError(
"The number of atom changed in the trajectory. This is not "
"supported by MDAnalysis."
"The number of atom changed in the trajectory. "
"This is not supported by MDAnalysis."
)

ts.dimensions[:] = frame.cell.lengths + frame.cell.angles
Expand All @@ -155,12 +150,6 @@ def _frame_to_ts(self, frame, ts):
ts.has_velocities = True
ts.velocities[:] = frame.velocities[:]

@property
def dimensions(self):
"""unitcell dimensions (*A*, *B*, *C*, *alpha*, *beta*, *gamma*)
"""
return self.ts.dimensions

def Writer(self, filename, n_atoms=None, **kwargs):
"""Return writer for trajectory format"""
if n_atoms is None:
Expand All @@ -185,7 +174,7 @@ class ChemfilesWriter(base.WriterBase):
multiframe = True
units = {'time': 'fs', 'length': 'Angstrom'}

def __init__(self, filename, n_atoms=0, mode="w", chemfiles_format="", **kwargs):
def __init__(self, filename, n_atoms=0, mode="w", chemfiles_format="", topology=None, **kwargs):
"""
Parameters
----------
Expand All @@ -202,6 +191,10 @@ def __init__(self, filename, n_atoms=0, mode="w", chemfiles_format="", **kwargs)
use the given format name instead of guessing from the extension.
The `list of supported formats <formats>`_ and the associated names
is available in chemfiles documentation.
topology : Universe or AtomGroup (optional)
use the topology from this :class:`~MDAnalysis.core.groups.AtomGroup`
or :class:`~MDAnalysis.core.universe.Universe` to write all the
timesteps to the file
**kwargs : dict
General writer arguments.
Expand All @@ -214,24 +207,66 @@ def __init__(self, filename, n_atoms=0, mode="w", chemfiles_format="", **kwargs)
raise IOError("Expected 'a' or 'w' as mode in ChemfilesWriter")
self._file = Trajectory(self.filename, mode, chemfiles_format)
self._closed = False
if topology is not None:
if isinstance(topology, string_types):
self._file.set_topology(topology)
else:
topology = self._topology_to_chemfiles(topology, n_atoms)
self._file.set_topology(topology)

def close(self):
"""Close the trajectory file and finalize the writing"""
if hasattr(self, "_closed") and not self._closed:
self._file.close()
self._closed = True

def write(self, obj):
"""Write object `obj` at current trajectory frame to file.
Topology for the output is taken from the `obj` or default
to the value of the `topology` keyword supplied to the
:class:`ChemfilesWriter` constructor.
Parameters
----------
obj : Universe or AtomGroup
The :class:`~MDAnalysis.core.groups.AtomGroup` or
:class:`~MDAnalysis.core.universe.Universe` to write.
"""
if hasattr(obj, "atoms"):
if hasattr(obj, 'universe'):
# For AtomGroup and children (Residue, ResidueGroup, Segment)
ts_full = obj.universe.trajectory.ts
if ts_full.n_atoms == obj.atoms.n_atoms:
ts = ts_full
else:
# Only populate a time step with the selected atoms.
ts = ts_full.copy_slice(atoms.indices)
elif hasattr(obj, 'trajectory'):
# For Universe only --- get everything
ts = obj.trajectory.ts
else:
if isinstance(obj, base.Timestep):
ts = obj
topology = None
else:
raise TypeError("No Timestep found in obj argument")

frame = self._timestep_to_chemfiles(ts)
frame.topology = self._topology_to_chemfiles(obj, len(frame.atoms))
self._file.write(frame)

def write_next_timestep(self, ts):
"""Write timestep object into trajectory.
Parameters
----------
ts: TimeStep
"""
frame = self._ts_to_frame(ts)
frame = self._timestep_to_chemfiles(ts)
self._file.write(frame)

def _ts_to_frame(self, ts):
def _timestep_to_chemfiles(self, ts):
"""
Convert a Timestep to a chemfiles Frame
"""
Expand All @@ -244,3 +279,55 @@ def _ts_to_frame(self, ts):
frame.velocities[:] = ts.velocities[:]
frame.cell = UnitCell(*ts.dimensions)
return frame

def _topology_to_chemfiles(self, obj, n_atoms):
"""
Convert an AtomGroup or an Universe to a chemfiles Topology
"""
topology = Topology()
if not hasattr(obj, "atoms"):
# use an empty topology
topology.resize(n_atoms)
return topology

# (1) add all atoms to the topology
residues = {}
for atom in obj.atoms:
name = getattr(atom, 'name', "")
type = getattr(atom, 'type', name)
chemfiles_atom = Atom(name, type)

if hasattr(atom, 'altLoc'):
chemfiles_atom["altloc"] = str(atom.altLoc)

if hasattr(atom, 'segid'):
# TODO: what is the type of atom.segid?
chemfiles_atom["segid"] = atom.segid

if hasattr(atom, 'resid'):
resname = getattr(atom, 'resname', "")
if atom.resid not in residues.keys():
residues[atom.resid] = Residue(resname, atom.resid)
residue = residues[atom.resid]

atom_idx = len(topology.atoms)
residue.atoms.append(atom_idx)

if hasattr(atom, "record_type"):
# set corresponding chemfiles residue property
if atom.record_type == "ATOM":
residue["is_standard_pdb"] = True
else:
residue["is_standard_pdb"] = False

topology.atoms.append(chemfiles_atom)

# (2) add residues to the topology
for residue in residues.values():
topology.residues.append(residue)

# (3) add bonds to the topology
for bond in getattr(obj, "bonds", []):
topology.add_bond(bond.atoms[0].ix, bond.atoms[1].ix)

return topology
74 changes: 73 additions & 1 deletion testsuite/MDAnalysisTests/coordinates/test_chemfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

from MDAnalysis.coordinates.chemfiles import ChemfilesReader, ChemfilesWriter

from MDAnalysisTests import datafiles
from MDAnalysisTests import datafiles, tempdir
from MDAnalysisTests.coordinates.base import (
MultiframeReaderTest, BaseWriterTest, BaseReference
)
Expand Down Expand Up @@ -78,3 +78,75 @@ def test_read_chemfiles_format(self):

for ts in u.trajectory:
self.assertEqual(ts.n_atoms, 18364)

def test_changing_system_size(self):
outfile = tempdir.TempDir().name + "chemfiles-changing-size.xyz"
with open(outfile, "w") as fd:
fd.write(VARYING_XYZ)

u = mda.Universe(outfile, format="chemfiles", topology_format="XYZ")

with self.assertRaises(IOError):
u.trajectory._read_next_timestep()

def test_wrong_open_mode(self):
with self.assertRaises(IOError):
_ = ChemfilesWriter("", mode="r")

def check_topology(self, reference, file):
u = mda.Universe(reference)
atoms = set([
(atom.name, atom.type, atom.record_type)
for atom in u.atoms
])
bonds = set([
(bond.atoms[0].ix, bond.atoms[1].ix)
for bond in u.bonds
])

check = mda.Universe(file)
np.testing.assert_equal(
u.trajectory.ts.positions,
check.trajectory.ts.positions
)

for atom in check.atoms:
self.assertIn((atom.name, atom.type, atom.record_type), atoms)

for bond in check.bonds:
self.assertIn((bond.atoms[0].ix, bond.atoms[1].ix), bonds)

def test_write_topology(self):
u = mda.Universe(datafiles.CONECT)
outfile = tempdir.TempDir().name + "chemfiles-write-topology.pdb"
with ChemfilesWriter(outfile) as writer:
writer.write(u)
self.check_topology(datafiles.CONECT, outfile)

# Manually setting the topology when creating the ChemfilesWriter
# (1) from an object
with ChemfilesWriter(outfile, topology=u) as writer:
writer.write_next_timestep(u.trajectory.ts)
self.check_topology(datafiles.CONECT, outfile)

# (2) from a file
with ChemfilesWriter(outfile, topology=datafiles.CONECT) as writer:
writer.write_next_timestep(u.trajectory.ts)
# FIXME: this does not work, since chemfiles also insert the bonds
# which are implicit in PDB format (bewteen standard residues), while
# MDAnalysis only read the explicit CONNECT records.

# self.check_topology(datafiles.CONECT, outfile)


VARYING_XYZ = """2
A 0 0 0
A 0 0 0
4
A 0 0 0
A 0 0 0
A 0 0 0
A 0 0 0
"""

0 comments on commit f00ee28

Please sign in to comment.