Update to chemfiles 0.10 (#3126)

* Update to chemfiles 0.10 * Pin chemfiles to a version >=0.9 * Put conda-forge first in the list of channels Otherwise conda picks outdated version of libssh2 from biobuilds, which links to non-existing libopenssl.so.1.0.0 * Add a test for chemfiles version check * Refactor ChemfilesWriter to follow the same code as the XYZ writer * Update condition checking for missing cell data * Update package/MDAnalysis/coordinates/chemfiles.py Co-authored-by: Oliver Beckstein <[email protected]>
MDAnalysis · May 8, 2021 · a9eafab · a9eafab
1 parent f5c9159
commit a9eafab
Show file tree

Hide file tree

Showing 5 changed files with 151 additions and 78 deletions.
diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml
@@ -15,7 +15,7 @@ defaults:
 
 env:
   MDA_CONDA_MIN_DEPS: "pip pytest==6.1.2 mmtf-python biopython networkx cython matplotlib-base scipy griddataformats hypothesis gsd codecov threadpoolctl openmm"
-  MDA_CONDA_EXTRA_DEPS: "seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles tqdm>=4.43.0 tidynamics>=1.0.0 rdkit>=2020.03.1 h5py"
+  MDA_CONDA_EXTRA_DEPS: "seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles>=0.9 tqdm>=4.43.0 tidynamics>=1.0.0 rdkit>=2020.03.1 h5py"
   MDA_PIP_MIN_DEPS: 'coveralls coverage<5 pytest-cov pytest-xdist'
   MDA_PIP_EXTRA_DEPS: 'duecredit parmed'
 
@@ -107,11 +107,15 @@ jobs:
         python-version: ${{ matrix.python-version }}
         auto-update-conda: true
         channel-priority: flexible
+        # The order of these channel matters: both provide "fundamental"
+        # software (curl, ssh, ...), but the version in biobuilds are very
+        # outdated. This can lead to problem when loading shared libraries,
+        # see https://github.com/MDAnalysis/mdanalysis/pull/3126#issuecomment-813112080
         channels: conda-forge, biobuilds
         add-pip-as-python-dependency: true
         mamba-version: "*"
         architecture: x64
-        
+
     - name: install_deps
       env:
         MDA_CONDA_FULL_DEPS: "${{ env.MDA_CONDA_MIN_DEPS }} ${{ env.MDA_CONDA_EXTRA_DEPS }}"
@@ -128,7 +132,7 @@ jobs:
         if [ ${{ matrix.name }} = "asv_check" ]; then
           pip install asv
         fi
-    
+
     - name: check_setup
       run: |
         # Check OS and python setup

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -65,7 +65,7 @@ jobs:
   - script: >-
       python -m pip install
       biopython
-      chemfiles
+      chemfiles>=0.9
       duecredit
       gsd==1.9.3
       joblib

diff --git a/maintainer/conda/environment.yml b/maintainer/conda/environment.yml
@@ -3,7 +3,7 @@ channels:
   - defaults
   - conda-forge
 dependencies:
-  - chemfiles
+  - chemfiles>=0.9
   - codecov
   - cython
   - griddataformats

diff --git a/package/MDAnalysis/coordinates/chemfiles.py b/package/MDAnalysis/coordinates/chemfiles.py
@@ -23,9 +23,9 @@
 Reading trajectories with *chemfiles* --- :mod:`MDAnalysis.coordinates.chemfiles`
 =================================================================================
 
-MDAnalysis interoperates with the `chemfiles`_ library. The *chemfiles* C++ library 
+MDAnalysis interoperates with the `chemfiles`_ library. The *chemfiles* C++ library
 supports an expanding set of file formats, some of which are not natively supported by
-MDAnalysis. Using the *CHEMFILES* reader you can use  `chemfiles`_ for the low-level 
+MDAnalysis. Using the *CHEMFILES* reader you can use  `chemfiles`_ for the low-level
 file reading. Check the list of `chemfile-supported file formats <formats>`_.
 
 .. _chemfiles: https://chemfiles.org
@@ -37,7 +37,7 @@
 Using the CHEMFILES reader
 --------------------------
 
-When reading, set the ``format="CHEMFILES"`` keyword argument and I/O is delegated to 
+When reading, set the ``format="CHEMFILES"`` keyword argument and I/O is delegated to
 `chemfiles`_. For example::
 
    >>> import MDAnalysis as mda
@@ -81,8 +81,9 @@
 """
 from distutils.version import LooseVersion
 import warnings
+import numpy as np
 
-from . import base, core
+from . import base
 
 try:
     import chemfiles
@@ -94,6 +95,7 @@
 
     class MockTrajectory:
         pass
+
     chemfiles = types.ModuleType("chemfiles")
     chemfiles.Trajectory = MockTrajectory
 else:
@@ -105,14 +107,14 @@ class MockTrajectory:
 MIN_CHEMFILES_VERSION = LooseVersion("0.9")
 #: Lowest version of chemfiles that is *not supported*
 #: by MDAnalysis.
-MAX_CHEMFILES_VERSION = LooseVersion("0.10")
+MAX_CHEMFILES_VERSION = LooseVersion("0.11")
 
 
 def check_chemfiles_version():
     """Check if an appropriate *chemfiles* is available
 
     Returns ``True`` if a usable chemfiles version is available,
-    with :data:`MIN_CHEMFILES_VERSION` <= version < 
+    with :data:`MIN_CHEMFILES_VERSION` <= version <
     :data:`MAX_CHEMFILES_VERSION`
 
     .. versionadded:: 1.0.0
@@ -127,8 +129,9 @@ def check_chemfiles_version():
     wrong = version < MIN_CHEMFILES_VERSION or version >= MAX_CHEMFILES_VERSION
     if wrong:
         warnings.warn(
-            "unsupported Chemfiles version {}, we need a version >{} and <{}"
-            .format(version, MIN_CHEMFILES_VERSION, MAX_CHEMFILES_VERSION)
+            "unsupported Chemfiles version {}, we need a version >{} and <{}".format(
+                version, MIN_CHEMFILES_VERSION, MAX_CHEMFILES_VERSION
+            )
         )
     return not wrong
 
@@ -142,8 +145,9 @@ class ChemfilesReader(base.ReaderBase):
 
     .. versionadded:: 1.0.0
     """
-    format = 'chemfiles'
-    units = {'time': 'fs', 'length': 'Angstrom'}
+
+    format = "chemfiles"
+    units = {"time": "fs", "length": "Angstrom"}
 
     def __init__(self, filename, chemfiles_format="", **kwargs):
         """
@@ -161,8 +165,9 @@ def __init__(self, filename, chemfiles_format="", **kwargs):
 
         """
         if not check_chemfiles_version():
-            raise RuntimeError("Please install Chemfiles > {}"
-                               "".format(MIN_CHEMFILES_VERSION))
+            raise RuntimeError(
+                "Please install Chemfiles > {}" "".format(MIN_CHEMFILES_VERSION)
+            )
         super(ChemfilesReader, self).__init__(filename, **kwargs)
         self._format = chemfiles_format
         self._cached_n_atoms = None
@@ -184,7 +189,7 @@ def _open(self):
         if isinstance(self.filename, chemfiles.Trajectory):
             self._file = self.filename
         else:
-            self._file = ChemfilesPicklable(self.filename, 'r', self._format)
+            self._file = ChemfilesPicklable(self.filename, "r", self._format)
 
     def close(self):
         """close reader"""
@@ -217,7 +222,7 @@ def _read_frame(self, i):
     def _read_next_timestep(self, ts=None):
         """copy next frame into timestep"""
         if self._step >= self.n_frames:
-            raise IOError('trying to go over trajectory limit')
+            raise IOError("trying to go over trajectory limit")
         if ts is None:
             ts = self.ts
         self.ts = ts
@@ -262,17 +267,26 @@ class ChemfilesWriter(base.WriterBase):
 
     .. versionadded:: 1.0.0
     """
-    format = 'chemfiles'
+
+    format = "chemfiles"
     multiframe = True
 
     # chemfiles mostly[1] uses these units for the in-memory representation,
     # and convert into the file format units when writing.
     #
     # [1] mostly since some format don't have a specified unit
     # (XYZ for example), so then chemfiles just assume they are in A and fs.
-    units = {'time': 'fs', 'length': 'Angstrom'}
-
-    def __init__(self, filename, n_atoms=0, mode="w", chemfiles_format="", topology=None, **kwargs):
+    units = {"time": "fs", "length": "Angstrom"}
+
+    def __init__(
+        self,
+        filename,
+        n_atoms=0,
+        mode="w",
+        chemfiles_format="",
+        topology=None,
+        **kwargs
+    ):
         """
         Parameters
         ----------
@@ -298,8 +312,9 @@ def __init__(self, filename, n_atoms=0, mode="w", chemfiles_format="", topology=
 
         """
         if not check_chemfiles_version():
-            raise RuntimeError("Please install Chemfiles > {}"
-                               "".format(MIN_CHEMFILES_VERSION))
+            raise RuntimeError(
+                "Please install Chemfiles > {}" "".format(MIN_CHEMFILES_VERSION)
+            )
         self.filename = filename
         self.n_atoms = n_atoms
         if mode != "a" and mode != "w":
@@ -328,7 +343,7 @@ def _write_next_frame(self, obj):
         constructor.
 
         If `obj` contains velocities, and the underlying format supports it, the
-        velocities are writen to the file. Writing forces is unsupported at the
+        velocities are written to the file. Writing forces is unsupported at the
         moment.
 
         Parameters
@@ -342,21 +357,23 @@ def _write_next_frame(self, obj):
            Deprecated support for Timestep argument has now been removed.
            Use AtomGroup or Universe as an input instead.
         """
-        if hasattr(obj, "atoms"):
-            if hasattr(obj, 'universe'):
+        try:
+            atoms = obj.atoms
+        except AttributeError:
+            errmsg = "Input obj is neither an AtomGroup or Universe"
+            raise TypeError(errmsg) from None
+        else:
+            if hasattr(obj, "universe"):
                 # For AtomGroup and children (Residue, ResidueGroup, Segment)
                 ts_full = obj.universe.trajectory.ts
-                if ts_full.n_atoms == obj.atoms.n_atoms:
+                if ts_full.n_atoms == atoms.n_atoms:
                     ts = ts_full
                 else:
                     # Only populate a time step with the selected atoms.
                     ts = ts_full.copy_slice(atoms.indices)
-            elif hasattr(obj, 'trajectory'):
+            elif hasattr(obj, "trajectory"):
                 # For Universe only --- get everything
                 ts = obj.trajectory.ts
-        else:
-            errmsg = "Input obj is neither an AtomGroup or Universe"
-            raise TypeError(errmsg) from None
 
         frame = self._timestep_to_chemfiles(ts)
         frame.topology = self._topology_to_chemfiles(obj, len(frame.atoms))
@@ -375,7 +392,20 @@ def _timestep_to_chemfiles(self, ts):
         if ts.has_velocities:
             frame.add_velocities()
             frame.velocities[:] = ts.velocities[:]
-        frame.cell = chemfiles.UnitCell(*ts.dimensions)
+
+        lengths = ts.dimensions[:3]
+        angles = ts.dimensions[3:]
+
+        # if there is no cell information in this universe, still pass a valid
+        # cell to chemfiles
+        if np.all(ts.dimensions == 0.0):
+            angles = [90, 90, 90]
+
+        if chemfiles.__version__.startswith("0.9"):
+            frame.cell = chemfiles.UnitCell(*lengths, *angles)
+        else:
+            frame.cell = chemfiles.UnitCell(lengths, angles)
+
         return frame
 
     def _topology_to_chemfiles(self, obj, n_atoms):
@@ -391,21 +421,21 @@ def _topology_to_chemfiles(self, obj, n_atoms):
         # (1) add all atoms to the topology
         residues = {}
         for atom in obj.atoms:
-            name = getattr(atom, 'name', "")
-            type = getattr(atom, 'type', name)
+            name = getattr(atom, "name", "")
+            type = getattr(atom, "type", name)
             chemfiles_atom = chemfiles.Atom(name, type)
 
-            if hasattr(atom, 'altLoc'):
+            if hasattr(atom, "altLoc"):
                 chemfiles_atom["altloc"] = str(atom.altLoc)
 
-            if hasattr(atom, 'segid'):
+            if hasattr(atom, "segid"):
                 chemfiles_atom["segid"] = str(atom.segid)
 
-            if hasattr(atom, 'segindex'):
+            if hasattr(atom, "segindex"):
                 chemfiles_atom["segindex"] = int(atom.segindex)
 
-            if hasattr(atom, 'resid'):
-                resname = getattr(atom, 'resname', "")
+            if hasattr(atom, "resid"):
+                resname = getattr(atom, "resname", "")
                 if atom.resid not in residues.keys():
                     residues[atom.resid] = chemfiles.Residue(resname, atom.resid)
                 residue = residues[atom.resid]
@@ -486,13 +516,11 @@ class ChemfilesPicklable(chemfiles.Trajectory):
 
     .. versionadded:: 2.0.0
     """
+
     def __init__(self, path, mode="r", format=""):
-        if mode != 'r':
-            raise ValueError("Only read mode ('r') "
-                             "files can be pickled.")
-        super().__init__(path=path,
-                         mode=mode,
-                         format=format)
+        if mode != "r":
+            raise ValueError("Only read mode ('r') files can be pickled.")
+        super().__init__(path=path, mode=mode, format=format)
 
     def __getstate__(self):
         return self.path, self._Trajectory__mode, self._Trajectory__format