From 82e0d6da244b3bfa91b31dfd619ef12298910ab1 Mon Sep 17 00:00:00 2001 From: SHUBHAM SHARMA Date: Sat, 7 Mar 2020 23:01:50 +0530 Subject: [PATCH 1/7] Fix #2582: polymer persistence length example fix (#2589) * Fix #2582 * fix polymer persistence length example fix * Added name to AUTHOR file * Update CHANGELOG Co-authored-by: Richard Gowers --- package/AUTHORS | 1 + package/CHANGELOG | 4 +++- package/MDAnalysis/analysis/polymer.py | 10 +++++----- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/package/AUTHORS b/package/AUTHORS index 7e87950245e..6168416987b 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -133,6 +133,7 @@ Chronological list of authors - Hao Tian - Hugo MacDermott-Opeskin - Anshul Angaria + - Shubham Sharma diff --git a/package/CHANGELOG b/package/CHANGELOG index 46ee8181641..f100fb9e587 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -15,7 +15,8 @@ The rules for this file: ------------------------------------------------------------------------------ mm/dd/yy richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira, PicoCentauri, davidercruz, jbarnoud, RMeli, IAlibay, mtiberti, CCook96, - Yuan-Yu, xiki-tempula, HTian1997, Iv-Hristov, hmacdope, AnshulAngaria + Yuan-Yu, xiki-tempula, HTian1997, Iv-Hristov, hmacdope, AnshulAngaria, + ss62171 * 0.21.0 @@ -58,6 +59,7 @@ Fixes argument. The directives parsed into bonds, angles, impropers, and dihedrals now match TPRParser. (PR #2408) * Added parmed to setup.py + * Fixed example docs for polymer persistence length (#2582) Enhancements * New analysis.hole2 module for HOLE interfacing. Contains a function (hole) diff --git a/package/MDAnalysis/analysis/polymer.py b/package/MDAnalysis/analysis/polymer.py index e699628c38b..b85010f1d6f 100644 --- a/package/MDAnalysis/analysis/polymer.py +++ b/package/MDAnalysis/analysis/polymer.py @@ -48,16 +48,16 @@ # so we can select the chains by using the .fragments attribute chains = u.atoms.fragments # select only the backbone atoms for each chain - backbones = [chain.select_atoms('not name O* H') for chain in chains] + backbones = [chain.select_atoms('not name O* H*') for chain in chains] # sort the chains, removing any non-backbone atoms sorted_backbones = [polymer.sort_backbone(bb) for bb in backbones] - lp = polymer.PersistenceLength(sorted_backbones) + persistence_length = polymer.PersistenceLength(sorted_backbones) # Run the analysis, this will average over all polymer chains # and all timesteps in trajectory - lp = lp.run() - print('The persistence length is: {}'.format(lp.pl)) + persistence_length = persistence_length.run() + print('The persistence length is: {}'.format(persistence_length.lp)) # always check the visualisation of this: - lp.plot() + persistence_length.plot() """ from __future__ import division, absolute_import From 9581bc41d8887085d4fb2c3cf9f74db11d5d3546 Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Mon, 9 Mar 2020 11:27:10 +0000 Subject: [PATCH 2/7] Fixes TRJReader random frame access docs (#2595) * Fixes #2398 * Update CHANGELOG --- package/CHANGELOG | 2 ++ package/MDAnalysis/coordinates/TRJ.py | 6 ------ 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index f100fb9e587..92c29e2b534 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -21,6 +21,8 @@ mm/dd/yy richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira * 0.21.0 Fixes + * Fixes outdated :class:`TRJReader` documentation on random frame access + (Issue #2398) * Fixed mda.Merge for Universes 
without coordinates (Issue #2470)(PR #2580) * PCA(align=True) now correctly aligns the trajectory and computes the correct means and covariance matrix (Issue #2561) diff --git a/package/MDAnalysis/coordinates/TRJ.py b/package/MDAnalysis/coordinates/TRJ.py index 3804e0c390d..e35d606e5ad 100644 --- a/package/MDAnalysis/coordinates/TRJ.py +++ b/package/MDAnalysis/coordinates/TRJ.py @@ -120,9 +120,6 @@ * The trajectory does not contain time information so we simply set the time step to 1 ps (or the user could provide it as kwarg *dt*) -* **No direct access of frames is implemented, only iteration through - the trajectory.** - * Trajectories with fewer than 4 atoms probably fail to be read (BUG). * If the trajectory contains exactly *one* atom then it is always @@ -204,9 +201,6 @@ class TRJReader(base.ReaderBase): be set by passing the `dt` keyword argument to the constructor; it is assumed to be in ps. The default value is 1 ps. - Functionality is currently limited to simple iteration over the - trajectory. - .. _AMBER TRJ format: http://ambermd.org/formats.html#trajectory .. versionchanged:: 0.11.0 From ca1f5a80f6f2a5b77530123a15334f0e8d7fdebf Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Tue, 10 Mar 2020 01:40:03 +0000 Subject: [PATCH 3/7] PSAnalysis generate_paths defaults and fixes load tests (#2594) * fix #2593 * Changes the :meth:PSAnalysis.generate_paths defaults for store and filename defaults to those originally documented in the docstring. * Updates the test for :meth:PSAnalysis.save_paths and :meth:PSAnalysis.load so it no longer fails. * Also some blank line PEP8 changes, because I got carried away * Update CHANGELOG --- package/CHANGELOG | 4 ++ package/MDAnalysis/analysis/psa.py | 42 ++++-------- .../MDAnalysisTests/analysis/test_psa.py | 67 +++++++++++++++---- 3 files changed, 72 insertions(+), 41 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 92c29e2b534..b8af3725ee1 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -21,6 +21,8 @@ mm/dd/yy richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira * 0.21.0 Fixes + * Fixed test for the `save_paths` and `load` methods of :class:`PSAnalysis` + (Issue #2593) * Fixes outdated :class:`TRJReader` documentation on random frame access (Issue #2398) * Fixed mda.Merge for Universes without coordinates (Issue #2470)(PR #2580) @@ -98,6 +100,8 @@ Enhancements * Improve the distance search in water bridge analysis with capped_distance (PR #2480) Changes + * Changed :meth:`PSAnalysis.generate_paths` keywords `store` and `filename` + defaults to `False` and `None` (Issue #2593) * Removed `format` keyword from :meth:`MemoryReader.timeseries` (Issue #1453, #2443) * Deprecated :class:`LAMMPSDataConverter` has now been removed (Issue #2564) diff --git a/package/MDAnalysis/analysis/psa.py b/package/MDAnalysis/analysis/psa.py index b29fce93e28..011cd34846b 100644 --- a/package/MDAnalysis/analysis/psa.py +++ b/package/MDAnalysis/analysis/psa.py @@ -237,6 +237,7 @@ cite_module=True) del Doi + def get_path_metric_func(name): """Selects a path metric function by name. 
@@ -835,7 +836,6 @@ def __init__(self, universe, reference, select='name CA', self.path = None self.natoms = None - def fit_to_reference(self, filename=None, prefix='', postfix='_fit', rmsdfile=None, targetdir=os.path.curdir, weights=None, tol_mass=0.1): @@ -894,7 +894,6 @@ def fit_to_reference(self, filename=None, prefix='', postfix='_fit', aligntrj.save(rmsdfile) return MDAnalysis.Universe(self.top_name, self.newtrj_name) - def to_path(self, fitted=False, select=None, flat=False): r"""Generates a coordinate time series from the fitted universe trajectory. @@ -947,7 +946,6 @@ def to_path(self, fitted=False, select=None, flat=False): else: return np.array([atoms.positions for _ in frames]) - def run(self, align=False, filename=None, postfix='_fit', rmsdfile=None, targetdir=os.path.curdir, weights=None, tol_mass=0.1, flat=False): @@ -1018,7 +1016,6 @@ def run(self, align=False, filename=None, postfix='_fit', rmsdfile=None, self.path = self.to_path(fitted=align, flat=flat) return self.top_name, self.newtrj_name - def get_num_atoms(self): """Return the number of atoms used to construct the :class:`Path`. @@ -1103,7 +1100,6 @@ def __init__(self, npaths, i, j): # Set by self.getHausdorffPair self.hausdorff_pair = {'frames' : (None, None), 'distance' : None} - def _dvec_idx(self, i, j): """Convert distance matrix indices (in the upper triangle) to the index of the corresponding distance vector. @@ -1127,7 +1123,6 @@ def _dvec_idx(self, i, j): """ return (self.npaths*i) + j - (i+2)*(i+1)/2 - def compute_nearest_neighbors(self, P,Q, N=None): """Generates Hausdorff nearest neighbor lists of *frames* (by index) and *distances* for *this* pair of paths corresponding to distance matrix @@ -1155,7 +1150,6 @@ def compute_nearest_neighbors(self, P,Q, N=None): self.nearest_neighbors['frames'] = hn['frames'] self.nearest_neighbors['distances'] = hn['distances'] - def find_hausdorff_pair(self): r"""Find the Hausdorff pair (of frames) for *this* pair of paths. @@ -1183,7 +1177,6 @@ def find_hausdorff_pair(self): self.hausdorff_pair['frames'] = nn_idx_Q[max_nn_idx_Q], max_nn_idx_Q self.hausdorff_pair['distance'] = max_nn_dist_Q - def get_nearest_neighbors(self, frames=True, distances=True): """Returns the nearest neighbor frame indices, distances, or both, for each path in *this* :class:`PSAPair`. @@ -1403,9 +1396,8 @@ def __init__(self, universes, reference=None, select='name CA', self._NN = None # (distance vector order) list of all nearest neighbors self._psa_pairs = None # (distance vector order) list of all PSAPairs - - def generate_paths(self, align=False, filename='fitted', infix='', weights=None, - tol_mass=False, ref_frame=None, flat=False, save=True, store=True): + def generate_paths(self, align=False, filename=None, infix='', weights=None, + tol_mass=False, ref_frame=None, flat=False, save=True, store=False): """Generate paths, aligning each to reference structure if necessary. Parameters @@ -1424,12 +1416,12 @@ def generate_paths(self, align=False, filename='fitted', infix='', weights=None, ``None`` weigh each atom equally. If a float array of the same length as the selected AtomGroup is provided, use each element of the `array_like` as a weight for the corresponding atom in the - AtomGroup. 
+ AtomGroup [``None``] tol_mass : float Reject match if the atomic masses for matched atoms differ by more - than *tol_mass* + than *tol_mass* [``False``] ref_frame : int - frame index to select frame from *reference* + frame index to select frame from *reference* [``None``] flat : bool represent :attr:`Path.path` as a 2D (|2D|) :class:`numpy.ndarray`; if ``False`` then :attr:`Path.path` is a 3D (|3D|) @@ -1459,6 +1451,11 @@ def generate_paths(self, align=False, filename='fitted', infix='', weights=None, .. versionchanged:: 0.17.0 Deprecated keyword `mass_weighted` was removed. + + .. versionchanged:: 1.0.0 + Defaults for the `store` and `filename` keywords have been changed + from `True` and `fitted` to `False` and `None` respectively. These + now match the docstring documented defaults. """ if ref_frame is None: ref_frame = self.ref_frame @@ -1487,7 +1484,6 @@ def generate_paths(self, align=False, filename='fitted', infix='', weights=None, if store: self.save_paths(filename=filename) - def run(self, **kwargs): """Perform path similarity analysis on the trajectories to compute the distance matrix. @@ -1590,7 +1586,6 @@ def run_pairs_analysis(self, **kwargs): self._HP.append(pp.get_hausdorff_pair()) self.D = D - def save_paths(self, filename=None): """Save fitted :attr:`PSAnalysis.paths` to numpy compressed npz files. @@ -1627,7 +1622,6 @@ def save_paths(self, filename=None): cPickle.dump(self.path_names, output) return filename - def load(self): """Load fitted paths specified by 'psa_path-names.pkl' in :attr:`PSAnalysis.targetdir`. @@ -1642,13 +1636,12 @@ def load(self): if not os.path.exists(self._paths_pkl): raise NoDataError("Fitted trajectories cannot be loaded; save file" + "{0} does not exist.".format(self._paths_pkl)) - self.path_names = np.load(self._paths_pkl) + self.path_names = np.load(self._paths_pkl, allow_pickle=True) self.paths = [np.load(pname) for pname in self.path_names] if os.path.exists(self._labels_pkl): - self.labels = np.load(self._labels_pkl) + self.labels = np.load(self._labels_pkl, allow_pickle=True) logger.info("Loaded paths from %r", self._paths_pkl) - def plot(self, filename=None, linkage='ward', count_sort=False, distance_sort=False, figsize=4.5, labelsize=12): """Plot a clustered distance matrix. @@ -1769,7 +1762,6 @@ def plot(self, filename=None, linkage='ward', count_sort=False, return Z, dgram, dist_matrix_clus - def plot_annotated_heatmap(self, filename=None, linkage='ward', \ count_sort=False, distance_sort=False, \ figsize=8, annot_size=6.5): @@ -1893,7 +1885,6 @@ def plot_annotated_heatmap(self, filename=None, linkage='ward', \ return Z, dgram, dist_matrix_clus - def plot_nearest_neighbors(self, filename=None, idx=0, \ labels=('Path 1', 'Path 2'), figsize=4.5, \ multiplot=False, aspect_ratio=1.75, \ @@ -1999,7 +1990,6 @@ def plot_nearest_neighbors(self, filename=None, idx=0, \ return ax - def cluster(self, dist_mat=None, method='ward', count_sort=False, \ distance_sort=False, no_plot=False, no_labels=True, \ color_threshold=4): @@ -2091,7 +2081,6 @@ def _get_plot_obj_locs(self): return dgram_loc, hmap_loc, cbar_loc - def get_num_atoms(self): """Return the number of atoms used to construct the :class:`Path` instances in :class:`PSA`. @@ -2111,7 +2100,6 @@ def get_num_atoms(self): "No path data; do 'PSAnalysis.generate_paths()' first.") return self.natoms - def get_num_paths(self): """Return the number of paths in :class:`PSA`. 
@@ -2129,7 +2117,6 @@ def get_num_paths(self): "No path data; do 'PSAnalysis.generate_paths()' first.") return self.npaths - def get_paths(self): """Return the paths in :class:`PSA`. @@ -2149,7 +2136,6 @@ def get_paths(self): "No path data; do 'PSAnalysis.generate_paths()' first.") return self.paths - def get_pairwise_distances(self, vectorform=False, checks=False): """Return the distance matrix (or vector) of pairwise path distances. @@ -2180,7 +2166,6 @@ def get_pairwise_distances(self, vectorform=False, checks=False): else: return self.D - @property def psa_pairs(self): """The list of :class:`PSAPair` instances for each pair of paths. @@ -2210,7 +2195,6 @@ def psa_pairs(self): " 'PSAnalysis.run_pairs_analysis()' first.") return self._psa_pairs - @property def hausdorff_pairs(self): """The Hausdorff pair for each (unique) pairs of paths. diff --git a/testsuite/MDAnalysisTests/analysis/test_psa.py b/testsuite/MDAnalysisTests/analysis/test_psa.py index f3f5b553b0f..74a109305ae 100644 --- a/testsuite/MDAnalysisTests/analysis/test_psa.py +++ b/testsuite/MDAnalysisTests/analysis/test_psa.py @@ -36,6 +36,7 @@ import matplotlib from MDAnalysisTests.datafiles import PSF, DCD, DCD2 +from MDAnalysis import NoDataError class TestPSAnalysis(object): @@ -167,21 +168,63 @@ def test_nearest_neighbors(self, psa): psa.run_pairs_analysis(neighbors=True) assert len(psa.nearest_neighbors) == 3 - @pytest.mark.xfail - def test_load(self, psa): + @pytest.mark.parametrize('stored', [True, False]) + def test_load(self, stored, tmpdir): """Test that the automatically saved files can be loaded""" - expected_path_names = psa.path_names[:] + # To allow for testing the store keyword, ignore fixture + universe1 = mda.Universe(PSF, DCD) + universe2 = mda.Universe(PSF, DCD2) + universe_rev = mda.Universe(PSF, DCD) + + psa = PSA.PSAnalysis([universe1, universe2, universe_rev], + path_select='name CA', + targetdir=str(tmpdir)) + + psa2 = PSA.PSAnalysis([universe1, universe2, universe_rev], + path_select='name CA', + targetdir=str(tmpdir)) + + psa.generate_paths(align=True, store=stored) + + # Make copies to the existing data + # Note: path names are set after save_paths has been called expected_paths = [p.copy() for p in psa.paths] - psa.save_paths() - psa.load() - assert psa.path_names == expected_path_names - # manually compare paths because - # assert_almost_equal(psa.paths, expected_paths, decimal=6) - # raises a ValueError in the assertion code itself - assert len(psa.paths) == len(expected_paths) - for ipath, (observed, expected) in enumerate(zip(psa.paths, expected_paths)): + + if not stored: + psa.save_paths() + + expected_path_names = psa.path_names[:] + + # Load data in the empty PSAnalysis object + psa2.load() + + assert psa2.path_names == expected_path_names + assert len(psa2.paths) == len(expected_paths) + + for ipath, (observed, expected) in enumerate(zip(psa2.paths, + expected_paths)): assert_almost_equal(observed, expected, decimal=6, - err_msg="loaded path {} does not agree with input".format(ipath)) + err_msg=("loaded path {} does not agree with " + "input").format(ipath)) + + def test_load_nofile(self, psa): + """Test case where save_paths hasn't been called before load""" + match_exp = "Fitted trajectories cannot be loaded" + with pytest.raises(NoDataError, match=match_exp): + psa.load() + + def test_save_nopaths(self, tmpdir): + """Test case were save_paths is called without calcualted paths""" + match_exp = "Paths have not been calculated yet" + with pytest.raises(NoDataError, match=match_exp): + 
universe1 = mda.Universe(PSF, DCD) + universe2 = mda.Universe(PSF, DCD2) + universe_rev = mda.Universe(PSF, DCD) + + psa = PSA.PSAnalysis([universe1, universe2, universe_rev], + path_select='name CA', + targetdir=str(tmpdir)) + psa.save_paths() def test_dendrogram_produced(self, plot_data): """Test whether Dendrogram dictionary object was produced""" From d9a78a4eb1c3add937b34524b071e5ed185d924c Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Tue, 10 Mar 2020 01:45:15 +0000 Subject: [PATCH 4/7] Removed PersistenceLength.perform_fit (#2597) - Fixes #2596 - Removes deprecated :meth:`PersistenceLength.perform_fit` (was deprecated in 0.19.2 but no removal date give: in #2596 it was determined that it will be removed in 1.0) --- package/CHANGELOG | 1 + package/MDAnalysis/analysis/polymer.py | 6 ++---- .../MDAnalysisTests/analysis/test_persistencelength.py | 3 --- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index b8af3725ee1..ccf66efba15 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -100,6 +100,7 @@ Enhancements * Improve the distance search in water bridge analysis with capped_distance (PR #2480) Changes + * Removed deprecated :meth:`PersistenceLength.perform_fit` (Issue #2596) * Changed :meth:`PSAnalysis.generate_paths` keywords `store` and `filename` defaults to `False` and `None` (Issue #2593) * Removed `format` keyword from :meth:`MemoryReader.timeseries` (Issue #1453, diff --git a/package/MDAnalysis/analysis/polymer.py b/package/MDAnalysis/analysis/polymer.py index b85010f1d6f..bf363934c9c 100644 --- a/package/MDAnalysis/analysis/polymer.py +++ b/package/MDAnalysis/analysis/polymer.py @@ -189,6 +189,8 @@ class PersistenceLength(AnalysisBase): .. versionadded:: 0.13.0 .. versionchanged:: 0.20.0 The run method now automatically performs the exponential fit + .. versionchanged:: 1.0.0 + Deprecated :meth:`PersistenceLength.perform_fit` has now been removed. 
""" def __init__(self, atomgroups, **kwargs): super(PersistenceLength, self).__init__( @@ -240,10 +242,6 @@ def _calc_bond_length(self): bs.append(b) self.lb = np.mean(bs) - def perform_fit(self): - warnings.warn("perform_fit is now called automatically from run", - DeprecationWarning) - def _perform_fit(self): """Fit the results to an exponential decay""" try: diff --git a/testsuite/MDAnalysisTests/analysis/test_persistencelength.py b/testsuite/MDAnalysisTests/analysis/test_persistencelength.py index a6acbc837b9..8fbaf91dbf7 100644 --- a/testsuite/MDAnalysisTests/analysis/test_persistencelength.py +++ b/testsuite/MDAnalysisTests/analysis/test_persistencelength.py @@ -93,9 +93,6 @@ def test_plot_with_ax(self, p_run): assert ax2 is ax - def test_perform_fit_warn(self, p_run): - with pytest.warns(DeprecationWarning): - p_run.perform_fit() class TestFitExponential(object): x = np.linspace(0, 250, 251) From e416aa7d9adb9a9c14adb0774e65162da89a0d1c Mon Sep 17 00:00:00 2001 From: Guillaume Fraux Date: Tue, 10 Mar 2020 12:47:15 +0100 Subject: [PATCH 5/7] Add Chemfiles as a coordinate reader/writer (#1862) * Implement CHEMFILES reader and writer Co-authored-by: richardjgowers Co-authored-by: Max Linke --- .appveyor.yml | 2 +- .travis.yml | 4 +- package/AUTHORS | 2 +- package/CHANGELOG | 3 +- package/MDAnalysis/coordinates/__init__.py | 1 + package/MDAnalysis/coordinates/chemfiles.py | 393 ++++++++++++++++++ .../coordinates/chemfiles.rst | 1 + .../coordinates_modules.rst | 1 + testsuite/MDAnalysisTests/coordinates/base.py | 4 +- .../coordinates/test_chemfiles.py | 256 ++++++++++++ .../MDAnalysisTests/coordinates/test_xyz.py | 2 +- 11 files changed, 661 insertions(+), 8 deletions(-) create mode 100644 package/MDAnalysis/coordinates/chemfiles.py create mode 100644 package/doc/sphinx/source/documentation_pages/coordinates/chemfiles.rst create mode 100644 testsuite/MDAnalysisTests/coordinates/test_chemfiles.py diff --git a/.appveyor.yml b/.appveyor.yml index 76339b3fb2f..6f07a88fc36 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -10,7 +10,7 @@ cache: environment: global: CONDA_CHANNELS: conda-forge - CONDA_DEPENDENCIES: pip setuptools wheel cython mock six biopython networkx joblib matplotlib scipy vs2015_runtime pytest mmtf-python GridDataFormats hypothesis pytest-cov codecov + CONDA_DEPENDENCIES: pip setuptools wheel cython mock six biopython networkx joblib matplotlib scipy vs2015_runtime pytest mmtf-python GridDataFormats hypothesis pytest-cov codecov chemfiles PIP_DEPENDENCIES: gsd==1.9.3 duecredit parmed DEBUG: "False" MINGW_64: C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin diff --git a/.travis.yml b/.travis.yml index 2818163d465..1249fc9a70d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,7 +28,7 @@ env: - SETUP_CMD="${PYTEST_FLAGS}" - BUILD_CMD="pip install -e package/ && (cd testsuite/ && python setup.py build)" - CONDA_MIN_DEPENDENCIES="mmtf-python mock six biopython networkx cython matplotlib scipy griddataformats hypothesis gsd codecov" - - CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12" + - CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles" - CONDA_CHANNELS='biobuilds conda-forge' - CONDA_CHANNEL_PRIORITY=True - PIP_DEPENDENCIES="duecredit parmed" @@ -66,7 +66,7 @@ matrix: INSTALL_HOLE="false" - env: NAME="python 2.7" - PYTHON_VERSION=2.7 + PYTHON_VERSION=2.7 CODECOV="true" NUMPY_VERSION=1.16 SETUP_CMD="${PYTEST_FLAGS} --cov=MDAnalysis" diff 
--git a/package/AUTHORS b/package/AUTHORS index 6168416987b..05f38057159 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -128,6 +128,7 @@ Chronological list of authors 2020 - Charlie Cook - Yuanyu Chang + - Guillaume Fraux - Ivan Hristov - Michael Quevillon - Hao Tian @@ -136,7 +137,6 @@ Chronological list of authors - Shubham Sharma - External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index ccf66efba15..17ef7119be3 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -16,7 +16,7 @@ The rules for this file: mm/dd/yy richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira, PicoCentauri, davidercruz, jbarnoud, RMeli, IAlibay, mtiberti, CCook96, Yuan-Yu, xiki-tempula, HTian1997, Iv-Hristov, hmacdope, AnshulAngaria, - ss62171 + ss62171, Luthaf * 0.21.0 @@ -66,6 +66,7 @@ Fixes * Fixed example docs for polymer persistence length (#2582) Enhancements + * Added ability to use Chemfiles as a trajectory reader backend (PR #1862) * New analysis.hole2 module for HOLE interfacing. Contains a function (hole) for running HOLE on singular PDB files and class (HoleAnalysis) for trajectories (PR #2523) diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py index f724e257a33..5bd23b82bf8 100644 --- a/package/MDAnalysis/coordinates/__init__.py +++ b/package/MDAnalysis/coordinates/__init__.py @@ -716,6 +716,7 @@ class can choose an appropriate reader automatically. from . import base from .core import reader, writer from . import chain +from . import chemfiles from . import CRD from . import DCD from . import DLPoly diff --git a/package/MDAnalysis/coordinates/chemfiles.py b/package/MDAnalysis/coordinates/chemfiles.py new file mode 100644 index 00000000000..c7fbe842c2c --- /dev/null +++ b/package/MDAnalysis/coordinates/chemfiles.py @@ -0,0 +1,393 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +""" +Reading trajectory with Chemfiles --- :mod:`MDAnalysis.coordinates.chemfiles` +============================================================================= + +Classes to read and write files using the `chemfiles`_ library. This library +provides C++ implementation of multiple formats readers and writers, the full +list if available `here `_. + +.. _chemfiles: https://chemfiles.org +.. _formats: http://chemfiles.org/chemfiles/latest/formats.html + +Classes +------- + +.. autoclass:: ChemfilesReader + +.. 
autoclass:: ChemfilesWriter + +""" +from __future__ import absolute_import, division +from __future__ import print_function, unicode_literals + +import numpy as np +from six import string_types +from distutils.version import LooseVersion +import warnings + +from . import base, core + +try: + import chemfiles +except ImportError: + HAS_CHEMFILES = False +else: + HAS_CHEMFILES = True + + +MIN_CHEMFILES_VERSION = LooseVersion("0.9") +MAX_CHEMFILES_VERSION = LooseVersion("0.10") + + +def check_chemfiles_version(): + """Check an appropriate Chemfiles is available + + Returns True if a usable chemfiles version is available + + .. versionadded:: 1.0.0 + """ + if not HAS_CHEMFILES: + warnings.warn( + "No Chemfiles package found. " + "Try installing with 'pip install chemfiles'" + ) + return False + version = LooseVersion(chemfiles.__version__) + wrong = version < MIN_CHEMFILES_VERSION or version >= MAX_CHEMFILES_VERSION + if wrong: + warnings.warn( + "unsupported Chemfiles version {}, we need a version >{} and <{}" + .format(version, MIN_CHEMFILES_VERSION, MAX_CHEMFILES_VERSION) + ) + return not wrong + + +class ChemfilesReader(base.ReaderBase): + """Coordinate reader using chemfiles. + + The file format to used is guessed based on the file extension. If no + matching format is found, a ``ChemfilesError`` is raised. It is also + possible to manually specify the format to use for a given file. + + .. versionadded:: 1.0.0 + """ + format = 'chemfiles' + units = {'time': 'fs', 'length': 'Angstrom'} + + def __init__(self, filename, chemfiles_format="", **kwargs): + """ + Parameters + ---------- + filename : chemfiles.Trajectory or str + the chemfiles object to read or filename to read + chemfiles_format : str (optional) + if *filename* was a string, use the given format name instead of + guessing from the extension. The `list of supported formats + `_ and the associated names is available in chemfiles + documentation. + **kwargs : dict + General reader arguments. + + + .. 
_formats: http://chemfiles.org/chemfiles/latest/formats.html + """ + if not check_chemfiles_version(): + raise RuntimeError("Please install Chemfiles > {}" + "".format(MIN_CHEMFILES_VERSION)) + super(ChemfilesReader, self).__init__(filename, **kwargs) + self._format = chemfiles_format + self._cached_n_atoms = None + self._open() + self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs) + self.next() + + @staticmethod + def _format_hint(thing): + """Can this Reader read *thing*""" + # nb, filename strings can still get passed through if + # format='CHEMFILES' is used + return HAS_CHEMFILES and isinstance(thing, chemfiles.Trajectory) + + def _open(self): + self._closed = False + self._step = 0 + self._frame = -1 + if isinstance(self.filename, chemfiles.Trajectory): + self._file = self.filename + else: + self._file = chemfiles.Trajectory(self.filename, 'r', self._format) + + def close(self): + """close reader""" + if not self._closed: + self._file.close() + self._closed = True + + @property + def n_frames(self): + """number of frames in trajectory""" + return self._file.nsteps + + @property + def n_atoms(self): + """number of atoms in the first frame of the trajectory""" + if self._cached_n_atoms is None: + self._cached_n_atoms = len(self._file.read_step(0).atoms) + return self._cached_n_atoms + + def _reopen(self): + """reopen trajectory""" + self.close() + self._open() + + def _read_frame(self, i): + """read frame i""" + self._step = i + return self._read_next_timestep() + + def _read_next_timestep(self, ts=None): + """copy next frame into timestep""" + if self._step >= self.n_frames: + raise IOError('trying to go over trajectory limit') + if ts is None: + ts = self.ts + self.ts = ts + frame = self._file.read_step(self._step) + self._frame_to_ts(frame, ts) + ts.frame = frame.step + self._step += 1 + return ts + + def _frame_to_ts(self, frame, ts): + """convert a chemfiles frame to a :class:`TimeStep`""" + if len(frame.atoms) != self.n_atoms: + raise IOError( + "The number of atom changed in the trajectory. " + "This is not supported by MDAnalysis." + ) + + ts.dimensions[:] = frame.cell.lengths + frame.cell.angles + ts.positions[:] = frame.positions[:] + if frame.has_velocities(): + ts.has_velocities = True + ts.velocities[:] = frame.velocities[:] + + def Writer(self, filename, n_atoms=None, **kwargs): + """Return writer for trajectory format""" + if n_atoms is None: + n_atoms = self.n_atoms + return ChemfilesWriter(filename, n_atoms, **kwargs) + + +class ChemfilesWriter(base.WriterBase): + """ + Coordinate writer using chemfiles. + + The file format to used is guessed based on the file extension. If no + matching format is found, a ``ChemfilesError`` is raised. It is also + possible to manually specify the format to use for a given file. + + Chemfiles support writting to files with varying number of atoms if the + underlying format support it. This is the case of most of text-based + formats. + + .. versionadded:: 1.0.0 + """ + format = 'chemfiles' + multiframe = True + + # chemfiles mostly[1] uses these units for the in-memory representation, + # and convert into the file format units when writing. + # + # [1] mostly since some format don't have a specified unit + # (XYZ for example), so then chemfiles just assume they are in A and fs. + units = {'time': 'fs', 'length': 'Angstrom'} + + def __init__(self, filename, n_atoms=0, mode="w", chemfiles_format="", topology=None, **kwargs): + """ + Parameters + ---------- + filename: str + filename of trajectory file. 
+ n_atoms: int + number of atoms in the trajectory to write. This value is not + used and can vary during trajectory, if the underlying format + support it + mode : str (optional) + file opening mode: use 'a' to append to an existing file or 'w' to + create a new file + chemfiles_format : str (optional) + use the given format name instead of guessing from the extension. + The `list of supported formats `_ and the associated names + is available in chemfiles documentation. + topology : Universe or AtomGroup (optional) + use the topology from this :class:`~MDAnalysis.core.groups.AtomGroup` + or :class:`~MDAnalysis.core.universe.Universe` to write all the + timesteps to the file + **kwargs : dict + General writer arguments. + + + .. _formats: http://chemfiles.org/chemfiles/latest/formats.html + """ + if not check_chemfiles_version(): + raise RuntimeError("Please install Chemfiles > {}" + "".format(MIN_CHEMFILES_VERSION)) + self.filename = filename + self.n_atoms = n_atoms + if mode != "a" and mode != "w": + raise IOError("Expected 'a' or 'w' as mode in ChemfilesWriter") + self._file = chemfiles.Trajectory(self.filename, mode, chemfiles_format) + self._closed = False + if topology is not None: + if isinstance(topology, string_types): + self._file.set_topology(topology) + else: + topology = self._topology_to_chemfiles(topology, n_atoms) + self._file.set_topology(topology) + + def close(self): + """Close the trajectory file and finalize the writing""" + if hasattr(self, "_closed") and not self._closed: + self._file.close() + self._closed = True + + def write(self, obj): + """Write object `obj` at current trajectory frame to file. + + Topology for the output is taken from the `obj` or default to the value + of the `topology` keyword supplied to the :class:`ChemfilesWriter` + constructor. + + If `obj` contains velocities, and the underlying format supports it, the + velocities are writen to the file. Writing forces is unsupported at the + moment. + + Parameters + ---------- + obj : Universe or AtomGroup + The :class:`~MDAnalysis.core.groups.AtomGroup` or + :class:`~MDAnalysis.core.universe.Universe` to write. + """ + if hasattr(obj, "atoms"): + if hasattr(obj, 'universe'): + # For AtomGroup and children (Residue, ResidueGroup, Segment) + ts_full = obj.universe.trajectory.ts + if ts_full.n_atoms == obj.atoms.n_atoms: + ts = ts_full + else: + # Only populate a time step with the selected atoms. + ts = ts_full.copy_slice(atoms.indices) + elif hasattr(obj, 'trajectory'): + # For Universe only --- get everything + ts = obj.trajectory.ts + else: + if isinstance(obj, base.Timestep): + ts = obj + topology = None + else: + raise TypeError("No Timestep found in obj argument") + + frame = self._timestep_to_chemfiles(ts) + frame.topology = self._topology_to_chemfiles(obj, len(frame.atoms)) + self._file.write(frame) + + def write_next_timestep(self, ts): + """Write timestep object into trajectory. + + Parameters + ---------- + ts: TimeStep + """ + frame = self._timestep_to_chemfiles(ts) + self._file.write(frame) + + def _timestep_to_chemfiles(self, ts): + """ + Convert a Timestep to a chemfiles Frame + """ + # TODO: CONVERTERS? 
+ frame = chemfiles.Frame() + frame.resize(ts.n_atoms) + if ts.has_positions: + frame.positions[:] = ts.positions[:] + if ts.has_velocities: + frame.add_velocities() + frame.velocities[:] = ts.velocities[:] + frame.cell = chemfiles.UnitCell(*ts.dimensions) + return frame + + def _topology_to_chemfiles(self, obj, n_atoms): + """ + Convert an AtomGroup or an Universe to a chemfiles Topology + """ + topology = chemfiles.Topology() + if not hasattr(obj, "atoms"): + # use an empty topology + topology.resize(n_atoms) + return topology + + # (1) add all atoms to the topology + residues = {} + for atom in obj.atoms: + name = getattr(atom, 'name', "") + type = getattr(atom, 'type', name) + chemfiles_atom = chemfiles.Atom(name, type) + + if hasattr(atom, 'altLoc'): + chemfiles_atom["altloc"] = str(atom.altLoc) + + if hasattr(atom, 'segid'): + chemfiles_atom["segid"] = str(atom.segid) + + if hasattr(atom, 'segindex'): + chemfiles_atom["segindex"] = int(atom.segindex) + + if hasattr(atom, 'resid'): + resname = getattr(atom, 'resname', "") + if atom.resid not in residues.keys(): + residues[atom.resid] = chemfiles.Residue(resname, atom.resid) + residue = residues[atom.resid] + + atom_idx = len(topology.atoms) + residue.atoms.append(atom_idx) + + if hasattr(atom, "record_type"): + # set corresponding chemfiles residue property + if atom.record_type == "ATOM": + residue["is_standard_pdb"] = True + else: + residue["is_standard_pdb"] = False + + topology.atoms.append(chemfiles_atom) + + # (2) add residues to the topology + for residue in residues.values(): + topology.residues.append(residue) + + # (3) add bonds to the topology + for bond in getattr(obj, "bonds", []): + topology.add_bond(bond.atoms[0].ix, bond.atoms[1].ix) + + return topology diff --git a/package/doc/sphinx/source/documentation_pages/coordinates/chemfiles.rst b/package/doc/sphinx/source/documentation_pages/coordinates/chemfiles.rst new file mode 100644 index 00000000000..13016a9a621 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/coordinates/chemfiles.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.coordinates.chemfiles diff --git a/package/doc/sphinx/source/documentation_pages/coordinates_modules.rst b/package/doc/sphinx/source/documentation_pages/coordinates_modules.rst index 21f1ab32cb8..e74387a4bd6 100644 --- a/package/doc/sphinx/source/documentation_pages/coordinates_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/coordinates_modules.rst @@ -41,6 +41,7 @@ provide the format in the keyword argument *format* to coordinates/XTC coordinates/XYZ coordinates/memory + coordinates/chemfiles coordinates/null .. rubric:: Coordinate core modules diff --git a/testsuite/MDAnalysisTests/coordinates/base.py b/testsuite/MDAnalysisTests/coordinates/base.py index eb509c82193..ba62bd0bd36 100644 --- a/testsuite/MDAnalysisTests/coordinates/base.py +++ b/testsuite/MDAnalysisTests/coordinates/base.py @@ -236,14 +236,14 @@ def test_double_close(self, reader): def test_get_writer_1(self, ref, reader): with tempdir.in_tempdir(): - outfile = 'test-writer' + ref.ext + outfile = 'test-writer.' + ref.ext with reader.Writer(outfile) as W: assert_equal(isinstance(W, ref.writer), True) assert_equal(W.n_atoms, reader.n_atoms) def test_get_writer_2(self, ref, reader): with tempdir.in_tempdir(): - outfile = 'test-writer' + ref.ext + outfile = 'test-writer.' 
+ ref.ext with reader.Writer(outfile, n_atoms=100) as W: assert_equal(isinstance(W, ref.writer), True) assert_equal(W.n_atoms, 100) diff --git a/testsuite/MDAnalysisTests/coordinates/test_chemfiles.py b/testsuite/MDAnalysisTests/coordinates/test_chemfiles.py new file mode 100644 index 00000000000..fa9a7ef5f4c --- /dev/null +++ b/testsuite/MDAnalysisTests/coordinates/test_chemfiles.py @@ -0,0 +1,256 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +from __future__ import absolute_import + +import numpy as np +import pytest + +import MDAnalysis as mda +from MDAnalysis.coordinates.chemfiles import ( + ChemfilesReader, ChemfilesWriter, check_chemfiles_version, +) + +from MDAnalysisTests import datafiles, tempdir +from MDAnalysisTests.coordinates.base import ( + MultiframeReaderTest, BaseWriterTest, BaseReference +) +from MDAnalysisTests.coordinates.test_xyz import XYZReference + + +# skip entire test module if no appropriate chemfiles +chemfiles = pytest.importorskip('chemfiles') +@pytest.mark.skipif(not check_chemfiles_version(), + reason="Wrong version of chemfiles") +class TestChemFileXYZ(MultiframeReaderTest): + @staticmethod + @pytest.fixture + def ref(): + base = XYZReference() + base.writer = ChemfilesWriter + base.dimensions = np.array([0, 0, 0, 90, 90, 90], dtype=np.float32) + + return base + + @pytest.fixture + def reader(self, ref): + reader = ChemfilesReader(ref.trajectory) + reader.add_auxiliary('lowf', ref.aux_lowf, dt=ref.aux_lowf_dt, initial_time=0, time_selector=None) + reader.add_auxiliary('highf', ref.aux_highf, dt=ref.aux_highf_dt, initial_time=0, time_selector=None) + return reader + + + +class ChemfilesXYZReference(BaseReference): + def __init__(self): + super(ChemfilesXYZReference, self).__init__() + self.trajectory = datafiles.COORDINATES_XYZ + self.topology = datafiles.COORDINATES_XYZ + self.reader = ChemfilesReader + self.writer = ChemfilesWriter + self.ext = 'xyz' + self.volume = 0 + self.dimensions = np.zeros(6) + self.dimensions[3:] = 90.0 + + +@pytest.mark.skipif(not check_chemfiles_version(), + reason="Wrong version of chemfiles") +class TestChemfilesReader(MultiframeReaderTest): + @staticmethod + @pytest.fixture() + def ref(): + return ChemfilesXYZReference() + + +@pytest.mark.skipif(not check_chemfiles_version(), + reason="Wrong version of chemfiles") +class TestChemfilesWriter(BaseWriterTest): + @staticmethod + @pytest.fixture() + def ref(): + return ChemfilesXYZReference() + + # Disable 'test_no_container' as it try to open a file for writing without + # extension. 
+ def test_no_container(self, ref): + pass + + def test_no_extension_raises(self, ref): + with pytest.raises(chemfiles.ChemfilesError): + ref.writer('foo') + + +@pytest.mark.skipif(not check_chemfiles_version(), + reason="Wrong version of chemfiles") +class TestChemfiles(object): + def test_read_chemfiles_format(self): + u = mda.Universe( + datafiles.LAMMPSdata, + format="chemfiles", + topology_format="data", + chemfiles_format="LAMMPS Data" + ) + + for ts in u.trajectory: + assert ts.n_atoms == 18364 + + def test_changing_system_size(self): + outfile = tempdir.TempDir().name + "chemfiles-changing-size.xyz" + with open(outfile, "w") as fd: + fd.write(VARYING_XYZ) + + u = mda.Universe(outfile, format="chemfiles", topology_format="XYZ") + + with pytest.raises(IOError): + u.trajectory._read_next_timestep() + + def test_wrong_open_mode(self): + with pytest.raises(IOError): + _ = ChemfilesWriter("", mode="r") + + def check_topology(self, reference, file): + u = mda.Universe(reference) + atoms = set([ + (atom.name, atom.type, atom.record_type) + for atom in u.atoms + ]) + bonds = set([ + (bond.atoms[0].ix, bond.atoms[1].ix) + for bond in u.bonds + ]) + + check = mda.Universe(file) + np.testing.assert_equal( + u.trajectory.ts.positions, + check.trajectory.ts.positions + ) + + for atom in check.atoms: + assert (atom.name, atom.type, atom.record_type) in atoms + + for bond in check.bonds: + assert (bond.atoms[0].ix, bond.atoms[1].ix) in bonds + + def test_write_topology(self): + u = mda.Universe(datafiles.CONECT) + outfile = tempdir.TempDir().name + "chemfiles-write-topology.pdb" + with ChemfilesWriter(outfile) as writer: + writer.write(u) + self.check_topology(datafiles.CONECT, outfile) + + # Manually setting the topology when creating the ChemfilesWriter + # (1) from an object + with ChemfilesWriter(outfile, topology=u) as writer: + writer.write_next_timestep(u.trajectory.ts) + self.check_topology(datafiles.CONECT, outfile) + + # (2) from a file + with ChemfilesWriter(outfile, topology=datafiles.CONECT) as writer: + writer.write_next_timestep(u.trajectory.ts) + # FIXME: this does not work, since chemfiles also insert the bonds + # which are implicit in PDB format (between standard residues), while + # MDAnalysis only read the explicit CONNECT records. 
+ + # self.check_topology(datafiles.CONECT, outfile) + + def test_write_velocities(self): + ts = mda.coordinates.base.Timestep(4, velocities=True) + ts.dimensions = [20, 30, 41, 90, 90, 90] + + ts.positions = [ + [1, 1, 1], + [2, 2, 2], + [3, 3, 3], + [4, 4, 4], + ] + ts.velocities = [ + [10, 10, 10], + [20, 20, 20], + [30, 30, 30], + [40, 40, 40], + ] + + u = mda.Universe.empty(4) + u.add_TopologyAttr('type') + u.atoms[0].type = "H" + u.atoms[1].type = "H" + u.atoms[2].type = "H" + u.atoms[3].type = "H" + + outfile = tempdir.TempDir().name + "chemfiles-write-velocities.lmp" + with ChemfilesWriter(outfile, topology=u, chemfiles_format='LAMMPS Data') as writer: + writer.write_next_timestep(ts) + + with open(outfile) as file: + content = file.read() + assert content == EXPECTED_LAMMPS_DATA + + +VARYING_XYZ = """2 + +A 0 0 0 +A 0 0 0 +4 + +A 0 0 0 +A 0 0 0 +A 0 0 0 +A 0 0 0 +""" + + +EXPECTED_LAMMPS_DATA = """LAMMPS data file -- atom_style full -- generated by chemfiles +4 atoms +0 bonds +0 angles +0 dihedrals +0 impropers +1 atom types +0 bond types +0 angle types +0 dihedral types +0 improper types +0 20.0 xlo xhi +0 30.0 ylo yhi +0 41.0 zlo zhi + +# Pair Coeffs +# 1 H + +Masses + +1 0.0 # H + +Atoms # full + +1 1 1 0.0 1.0 1.0 1.0 # H +2 2 1 0.0 2.0 2.0 2.0 # H +3 3 1 0.0 3.0 3.0 3.0 # H +4 4 1 0.0 4.0 4.0 4.0 # H + +Velocities + +1 10.0 10.0 10.0 +2 20.0 20.0 20.0 +3 30.0 30.0 30.0 +4 40.0 40.0 40.0 +""" diff --git a/testsuite/MDAnalysisTests/coordinates/test_xyz.py b/testsuite/MDAnalysisTests/coordinates/test_xyz.py index 6436233fa1d..89e3ac975d9 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_xyz.py +++ b/testsuite/MDAnalysisTests/coordinates/test_xyz.py @@ -175,4 +175,4 @@ def test_elements_and_names(self, outfile, attr): with open(outfile, "r") as r: names = ''.join(l.split()[0].strip() for l in r.readlines()[2:-1]) - assert names[:-1].lower() == 'testing' \ No newline at end of file + assert names[:-1].lower() == 'testing' From 53ccf6def40ca0e1342179269b1dfeef0e4877b4 Mon Sep 17 00:00:00 2001 From: Yuxuan Zhuang Date: Thu, 12 Mar 2020 11:26:13 +0100 Subject: [PATCH 6/7] Increase coverage of leaflet.py (#2603) Commit title: Increase coverage of leaflet.py (#2603) Contents of commit: ============= 1) This commit adds a series of tests to `analysis/leaflet.py` which cover parts of the code which did not use to be tested (Issue #2600). As of this commit, coverage is now at over 95% for this module. 2) This commit also fixes a missing import for the selection module (#2612). 
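A minimal sketch of the LeafletFinder workflow that the new tests exercise -- the
selection, cutoff and data file mirror the test fixtures (Martini membrane,
`name PO4` head groups, cutoff=15.0) and the snippet is illustrative only:

    import MDAnalysis as mda
    from MDAnalysis.analysis.leaflet import LeafletFinder
    from MDAnalysisTests.datafiles import Martini_membrane_gro

    u = mda.Universe(Martini_membrane_gro)
    # group the PO4 head groups into leaflets, honouring periodic boundaries
    lf = LeafletFinder(u, "name PO4", cutoff=15.0, pbc=True)

    leaflets = lf.groups()              # one AtomGroup per leaflet (two for this bilayer)
    lf.update(cutoff=1.0)               # redo the search with a different cutoff
    lf.write_selection("leaflet.vmd")   # write a VMD atomselect macro per leaflet

These are exactly the calls covered by the added tests (pbc on/off, the three
sparse settings, update(), write_selection() output, component indexing).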
Unaltered commit history: ================= * _get_graph without self.pbc for leaflet.py * using update * add test for leaflet write_selection and fix one missing module in leaflet.py * tyop * check expected lipids contact are returned when pbc on * check leaflet write_selection output * remove unused codes * add more tests on update function * cover three options for sparse * add test on components index in group & add name CHANGELOG * add test on update without cutoff * PEP8 formatting * exclude valueerror in matrix cal from coverage * add author --- package/AUTHORS | 2 +- package/CHANGELOG | 3 +- package/MDAnalysis/analysis/leaflet.py | 5 +- .../MDAnalysisTests/analysis/test_leaflet.py | 107 +++++++++++++++++- 4 files changed, 112 insertions(+), 5 deletions(-) diff --git a/package/AUTHORS b/package/AUTHORS index 05f38057159..00cde2773cb 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -135,7 +135,7 @@ Chronological list of authors - Hugo MacDermott-Opeskin - Anshul Angaria - Shubham Sharma - + - Yuxuan Zhuang External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index 17ef7119be3..fcda3ba8281 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -16,7 +16,7 @@ The rules for this file: mm/dd/yy richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira, PicoCentauri, davidercruz, jbarnoud, RMeli, IAlibay, mtiberti, CCook96, Yuan-Yu, xiki-tempula, HTian1997, Iv-Hristov, hmacdope, AnshulAngaria, - ss62171, Luthaf + ss62171, Luthaf, yuxuanzhuang * 0.21.0 @@ -64,6 +64,7 @@ Fixes match TPRParser. (PR #2408) * Added parmed to setup.py * Fixed example docs for polymer persistence length (#2582) + * Added missing selection module to leaflet.py (Issue #2612) Enhancements * Added ability to use Chemfiles as a trajectory reader backend (PR #1862) diff --git a/package/MDAnalysis/analysis/leaflet.py b/package/MDAnalysis/analysis/leaflet.py index e70e2359ee7..0cde51c7d8a 100644 --- a/package/MDAnalysis/analysis/leaflet.py +++ b/package/MDAnalysis/analysis/leaflet.py @@ -79,6 +79,7 @@ from .. import core from . import distances +from .. 
import selections from ..due import due, Doi @@ -174,7 +175,7 @@ def _get_graph(self): # only try distance array try: adj = distances.contact_matrix(coord, cutoff=self.cutoff, returntype="numpy", box=box) - except ValueError: + except ValueError: # pragma: no cover warnings.warn('N x N matrix too big, use sparse=True or sparse=None', category=UserWarning, stacklevel=2) raise @@ -186,7 +187,7 @@ def _get_graph(self): try: # this works for small-ish systems and depends on system memory adj = distances.contact_matrix(coord, cutoff=self.cutoff, returntype="numpy", box=box) - except ValueError: + except ValueError: # pragma: no cover # but use a sparse matrix method for larger systems for memory reasons warnings.warn( 'N x N matrix too big - switching to sparse matrix method (works fine, but is currently rather ' diff --git a/testsuite/MDAnalysisTests/analysis/test_leaflet.py b/testsuite/MDAnalysisTests/analysis/test_leaflet.py index 143c815fbc5..3cac111e04a 100644 --- a/testsuite/MDAnalysisTests/analysis/test_leaflet.py +++ b/testsuite/MDAnalysisTests/analysis/test_leaflet.py @@ -26,13 +26,16 @@ from numpy.testing import assert_equal, assert_almost_equal import numpy as np +import networkx as NX from MDAnalysis.analysis.leaflet import LeafletFinder, optimize_cutoff from MDAnalysisTests.datafiles import Martini_membrane_gro - LIPID_HEAD_STRING = "name PO4" +def lines2one(lines): + """Join lines and squash all whitespace""" + return " ".join(" ".join(lines).split()) @pytest.fixture() def universe(): @@ -69,3 +72,105 @@ def test_optimize_cutoff(universe, lipid_heads): cutoff, N = optimize_cutoff(universe, lipid_heads, pbc=True) assert N == 2 assert_almost_equal(cutoff, 10.5, decimal=4) + + +def test_pbc_on_off(universe, lipid_heads): + lfls_pbc_on = LeafletFinder(universe, lipid_heads, pbc=True) + lfls_pbc_off = LeafletFinder(universe, lipid_heads, pbc=False) + assert lfls_pbc_on.graph.size() > lfls_pbc_off.graph.size() + + +def test_pbc_on_off_difference(universe, lipid_heads): + lfls_pbc_on = LeafletFinder(universe, lipid_heads, cutoff=7, pbc=True) + lfls_pbc_off = LeafletFinder(universe, lipid_heads, cutoff=7, pbc=False) + pbc_on_graph = lfls_pbc_on.graph + pbc_off_graph = lfls_pbc_off.graph + diff_graph = NX.difference(pbc_on_graph, pbc_off_graph) + assert_equal(set(diff_graph.edges), {(69, 153), (73, 79), + (206, 317), (313, 319)}) + + +@pytest.mark.parametrize("sparse", [True, False, None]) +def test_sparse_on_off_none(universe, lipid_heads, sparse): + lfls_ag = LeafletFinder(universe, lipid_heads, cutoff=15.0, pbc=True, + sparse=sparse) + assert_almost_equal(len(lfls_ag.graph.edges), 1903, decimal=4) + + +def test_cutoff_update(universe, lipid_heads): + lfls_ag = LeafletFinder(universe, lipid_heads, cutoff=15.0, pbc=True) + lfls_ag.update(cutoff=1.0) + assert_almost_equal(lfls_ag.cutoff, 1.0, decimal=4) + assert_almost_equal(len(lfls_ag.groups()), 360, decimal=4) + + +def test_cutoff_update_default(universe, lipid_heads): + lfls_ag = LeafletFinder(universe, lipid_heads, cutoff=15.0, pbc=True) + lfls_ag.update() + assert_almost_equal(lfls_ag.cutoff, 15.0, decimal=4) + assert_almost_equal(len(lfls_ag.groups()), 2, decimal=4) + + +def test_write_selection(universe, lipid_heads, tmpdir): + lfls_ag = LeafletFinder(universe, lipid_heads, cutoff=15.0, pbc=True) + with tmpdir.as_cwd(): + filename = lfls_ag.write_selection('leaflet.vmd') + expected_output = lines2one([ + """# leaflets based on select= cutoff=15.000000 + # MDAnalysis VMD selection + atomselect macro leaflet_1 {index 1 13 25 37 49 61 
73 85 \\ + 97 109 121 133 145 157 169 181 \\ + 193 205 217 229 241 253 265 277 \\ + 289 301 313 325 337 349 361 373 \\ + 385 397 409 421 433 445 457 469 \\ + 481 493 505 517 529 541 553 565 \\ + 577 589 601 613 625 637 649 661 \\ + 673 685 697 709 721 733 745 757 \\ + 769 781 793 805 817 829 841 853 \\ + 865 877 889 901 913 925 937 949 \\ + 961 973 985 997 1009 1021 1033 1045 \\ + 1057 1069 1081 1093 1105 1117 1129 1141 \\ + 1153 1165 1177 1189 1201 1213 1225 1237 \\ + 1249 1261 1273 1285 1297 1309 1321 1333 \\ + 1345 1357 1369 1381 1393 1405 1417 1429 \\ + 1441 1453 1465 1477 1489 1501 1513 1525 \\ + 1537 1549 1561 1573 1585 1597 1609 1621 \\ + 1633 1645 1657 1669 1681 1693 1705 1717 \\ + 1729 1741 1753 1765 1777 1789 1801 1813 \\ + 1825 1837 1849 1861 1873 1885 1897 1909 \\ + 1921 1933 1945 1957 1969 1981 1993 2005 \\ + 2017 2029 2041 2053 2065 2077 2089 2101 \\ + 2113 2125 2137 2149 } + # MDAnalysis VMD selection + atomselect macro leaflet_2 {index 2521 2533 2545 2557 2569 2581 2593 2605 \\ + 2617 2629 2641 2653 2665 2677 2689 2701 \\ + 2713 2725 2737 2749 2761 2773 2785 2797 \\ + 2809 2821 2833 2845 2857 2869 2881 2893 \\ + 2905 2917 2929 2941 2953 2965 2977 2989 \\ + 3001 3013 3025 3037 3049 3061 3073 3085 \\ + 3097 3109 3121 3133 3145 3157 3169 3181 \\ + 3193 3205 3217 3229 3241 3253 3265 3277 \\ + 3289 3301 3313 3325 3337 3349 3361 3373 \\ + 3385 3397 3409 3421 3433 3445 3457 3469 \\ + 3481 3493 3505 3517 3529 3541 3553 3565 \\ + 3577 3589 3601 3613 3625 3637 3649 3661 \\ + 3673 3685 3697 3709 3721 3733 3745 3757 \\ + 3769 3781 3793 3805 3817 3829 3841 3853 \\ + 3865 3877 3889 3901 3913 3925 3937 3949 \\ + 3961 3973 3985 3997 4009 4021 4033 4045 \\ + 4057 4069 4081 4093 4105 4117 4129 4141 \\ + 4153 4165 4177 4189 4201 4213 4225 4237 \\ + 4249 4261 4273 4285 4297 4309 4321 4333 \\ + 4345 4357 4369 4381 4393 4405 4417 4429 \\ + 4441 4453 4465 4477 4489 4501 4513 4525 \\ + 4537 4549 4561 4573 4585 4597 4609 4621 \\ + 4633 4645 4657 4669 } + +"""]) + + assert lines2one(open('leaflet.vmd').readlines()) == expected_output + + +def test_component_index_is_not_none(universe, lipid_heads): + lfls_ag = LeafletFinder(universe, lipid_heads, cutoff=15.0, pbc=True) + assert_almost_equal(len(lfls_ag.groups(component_index=0)), 180, decimal=4) From 9740d80abd6bc3927a925c621d1ba03cade8b457 Mon Sep 17 00:00:00 2001 From: Abhishek Shandilya Date: Thu, 12 Mar 2020 15:07:55 -0400 Subject: [PATCH 7/7] avoid importing ABCs from `collections.abc` to remove deprecation warning (#2607) * Fixes #2605 * test for `dict` instead of deprecated top level ABC `collections.MutableMapping` (compatible with Python 2.7 and 3.x) * updated AUTHORS and CHANGELOG --- package/AUTHORS | 1 + package/CHANGELOG | 4 +++- package/MDAnalysis/lib/util.py | 3 +-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/package/AUTHORS b/package/AUTHORS index 00cde2773cb..711e7054076 100644 --- a/package/AUTHORS +++ b/package/AUTHORS @@ -136,6 +136,7 @@ Chronological list of authors - Anshul Angaria - Shubham Sharma - Yuxuan Zhuang + - Abhishek Shandilya External code ------------- diff --git a/package/CHANGELOG b/package/CHANGELOG index fcda3ba8281..d633d88603e 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -16,11 +16,13 @@ The rules for this file: mm/dd/yy richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira, PicoCentauri, davidercruz, jbarnoud, RMeli, IAlibay, mtiberti, CCook96, Yuan-Yu, xiki-tempula, HTian1997, Iv-Hristov, hmacdope, AnshulAngaria, - ss62171, Luthaf, yuxuanzhuang + ss62171, 
Luthaf, yuxuanzhuang, abhishandy * 0.21.0 Fixes + * Fixed the deprecation warning from `collections` library in `flatten_dict` + (Issue #2605) * Fixed test for the `save_paths` and `load` methods of :class:`PSAnalysis` (Issue #2593) * Fixes outdated :class:`TRJReader` documentation on random frame access diff --git a/package/MDAnalysis/lib/util.py b/package/MDAnalysis/lib/util.py index 3807e10ca47..4d4e682197e 100644 --- a/package/MDAnalysis/lib/util.py +++ b/package/MDAnalysis/lib/util.py @@ -196,7 +196,6 @@ import re import io import warnings -import collections import functools from functools import wraps import textwrap @@ -1767,7 +1766,7 @@ def flatten_dict(d, parent_key=tuple()): new_key = parent_key + (k, ) else: new_key = parent_key + k - if isinstance(v, collections.MutableMapping): + if isinstance(v, dict): items.extend(flatten_dict(v, new_key).items()) else: items.append((new_key, v))
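The last hunk above swaps an isinstance check against the deprecated
`collections.MutableMapping` alias for a plain `dict` check. A minimal,
illustrative sketch of the difference (not part of the patch; the nested
dict is invented for the example):

    import collections.abc

    v = {"donor": {"acceptor": 1}}   # made-up nested mapping

    # Old spelling -- importing ABCs from `collections` instead of
    # `collections.abc` is deprecated since Python 3.3 and the aliases were
    # removed in Python 3.10:
    #     isinstance(v, collections.MutableMapping)
    # Spelling used by the patch, valid on Python 2.7 and 3.x alike:
    assert isinstance(v, dict)
    # Python-3-only alternative that keeps the ABC semantics:
    assert isinstance(v, collections.abc.MutableMapping)

Checking for `dict` is slightly narrower than the ABC (a custom MutableMapping
that does not derive from dict would no longer be recursed into by
`flatten_dict`), a trade-off accepted here to silence the warning on Python 3
while still running unchanged on Python 2.7.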