Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add timeseries for all readers #3890

Merged
merged 21 commits into from
Nov 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ The rules for this file:
* 2.4.0

Fixes
* NoDataError raised on empty atomgroup provided to `DCDReader.timeseries`
was changed to ValueError (see #3901). A matching ValueError was added to
`MemoryReader.timeseries`(PR #3890).
* Removes ``pbc`` kwarg from ``AtomGroup.wrap`` (Issue #3909)
* LAMMPSDump Reader translates the box to the origin (#3741)
* hbond analysis: access hbonds results through new results member in count_by_ids() and count_by_type()
Expand All @@ -32,6 +35,8 @@ Fixes
(e.g. bonds, angles) (PR #3779).

Enhancements
* The timeseries method for exporting coordinates from multiple frames to a
NumPy array was added to ProtoReader (PR #3890)
* MDAnalysis now officially supports py3.11 (Issue #3878)
* LAMMPSDump Reader optionally unwraps trajectories with image flags upon
loading (#3843
Expand Down
6 changes: 4 additions & 2 deletions package/MDAnalysis/coordinates/DCD.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@
import warnings

from .. import units as mdaunits # use mdaunits instead of units to avoid a clash
from ..exceptions import NoDataError
from . import base, core
from ..lib.formats.libdcd import DCDFile
from ..lib.mdamath import triclinic_box
Expand Down Expand Up @@ -298,13 +297,16 @@ def timeseries(self,

.. versionchanged:: 1.0.0
`skip` and `format` keywords have been removed.
.. versionchanged:: 2.4.0
ValueError now raised instead of NoDataError for empty input
AtomGroup
"""

start, stop, step = self.check_slice_indices(start, stop, step)

if asel is not None:
if len(asel) == 0:
raise NoDataError(
raise ValueError(
"Timeseries requires at least one atom to analyze")
atom_numbers = list(asel.indices)
else:
Expand Down
73 changes: 72 additions & 1 deletion package/MDAnalysis/coordinates/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@

from .timestep import Timestep
from . import core
from .. import NoDataError
from .. import (
_READERS, _READER_HINTS,
_SINGLEFRAME_WRITERS,
Expand Down Expand Up @@ -980,6 +979,78 @@ def __repr__(self):
nframes=self.n_frames,
natoms=self.n_atoms
))

def timeseries(self, asel: Optional['AtomGroup']=None,
start: Optional[int]=None, stop: Optional[int]=None,
step: Optional[int]=None,
order: Optional[str]='fac') -> np.ndarray:
"""Return a subset of coordinate data for an AtomGroup

Parameters
----------
asel : AtomGroup (optional)
hmacdope marked this conversation as resolved.
Show resolved Hide resolved
The :class:`~MDAnalysis.core.groups.AtomGroup` to read the
coordinates from. Defaults to ``None``, in which case the full set
of coordinate data is returned.
start : int (optional)
Begin reading the trajectory at frame index `start` (where 0 is the
index of the first frame in the trajectory); the default
``None`` starts at the beginning.
stop : int (optional)
End reading the trajectory at frame index `stop`-1, i.e, `stop` is
excluded. The trajectory is read to the end with the default
``None``.
step : int (optional)
Step size for reading; the default ``None`` is equivalent to 1 and
means to read every frame.
order : str (optional)
the order/shape of the return data array, corresponding
to (a)tom, (f)rame, (c)oordinates all six combinations
of 'a', 'f', 'c' are allowed ie "fac" - return array
where the shape is (frame, number of atoms,
coordinates)

See Also
--------
:class:`MDAnalysis.coordinates.memory`


.. versionadded:: 2.4.0
"""
start, stop, step = self.check_slice_indices(start, stop, step)
nframes = len(range(start, stop, step))

if asel is not None:
if len(asel) == 0:
raise ValueError(
"Timeseries requires at least one atom to analyze")
atom_numbers = asel.indices
natoms = len(atom_numbers)
hmacdope marked this conversation as resolved.
Show resolved Hide resolved
else:
natoms = self.n_atoms
atom_numbers = np.arange(natoms)

# allocate output array in 'fac' order
coordinates = np.empty((nframes, natoms, 3), dtype=np.float32)
for i, ts in enumerate(self[start:stop:step]):
coordinates[i, :] = ts.positions[atom_numbers]

# switch axes around
default_order = 'fac'
if order != default_order:
try:
newidx = [default_order.index(i) for i in order]
except ValueError:
raise ValueError(f"Unrecognized order key in {order}, "
"must be permutation of 'fac'")

try:
coordinates = np.moveaxis(coordinates, newidx, [0, 1, 2])
except ValueError:
errmsg = ("Repeated or missing keys passed to argument "
f"`order`: {order}, each key must be used once")
raise ValueError(errmsg)
return coordinates

hmacdope marked this conversation as resolved.
Show resolved Hide resolved
# TODO: Change order of aux_spec and auxdata for 3.0 release, cf. Issue #3811
def add_auxiliary(self,
Expand Down
6 changes: 6 additions & 0 deletions package/MDAnalysis/coordinates/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,9 @@ def timeseries(self, asel=None, start=0, stop=-1, step=1, order='afc'):

.. versionchanged:: 1.0.0
Deprecated `format` keyword has been removed. Use `order` instead.
.. versionchanged:: 2.4.0
ValueError now raised instead of NoDataError for empty input
AtomGroup
"""
if stop != -1:
warnings.warn("MemoryReader.timeseries inclusive `stop` "
Expand Down Expand Up @@ -559,6 +562,9 @@ def timeseries(self, asel=None, start=0, stop=-1, step=1, order='afc'):
if (asel is None or asel is asel.universe.atoms):
return array
else:
if len(asel) == 0:
raise ValueError("Timeseries requires at least one atom "
"to analyze")
# If selection is specified, return a copy
return array.take(asel.indices, a_index)

Expand Down
39 changes: 38 additions & 1 deletion testsuite/MDAnalysisTests/coordinates/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import MDAnalysis as mda
from MDAnalysis.coordinates.timestep import Timestep
from MDAnalysis.transformations import translate
from MDAnalysis import NoDataError


from MDAnalysisTests.coordinates.reference import RefAdKSmall
Expand Down Expand Up @@ -445,6 +444,44 @@ def test_pickle_reader(self, reader):
assert_equal(reader.ts, reader_p.ts,
"Timestep is changed after pickling")

@pytest.mark.parametrize('order', ('fac', 'fca', 'afc', 'acf', 'caf', 'cfa'))
def test_timeseries_shape(self, reader, order):
timeseries = reader.timeseries(order=order)
a_index = order.index('a')
f_index = order.index('f')
c_index = order.index('c')
assert(timeseries.shape[a_index] == reader.n_atoms)
assert(timeseries.shape[f_index] == len(reader))
assert(timeseries.shape[c_index] == 3)

@pytest.mark.parametrize('slice', ([0,2,1], [0,10,2], [0,10,3]))
def test_timeseries_values(self, reader, slice):
ts_positions = []
if isinstance(reader, mda.coordinates.memory.MemoryReader):
pytest.xfail("MemoryReader uses deprecated stop inclusive"
hmacdope marked this conversation as resolved.
Show resolved Hide resolved
" indexing, see Issue #3893")
if slice[1] > len(reader):
pytest.skip("too few frames in reader")
for i in range(slice[0], slice[1], slice[2]):
ts = reader[i]
ts_positions.append(ts.positions.copy())
positions = np.asarray(ts_positions)
timeseries = reader.timeseries(start=slice[0], stop=slice[1],
step=slice[2], order='fac')
assert_allclose(timeseries, positions)

hmacdope marked this conversation as resolved.
Show resolved Hide resolved
@pytest.mark.parametrize('asel', ("index 1", "index 2", "index 1 to 3"))
def test_timeseries_asel_shape(self, reader, asel):
atoms = mda.Universe(reader.filename).select_atoms(asel)
timeseries = reader.timeseries(atoms, order='fac')
assert(timeseries.shape[0] == len(reader))
assert(timeseries.shape[1] == len(atoms))
assert(timeseries.shape[2] == 3)

def test_timeseries_empty_asel(self, reader):
atoms = mda.Universe(reader.filename).select_atoms(None)
with pytest.raises(ValueError, match="Timeseries requires at least"):
reader.timeseries(atoms)

class MultiframeReaderTest(BaseReaderTest):
def test_last_frame(self, ref, reader):
Expand Down
3 changes: 1 addition & 2 deletions testsuite/MDAnalysisTests/coordinates/test_dcd.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

import MDAnalysis as mda
from MDAnalysis.coordinates.DCD import DCDReader
from MDAnalysis.exceptions import NoDataError

from numpy.testing import (assert_equal, assert_array_equal,
assert_almost_equal, assert_array_almost_equal)
Expand Down Expand Up @@ -231,7 +230,7 @@ def test_timeseries_atomindices(indices, universe_dcd):


def test_timeseries_empty_selection(universe_dcd):
with pytest.raises(NoDataError):
with pytest.raises(ValueError):
asel = universe_dcd.select_atoms('name FOO')
universe_dcd.trajectory.timeseries(asel=asel)

Expand Down
9 changes: 9 additions & 0 deletions testsuite/MDAnalysisTests/coordinates/test_reader_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,15 @@ def test_raises_StopIteration(self, reader):
with pytest.raises(StopIteration):
next(reader)

@pytest.mark.parametrize('order', ['turnip', 'abc'])
def test_timeseries_raises_unknown_order_key(self, reader, order):
with pytest.raises(ValueError, match="Unrecognized order key"):
reader.timeseries(order=order)

@pytest.mark.parametrize('order', ['faac', 'affc', 'afcc', ''])
def test_timeseries_raises_incorrect_order_key(self, reader, order):
with pytest.raises(ValueError, match="Repeated or missing keys"):
reader.timeseries(order=order)

class _Multi(_TestReader):
n_frames = 10
Expand Down