Merge pull request MDAnalysis#916 from jdetle/slicing_fixes

Slicing fixes
abiedermann · Aug 2, 2016 · 4bb20c6 · 4bb20c6
2 parents 6571e2f + 1407b8e
commit 4bb20c6
Show file tree

Hide file tree

Showing 12 changed files with 413 additions and 314 deletions.
diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -36,7 +36,8 @@ Fixes
   * Display of Deprecation warnings doesn't affect other modules anymore (Issue #754)
   * Changed nframes to n_frames in analysis modules for consistency (Issue #890)
   * fixed incompatibility with newer matplotlib in analysis.hole
-
+  * Fixed modules that improperly documented and/or used frame slicing 
+    defaults (#914)
 Changes
   * Added protected variable _frame_index to to keep track of frame iteration
     number in AnalysisBase

diff --git a/package/MDAnalysis/analysis/contacts.py b/package/MDAnalysis/analysis/contacts.py
@@ -396,11 +396,13 @@ def __init__(self, u, selection, refgroup, method="hard_cut", radius=4.5,
             dictionary of additional kwargs passed to `method`. Check
             respective functions for reasonable values.
         start : int, optional
-            First frame of trajectory to analyse, Default: 0
+            First frame of trajectory to analyse, Default: None becomes 0.
         stop : int, optional
-            Last frame of trajectory to analyse, Default: -1
+            Frame index to stop analysis. Default: None becomes
+            n_frames. Iteration stops *before* this frame number,
+            which means that the trajectory would be read until the end.
         step : int, optional
-            Step between frames to analyse, Default: 1
+            Step between frames to analyse, Default: None becomes 1.
 
         """
         self.u = u

diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py
@@ -227,11 +227,13 @@ def __init__(self, u, select='all', metric=rmsd, cutoff=1E0-5,
         weights : array, optional
             Weights to be given to coordinates for metric calculation
         start : int, optional
-            First frame of trajectory to analyse, Default: 0
+            First frame of trajectory to analyse, Default: None becomes 0.
         stop : int, optional
-            Last frame of trajectory to analyse, Default: -1
+            Frame index to stop analysis. Default: None becomes
+            n_frames. Iteration stops *before* this frame number,
+            which means that the trajectory would be read until the end.
         step : int, optional
-            Step between frames to analyse, Default: 1
+            Step between frames to analyse, Default: None becomes 1.
         """
         self._u = u
         traj = self._u.trajectory

diff --git a/package/MDAnalysis/analysis/polymer.py b/package/MDAnalysis/analysis/polymer.py
@@ -59,11 +59,13 @@ def __init__(self, atomgroups, **kwargs):
             List of atomgroups.  Each atomgroup should represent a single
             polymer chain, ordered in the correct order.
         start : int, optional
-            First frame of trajectory to analyse, Default: 0
+            First frame of trajectory to analyse, Default: None becomes 0.
         stop : int, optional
-            Last frame of trajectory to analyse, Default: -1
+            Last frame of trajectory to analyse, Default: None becomes
+            n_frames.
         step : int, optional
-            Step between frames to analyse, Default: 1
+            Frame index to stop analysis. Default: None becomes
+            n_frames. Iteration stops *before* this frame number.
         """
         super(PersistenceLength, self).__init__(
             atomgroups[0].universe.trajectory, **kwargs)

diff --git a/package/MDAnalysis/analysis/rms.py b/package/MDAnalysis/analysis/rms.py
@@ -556,7 +556,7 @@ def __init__(self, atomgroup):
         self.atomgroup = atomgroup
         self._rmsf = None
 
-    def run(self, start=0, stop=-1, step=1, progout=10, quiet=False):
+    def run(self, start=None, stop=None, step=None, progout=10, quiet=False):
         """Calculate RMSF of given atoms across a trajectory.
 
         This method implements an algorithm for computing sums of squares while
@@ -565,11 +565,13 @@ def run(self, start=0, stop=-1, step=1, progout=10, quiet=False):
         Parameters
         ----------
         start : int (optional)
-            starting frame [0]
+            starting frame, default None becomes 0.
         stop : int (optional)
-            stopping frame [-1]
+            Frame index to stop analysis. Default: None becomes
+            n_frames. Iteration stops *before* this frame number,
+            which means that the trajectory would be read until the end.
         step : int (optional)
-            step between frames [1]
+            step between frames, default None becomes 1.
         progout : int (optional)
             number of frames to iterate through between updates to progress
             output; ``None`` for no updates [10]
@@ -583,6 +585,8 @@ def run(self, start=0, stop=-1, step=1, progout=10, quiet=False):
            Calculating Corrected Sums of Squares and Products." Technometrics
            4(3):419-420.
         """
+        traj = self.atomgroup.universe.trajectory
+        start, stop, step = traj.check_slice_indices(start, stop, step)
         sumsquares = np.zeros((self.atomgroup.n_atoms, 3))
         means = np.array(sumsquares)
 

diff --git a/package/MDAnalysis/coordinates/DCD.py b/package/MDAnalysis/coordinates/DCD.py
@@ -73,6 +73,7 @@
 import numpy as np
 import struct
 import types
+import warnings
 
 from ..core import flags
 from .. import units as mdaunits  # use mdaunits instead of units to avoid a clash
@@ -85,6 +86,7 @@
 from . import dcdtimeseries
 
 
+
 class Timestep(base.Timestep):
     #: Indices into :attr:`Timestep._unitcell` (``[A, gamma, B, beta, alpha,
     #: C]``, provided by the :class:`DCDReader` C code) to pull out
@@ -498,22 +500,33 @@ def _read_frame(self, frame):
         ts.frame = frame
         return ts
 
-    def timeseries(self, asel, start=0, stop=-1, skip=1, format='afc'):
+    def timeseries(self, asel, start=None, stop=None, step=None, skip=None,
+                   format='afc'):
         """Return a subset of coordinate data for an AtomGroup
 
         :Arguments:
             *asel*
                :class:`~MDAnalysis.core.AtomGroup.AtomGroup` object
-            *start, stop, skip*
-               range of trajectory to access, start and stop are inclusive
+            *start, stop, step*
+               A range of the trajectory to access, with start being inclusive
+               and stop being exclusive.
             *format*
                the order/shape of the return data array, corresponding
                to (a)tom, (f)rame, (c)oordinates all six combinations
                of 'a', 'f', 'c' are allowed ie "fac" - return array
                where the shape is (frame, number of atoms,
                coordinates)
+        :Deprecated:
+            *skip*
+                Skip has been deprecated in favor of the standard keyword step.
         """
-        start, stop, skip = self.check_slice_indices(start, stop, skip)
+        if skip is not None:
+            step = skip
+            warnings.warn("Skip is deprecated and will be removed in"
+                          "in 1.0. Use step instead.",
+                          category=DeprecationWarning)
+
+        start, stop, step = self.check_slice_indices(start, stop, step)
         if len(asel) == 0:
             raise NoDataError("Timeseries requires at least one atom to analyze")
         if len(format) != 3 and format not in ['afc', 'acf', 'caf', 'cfa', 'fac', 'fca']:
@@ -522,26 +535,36 @@ def timeseries(self, asel, start=0, stop=-1, skip=1, format='afc'):
         # Check if the atom numbers can be grouped for efficiency, then we can read partial buffers
         # from trajectory file instead of an entire timestep
         # XXX needs to be implemented
-        return self._read_timeseries(atom_numbers, start, stop, skip, format)
+        return self._read_timeseries(atom_numbers, start, stop, step, format)
 
-    def correl(self, timeseries, start=0, stop=-1, skip=1):
+    def correl(self, timeseries, start=None, stop=None, step=None, skip=None):
         """Populate a TimeseriesCollection object with timeseries computed from the trajectory
 
         :Arguments:
             *timeseries*
                :class:`MDAnalysis.core.Timeseries.TimeseriesCollection`
-            *start, stop, skip*
-               subset of trajectory to use, with start and stop being inclusive
+            *start, stop, step*
+               A subset of the trajectory to use, with start being inclusive
+               and stop being exclusive.
+        :Deprecated:
+            *skip*
+                Skip has been deprecated in favor of the standard keyword step.
         """
-        start, stop, skip = self.check_slice_indices(start, stop, skip)
+        if skip is not None:
+            step = skip
+            warnings.warn("Skip is deprecated and will be removed in"
+                          "in 1.0. Use step instead.",
+                          category=DeprecationWarning)
+
+        start, stop, step = self.check_slice_indices(start, stop, step)
         atomlist = timeseries._getAtomList()
         format = timeseries._getFormat()
         lowerb, upperb = timeseries._getBounds()
         sizedata = timeseries._getDataSize()
         atomcounts = timeseries._getAtomCounts()
         auxdata = timeseries._getAuxData()
         return self._read_timecorrel(atomlist, atomcounts, format, auxdata,
-                                     sizedata, lowerb, upperb, start, stop, skip)
+                                     sizedata, lowerb, upperb, start, stop, step)
 
     def close(self):
         if self.dcdfile is not None:

diff --git a/package/MDAnalysis/coordinates/base.py b/package/MDAnalysis/coordinates/base.py
@@ -1230,7 +1230,7 @@ def check_slice_indices(self, start, stop, step):
         ----------
         start, stop, step : int or None
           Values representing the slice indices.
-          Can use `None` to use defaults of (0, -1, and 1)
+          Can use `None` to use defaults of (0, n_frames, and 1)
           respectively.
 
         Returns

diff --git a/package/MDAnalysis/coordinates/dcdtimeseries.pyx b/package/MDAnalysis/coordinates/dcdtimeseries.pyx
@@ -59,7 +59,7 @@ cdef extern from "correl.h":
 
 import numpy as np
 
-def __read_timecorrel(object self, object atoms, object atomcounts, object format, object auxdata, int sizedata, int lowerb, int upperb, int start, int stop, int skip):
+def __read_timecorrel(object self, object atoms, object atomcounts, object format, object auxdata, int sizedata, int lowerb, int upperb, int start, int stop, int step):
     cdef dcdhandle* dcd
     cdef numpy.ndarray atomlist, atomcountslist, auxlist
     cdef numpy.ndarray data, temp
@@ -69,8 +69,9 @@ def __read_timecorrel(object self, object atoms, object atomcounts, object forma
 
     dcd = <dcdhandle*>PyCObject_AsVoidPtr(self._dcd_C_ptr)
     cdef int n_frames
-    if (stop == -1): stop = dcd.nsets
-    n_frames = (stop-start+1) / skip
+
+    n_frames = ((stop-start) / step);
+    if (stop-start) % step > 0 : n_frames += 1;
     cdef int numdata
     numdata = len(format)
     if numdata==0:
@@ -109,18 +110,31 @@ def __read_timecorrel(object self, object atoms, object atomcounts, object forma
     cdef int index, numskip
     cdef int i, j
     cdef float unitcell[6]
+    cdef int remaining_frames = stop-start
+    numskip = 0
     for i from 0 <= i < n_frames:
-        if (skip > 1):
+        if (step > 1 and i > 0):
             # Check if we have fixed atoms
             # XXX not done
-            numskip = skip - (dcd.setsread % skip) - 1
+            # Figure out how many steps to step over, if step = n, np array
+            # slicing treats this as skip over n-1, read the nth.
+            numskip = step - 1
+            # If the number to skip is greater than the number of frames left
+            # to be jumped over, just take one more step to reflect np slicing
+            # if there is a remainder, guaranteed to have at least one more
+            # frame.
+            if(remaining_frames < numskip):
+               numskip = 1
+
             rc = skip_dcdstep(dcd.fd, dcd.natoms, dcd.nfixed, dcd.charmm, numskip)
             if (rc < 0):
                 raise IOError("Error skipping frame from DCD file")
-            dcd.setsread = dcd.setsread + numskip
+        # on first iteration, numskip = 0, first set is always read.
+        dcd.setsread += numskip
         rc = read_dcdsubset(dcd.fd, dcd.natoms, lowerb, upperb, tempX, tempY, tempZ, unitcell, dcd.nfixed, dcd.first, dcd.freeind, dcd.fixedcoords, dcd.reverse, dcd.charmm)
-        dcd.first=0
-        dcd.setsread = dcd.setsread + 1
+        dcd.first = 0
+        dcd.setsread += 1
+        remaining_frames = stop - dcd.setsread
         if (rc < 0):
             raise IOError("Error reading frame from DCD file")
         # Copy into data array based on format