From 2fe4bb8ebf1fbca8c5381155a7a7316868bd79f4 Mon Sep 17 00:00:00 2001
From: Max Linke
Date: Mon, 25 Sep 2017 20:29:38 +0200
Subject: [PATCH 1/3] use functions instead of classes for encore cython files

This fixes a pickle error we have seen with recent versions of Cython and
our mocked imports. Cython now introduces special `__reduce__` and
`__setstate__` functions to enable cross-Python pickling for simple
classes. It also tried to do this for some encore classes that only had a
`run` method and would overwrite the `__call__` special method. So let's
make them functions right away.
---
 .../encore/clustering/ClusteringMethod.py      |   2 +-
 .../encore/clustering/affinityprop.pyx         | 169 +++++++++---------
 .../DimensionalityReductionMethod.py           |   2 +-
 .../stochasticproxembed.pyx                    |  86 ++++-----
 4 files changed, 121 insertions(+), 138 deletions(-)

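The change itself is mechanical: a `cdef class` whose only purpose was to expose a `run` method becomes a plain module-level function, so Cython no longer attaches its auto-generated pickling helpers to it, and call sites simply drop the `().run` step. A minimal sketch of the idea in plain Python (the names below are illustrative stand-ins, not the encore implementation):

    # Illustrative sketch only: plain-Python stand-ins, not the encore code.
    import pickle

    class AffinityPropagationRunner(object):
        # Old pattern: a wrapper class whose only job is to expose ``run``.
        def run(self, s, preference):
            return [0] * len(s)  # placeholder result

    def affinity_propagation(s, preference):
        # New pattern: a plain module-level function doing the same work.
        return [0] * len(s)  # placeholder result

    # A module-level function is pickled by reference to its qualified name,
    # so no class-specific __reduce__/__setstate__ machinery is involved.
    assert pickle.loads(pickle.dumps(affinity_propagation)) is affinity_propagation

    # Call-site change, mirroring the diffs below: drop the ``().run`` step.
    dm = [[0.0, 1.0], [1.0, 0.0]]  # hypothetical toy distance matrix
    assert AffinityPropagationRunner().run(dm, -1.0) == affinity_propagation(dm, -1.0)
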
diff --git a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py
index b9506e9c514..46e6f9607f4 100644
--- a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py
+++ b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py
@@ -148,7 +148,7 @@ def __call__(self, distance_matrix):
         numpy.array
             list of cluster indices
         """
-        clusters = affinityprop.AffinityPropagation().run(
+        clusters = affinityprop.AffinityPropagation(
             s=distance_matrix * -1.,          # invert sign
             preference=self.preference,
             lam=self.damping,
diff --git a/package/MDAnalysis/analysis/encore/clustering/affinityprop.pyx b/package/MDAnalysis/analysis/encore/clustering/affinityprop.pyx
index f43b03a83e5..3e028addaeb 100644
--- a/package/MDAnalysis/analysis/encore/clustering/affinityprop.pyx
+++ b/package/MDAnalysis/analysis/encore/clustering/affinityprop.pyx
@@ -34,8 +34,7 @@ cimport cython
 @cython.boundscheck(False)
 @cython.wraparound(False)
-
-cdef class AffinityPropagation(object):
+def AffinityPropagation(s, preference, float lam, int max_iterations, int convergence, int noise=1):
     """
     Affinity propagation clustering algorithm. This class is a Cython wrapper around the Affinity propagation algorithm, which is implement as a C library (see ap.c). The implemented algorithm is described in the paper:
@@ -43,99 +42,93 @@ cdef class AffinityPropagation(object):
     Brendan J. Frey and Delbert Dueck, University of Toronto
     Science 315, 972–976, February 2007
 
-    """
-
-    def run(self, s, preference, float lam, int max_iterations, int convergence, int noise=1):
-        """
-        Run the clustering algorithm.
-
-        Parameters
-        ----------
-
-        s : encore.utils.TriangularMatrix object
-            Triangular matrix containing the similarity values for each pair of clustering elements. Notice that the current implementation does not allow for asymmetric values (i.e. similarity(a,b) is assumed to be equal to similarity(b,a))
-
-        preference : numpy.array of floats or float
-            Preference values, which the determine the number of clusters. If a single value is given, all the preference values are set to that. Otherwise, the list is used to set the preference values (one value per element, so the list must be of the same size as the number of elements)
-
-        lam : float
-            Floating point value that defines how much damping is applied to the solution at each iteration. Must be ]0,1]
-
-        max_iterations : int
-            Maximum number of iterations
-
-        convergence : int
-            Number of iterations in which the cluster centers must remain the same in order to reach convergence
-
-        noise : int
-            Whether to apply noise to the input s matrix, such there are no equal values. 1 is for yes, 0 is for no.
-
-        Returns
-        -------
-
-        elements : list of int or None
-            List of cluster-assigned elements, which can be used by encore.utils.ClustersCollection to generate Cluster objects. See these classes for more details.
-
-        """
+    Parameters
+    ----------
+    s : encore.utils.TriangularMatrix object
+        Triangular matrix containing the similarity values for each pair of clustering elements. Notice that the current implementation does not allow for asymmetric values (i.e. similarity(a,b) is assumed to be equal to similarity(b,a))
+
+preference : numpy.array of floats or float
+        Preference values, which the determine the number of clusters. If a single value is given, all the preference values are set to that. Otherwise, the list is used to set the preference values (one value per element, so the list must be of the same size as the number of elements)
+
+    lam : float
+        Floating point value that defines how much damping is applied to the solution at each iteration. Must be ]0,1]
+
+    max_iterations : int
+        Maximum number of iterations
+
+    convergence : int
+        Number of iterations in which the cluster centers must remain the same in order to reach convergence
+
+    noise : int
+        Whether to apply noise to the input s matrix, such there are no equal values. 1 is for yes, 0 is for no.
+
+    Returns
+    -------
+
+    elements : list of int or None
+        List of cluster-assigned elements, which can be used by encore.utils.ClustersCollection to generate Cluster objects. See these classes for more details.
+
+"""
 
-        # Provide warning in case of lack of convergence
-        if iterations == 0:
-            logging.info("Preference %3.2f: could not converge in %d iterations" % (preference, -iterations))
-            import warnings
-            warnings.warn("Clustering with preference {0:3.2f} did not fully converge in {1:d} iterations".format(preference, -iterations))
-
-        # Find centroids
-        centroids = numpy.unique(clusters)
-        for k in numpy.arange(centroids.shape[0]):
-            ii = numpy.where(clusters == centroids[k])[0]
-            small_mat = numpy.zeros((ii.shape[0], ii.shape[0]))
-            for ii1 in numpy.arange(ii.shape[0]):
-                for ii2 in numpy.arange(ii.shape[0]):
-                    small_mat[ii1,ii2] = s[ ii[ii1], ii[ii2] ]
-            j = numpy.argmax(numpy.sum(small_mat, axis=0))
-
-            centroids[k] = ii[j]
-
-        # Similarity to centroids
-        S_centroids = numpy.zeros((s.size, centroids.shape[0]))
-        for line in numpy.arange(s.size):
-            for c in numpy.arange(centroids.shape[0]):
-                S_centroids[line,c] = s[line, centroids[c]]
-
-        # Center values for each observation
-        c = numpy.argmax(S_centroids, axis=1)
-
-        # Centroids should point to themselves
-        c[centroids] = numpy.arange(centroids.shape[0])
-
-        # Assign centroid indices to all observables
-        clusters = centroids[c]
-
-        logging.info("Preference %3.2f: converged in %d iterations" % (preference, iterations))
-
-        return clusters
+    cdef int cn = s.size
+    cdef float cpreference = preference
+
+    # Assign preference values to diagonal
+    try:
+        for i in xrange(s.size):
+            s[i,i] = preference[i]
+    except:
+        pass
+
+    if type(preference) == float:
+        for i in xrange(s.size):
+            s[i,i] = preference
+    else:
+        raise TypeError ("Preference should be of type float")
+
+    logging.info("Preference %3.2f: starting Affinity Propagation" % (preference))
+
+    # Prepare input and output arrays
+    cdef numpy.ndarray[numpy.float32_t, ndim=1] matndarray = numpy.ascontiguousarray(s._elements, dtype=numpy.float32)
+    cdef numpy.ndarray[long, ndim=1] clusters = numpy.zeros((s.size),dtype=long)
+
+    # run C module Affinity Propagation
+    iterations = caffinityprop.CAffinityPropagation( matndarray.data, cn, lam, max_iterations, convergence, noise, clusters.data)
+
+    # Provide warning in case of lack of convergence
+    if iterations == 0:
+        logging.info("Preference %3.2f: could not converge in %d iterations" % (preference, -iterations))
+        import warnings
+        warnings.warn("Clustering with preference {0:3.2f} did not fully converge in {1:d} iterations".format(preference, -iterations))
+
+    # Find centroids
+    centroids = numpy.unique(clusters)
+    for k in numpy.arange(centroids.shape[0]):
+        ii = numpy.where(clusters == centroids[k])[0]
+        small_mat = numpy.zeros((ii.shape[0], ii.shape[0]))
+        for ii1 in numpy.arange(ii.shape[0]):
+            for ii2 in numpy.arange(ii.shape[0]):
+                small_mat[ii1,ii2] = s[ ii[ii1], ii[ii2] ]
+        j = numpy.argmax(numpy.sum(small_mat, axis=0))
+
+        centroids[k] = ii[j]
+
+    # Similarity to centroids
+    S_centroids = numpy.zeros((s.size, centroids.shape[0]))
+    for line in numpy.arange(s.size):
+        for c in numpy.arange(centroids.shape[0]):
+            S_centroids[line,c] = s[line, centroids[c]]
+
+    # Center values for each observation
+    c = numpy.argmax(S_centroids, axis=1)
+
+    # Centroids should point to themselves
+    c[centroids] = numpy.arange(centroids.shape[0])
+
+    # Assign centroid indices to all observables
+    clusters = centroids[c]
+
+    logging.info("Preference %3.2f: converged in %d iterations" % (preference, iterations))
+
+    return clusters
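For orientation, this is how the reworked clustering entry point is now driven; a hedged sketch assuming the `TriangularMatrix` helper from `encore.utils` referenced in the docstring, with made-up numeric values:

    # Illustrative sketch, not part of the patch: calling the module-level
    # AffinityPropagation function directly, as ClusteringMethod.py now does.
    from MDAnalysis.analysis.encore.utils import TriangularMatrix
    from MDAnalysis.analysis.encore.clustering import affinityprop

    distance_matrix = TriangularMatrix(size=4)          # toy 4-element matrix
    for i in range(4):
        for j in range(i + 1):
            distance_matrix[i, j] = float(abs(i - j))   # made-up distances

    clusters = affinityprop.AffinityPropagation(
        s=distance_matrix * -1.,   # similarities are negated distances
        preference=-1.0,           # hypothetical preference value
        lam=0.9,                   # hypothetical damping, must lie in ]0, 1]
        max_iterations=500,
        convergence=50,
        noise=1)
    # ``clusters`` maps each element to the index of its cluster centroid.
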
diff --git a/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py b/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py
index e5c019fcac3..cb63ca80c7c 100644
--- a/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py
+++ b/package/MDAnalysis/analysis/encore/dimensionality_reduction/DimensionalityReductionMethod.py
@@ -137,7 +137,7 @@ def __call__(self, distance_matrix):
         """
         final_stress, coordinates = \
-            stochasticproxembed.StochasticProximityEmbedding().run(
+            stochasticproxembed.StochasticProximityEmbedding(
                 s=distance_matrix,
                 rco=self.distance_cutoff,
                 dim=self.dimension,
diff --git a/package/MDAnalysis/analysis/encore/dimensionality_reduction/stochasticproxembed.pyx b/package/MDAnalysis/analysis/encore/dimensionality_reduction/stochasticproxembed.pyx
index 45262298dd4..b7f15c5d4db 100644
--- a/package/MDAnalysis/analysis/encore/dimensionality_reduction/stochasticproxembed.pyx
+++ b/package/MDAnalysis/analysis/encore/dimensionality_reduction/stochasticproxembed.pyx
@@ -36,78 +36,68 @@ cimport cython
 @cython.embedsignature(True)
-
-cdef class StochasticProximityEmbedding:
+def StochasticProximityEmbedding(s, double rco, int dim, double maxlam, double minlam, int ncycle, int nstep, int stressfreq):
     """
     Stochastic proximity embedding dimensionality reduction algorithm. The
     algorithm implemented here is described in this paper:
 
-        Dmitrii N. Rassokhin, Dimitris K. Agrafiotis
-        A modified update rule for stochastic proximity embedding
-        Journal of Molecular Graphics and Modelling 22 (2003) 133–140
+    Dmitrii N. Rassokhin, Dimitris K. Agrafiotis
+    A modified update rule for stochastic proximity embedding
+    Journal of Molecular Graphics and Modelling 22 (2003) 133–140
 
     This class is a Cython wrapper for a C implementation (see spe.c)
 
-    """
-
-    def run(self, s, double rco, int dim, double maxlam, double minlam, int ncycle, int nstep, int stressfreq):
-        """
-        Run stochastic proximity embedding.
-
-        Parameters:
-        ----------
-
-        s : encore.utils.TriangularMatrix object
-            Triangular matrix containing the distance values for each pair of
-            elements in the original space.
-
-        rco : float
-            neighborhood distance cut-off
-
-        dim : int
-            number of dimensions for the embedded space
-
-        minlam : float
-            final learning parameter
-
-        maxlam : float
-            starting learning parameter
-
-        ncycle : int
-            number of cycles. Each cycle is composed of nstep steps. At the end
-            of each cycle, the lerning parameter lambda is updated.
-
-        nstep : int
-            number of coordinate update steps for each cycle
-
-        Returns
-        -------
-
-        space : (float, numpy.array)
-            float is the final stress obtained; the array are the coordinates of
-            the elements in the embedded space
-
-        stressfreq : int
-            calculate and report stress value every stressfreq cycle
-
-        """
+    Parameters:
+    ----------
+
+    s : encore.utils.TriangularMatrix object
+        Triangular matrix containing the distance values for each pair of
+        elements in the original space.
+
+    rco : float
+        neighborhood distance cut-off
+
+    dim : int
+        number of dimensions for the embedded space
+
+    minlam : float
+        final learning parameter
+
+    maxlam : float
+        starting learning parameter
+
+    ncycle : int
+        number of cycles. Each cycle is composed of nstep steps. At the end
+        of each cycle, the learning parameter lambda is updated.
+
+    nstep : int
+        number of coordinate update steps for each cycle
+
+    Returns
+    -------
+
+    space : (float, numpy.array)
+        float is the final stress obtained; the array are the coordinates of
+        the elements in the embedded space
+
+    stressfreq : int
+        calculate and report stress value every stressfreq cycle
+
+    """
 
-        cdef int nelem = s.size
-        cdef double finalstress = 0.0
-
-        logging.info("Starting Stochastic Proximity Embedding")
+    cdef int nelem = s.size
+    cdef double finalstress = 0.0
 
-        cdef numpy.ndarray[numpy.float64_t, ndim=1] matndarray = numpy.ascontiguousarray(s._elements, dtype=numpy.float64)
-        cdef numpy.ndarray[numpy.float64_t, ndim=1] d_coords = numpy.zeros((nelem*dim),dtype=numpy.float64)
+    logging.info("Starting Stochastic Proximity Embedding")
 
-        finalstress = cstochasticproxembed.CStochasticProximityEmbedding( matndarray.data, d_coords.data, rco, nelem, dim, maxlam, minlam, ncycle, nstep, stressfreq)
+    cdef numpy.ndarray[numpy.float64_t, ndim=1] matndarray = numpy.ascontiguousarray(s._elements, dtype=numpy.float64)
+    cdef numpy.ndarray[numpy.float64_t, ndim=1] d_coords = numpy.zeros((nelem*dim),dtype=numpy.float64)
 
-        logging.info("Stochastic Proximity Embedding finished. Residual stress: %.3f" % finalstress)
+    finalstress = cstochasticproxembed.CStochasticProximityEmbedding( matndarray.data, d_coords.data, rco, nelem, dim, maxlam, minlam, ncycle, nstep, stressfreq)
 
-        return (finalstress, d_coords.reshape((-1,dim)).T)
+    logging.info("Stochastic Proximity Embedding finished. Residual stress: %.3f" % finalstress)
 
-    def __call__(self, *args):
-        return self.run(*args)
+    return (finalstress, d_coords.reshape((-1,dim)).T)
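Analogously, a hedged sketch of calling the reworked dimensionality-reduction entry point directly (the parameter values are made up; only the call shape mirrors what DimensionalityReductionMethod.py now does):

    # Illustrative sketch, not part of the patch: calling the module-level
    # StochasticProximityEmbedding function directly.
    from MDAnalysis.analysis.encore.utils import TriangularMatrix
    from MDAnalysis.analysis.encore.dimensionality_reduction import stochasticproxembed

    distance_matrix = TriangularMatrix(size=4)          # toy 4-element matrix
    for i in range(4):
        for j in range(i + 1):
            distance_matrix[i, j] = float(abs(i - j))   # made-up distances

    final_stress, coordinates = stochasticproxembed.StochasticProximityEmbedding(
        s=distance_matrix,
        rco=1.5,          # hypothetical neighborhood cut-off
        dim=2,            # embed into two dimensions
        maxlam=2.0,       # hypothetical starting learning parameter
        minlam=0.1,       # hypothetical final learning parameter
        ncycle=50,
        nstep=1000,
        stressfreq=10)
    # ``coordinates`` has shape (dim, n_elements); ``final_stress`` is the
    # residual stress reported by the C routine.
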
From 29942015eb7ee56cf16194c5fb4e2e56b52c92d4 Mon Sep 17 00:00:00 2001
From: Max Linke
Date: Mon, 25 Sep 2017 23:27:23 +0200
Subject: [PATCH 2/3] update sphinx rst files

---
 .../source/documentation_pages/analysis/encore/clustering.rst  | 2 +-
 .../analysis/encore/dimensionality_reduction.rst               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/package/doc/sphinx/source/documentation_pages/analysis/encore/clustering.rst b/package/doc/sphinx/source/documentation_pages/analysis/encore/clustering.rst
index ccd9f4781bd..0b0ca7ea642 100644
--- a/package/doc/sphinx/source/documentation_pages/analysis/encore/clustering.rst
+++ b/package/doc/sphinx/source/documentation_pages/analysis/encore/clustering.rst
@@ -17,7 +17,7 @@ Clustering algorithms
 
 The following clustering algorithms are always available:
 
-.. autoclass:: MDAnalysis.analysis.encore.clustering.affinityprop.AffinityPropagation
+.. automodule:: MDAnalysis.analysis.encore.clustering.affinityprop
    :members:
 
diff --git a/package/doc/sphinx/source/documentation_pages/analysis/encore/dimensionality_reduction.rst b/package/doc/sphinx/source/documentation_pages/analysis/encore/dimensionality_reduction.rst
index 77cda0dc39c..7d326824218 100644
--- a/package/doc/sphinx/source/documentation_pages/analysis/encore/dimensionality_reduction.rst
+++ b/package/doc/sphinx/source/documentation_pages/analysis/encore/dimensionality_reduction.rst
@@ -14,5 +14,5 @@ Dimensionality reduction algorithms
 
 The following dimensionality reduction algorithms are always natively available:
 
-.. autoclass:: MDAnalysis.analysis.encore.dimensionality_reduction.stochasticproxembed
+.. automodule:: MDAnalysis.analysis.encore.dimensionality_reduction.stochasticproxembed
    :members:

From f7174dbd568712aee5a1bb9f7f3f6acdd69ddbec Mon Sep 17 00:00:00 2001
From: Max Linke
Date: Mon, 25 Sep 2017 23:37:20 +0200
Subject: [PATCH 3/3] fix rst docs

---
 .../encore/clustering/affinityprop.pyx         | 39 ++++++++++---------
 .../stochasticproxembed.pyx                    | 12 +-----
 2 files changed, 22 insertions(+), 29 deletions(-)

diff --git a/package/MDAnalysis/analysis/encore/clustering/affinityprop.pyx b/package/MDAnalysis/analysis/encore/clustering/affinityprop.pyx
index 3e028addaeb..25114d056af 100644
--- a/package/MDAnalysis/analysis/encore/clustering/affinityprop.pyx
+++ b/package/MDAnalysis/analysis/encore/clustering/affinityprop.pyx
@@ -35,8 +35,7 @@ cimport cython
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def AffinityPropagation(s, preference, float lam, int max_iterations, int convergence, int noise=1):
-    """
-    Affinity propagation clustering algorithm. This class is a Cython wrapper around the Affinity propagation algorithm, which is implement as a C library (see ap.c). The implemented algorithm is described in the paper:
+    """Affinity propagation clustering algorithm. This class is a Cython wrapper around the Affinity propagation algorithm, which is implemented as a C library (see ap.c). The implemented algorithm is described in the paper:
 
     Clustering by Passing Messages Between Data Points.
     Brendan J. Frey and Delbert Dueck, University of Toronto
@@ -44,33 +43,37 @@ def AffinityPropagation(s, preference, float lam, int max_iterations, int conver
     Science 315, 972–976, February 2007
 
     Parameters
     ----------
-    s : encore.utils.TriangularMatrix object
-        Triangular matrix containing the similarity values for each pair of clustering elements. Notice that the current implementation does not allow for asymmetric values (i.e. similarity(a,b) is assumed to be equal to similarity(b,a))
-
-preference : numpy.array of floats or float
-        Preference values, which the determine the number of clusters. If a single value is given, all the preference values are set to that. Otherwise, the list is used to set the preference values (one value per element, so the list must be of the same size as the number of elements)
-
+    s : encore.utils.TriangularMatrix object
+        Triangular matrix containing the similarity values for each pair of
+        clustering elements. Notice that the current implementation does not
+        allow for asymmetric values (i.e. similarity(a,b) is assumed to be
+        equal to similarity(b,a))
+    preference : numpy.array of floats or float
+        Preference values, which determine the number of clusters. If a
+        single value is given, all the preference values are set to that.
+        Otherwise, the list is used to set the preference values (one value per
+        element, so the list must be of the same size as the number of
+        elements)
     lam : float
-        Floating point value that defines how much damping is applied to the solution at each iteration. Must be ]0,1]
-
+        Floating point value that defines how much damping is applied to the
+        solution at each iteration. Must be ]0,1]
     max_iterations : int
         Maximum number of iterations
-
     convergence : int
-        Number of iterations in which the cluster centers must remain the same in order to reach convergence
-
+        Number of iterations in which the cluster centers must remain the same
+        in order to reach convergence
     noise : int
-        Whether to apply noise to the input s matrix, such there are no equal values. 1 is for yes, 0 is for no.
+        Whether to apply noise to the input s matrix, such that there are no
+        equal values. 1 is for yes, 0 is for no.
 
     Returns
     -------
-
     elements : list of int or None
-        List of cluster-assigned elements, which can be used by encore.utils.ClustersCollection to generate Cluster objects. See these classes for more details.
+        List of cluster-assigned elements, which can be used by
+        encore.utils.ClustersCollection to generate Cluster objects. See these
+        classes for more details.
 
-"""
+    """
     cdef int cn = s.size
     cdef float cpreference = preference
 
diff --git a/package/MDAnalysis/analysis/encore/dimensionality_reduction/stochasticproxembed.pyx b/package/MDAnalysis/analysis/encore/dimensionality_reduction/stochasticproxembed.pyx
index b7f15c5d4db..d79d602809b 100644
--- a/package/MDAnalysis/analysis/encore/dimensionality_reduction/stochasticproxembed.pyx
+++ b/package/MDAnalysis/analysis/encore/dimensionality_reduction/stochasticproxembed.pyx
@@ -47,41 +47,31 @@ def StochasticProximityEmbedding(s, double rco, int dim, double maxlam, double m
 
     This class is a Cython wrapper for a C implementation (see spe.c)
 
-    Parameters:
+    Parameters
     ----------
-
     s : encore.utils.TriangularMatrix object
         Triangular matrix containing the distance values for each pair of
         elements in the original space.
-
     rco : float
         neighborhood distance cut-off
-
     dim : int
         number of dimensions for the embedded space
-
     minlam : float
         final learning parameter
-
     maxlam : float
         starting learning parameter
-
     ncycle : int
         number of cycles. Each cycle is composed of nstep steps. At the end
         of each cycle, the learning parameter lambda is updated.
-
     nstep : int
         number of coordinate update steps for each cycle
-
     Returns
     -------
-
     space : (float, numpy.array)
         float is the final stress obtained; the array are the coordinates of
         the elements in the embedded space
-
     stressfreq : int
         calculate and report stress value every stressfreq cycle