Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix cython error on travis #1675

Merged
merged 3 commits into from
Sep 26, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def __call__(self, distance_matrix):
numpy.array
list of cluster indices
"""
clusters = affinityprop.AffinityPropagation().run(
clusters = affinityprop.AffinityPropagation(
s=distance_matrix * -1., # invert sign
preference=self.preference,
lam=self.damping,
Expand Down
194 changes: 95 additions & 99 deletions package/MDAnalysis/analysis/encore/clustering/affinityprop.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,108 +34,104 @@ cimport cython

@cython.boundscheck(False)
@cython.wraparound(False)

cdef class AffinityPropagation(object):
"""
Affinity propagation clustering algorithm. This class is a Cython wrapper around the Affinity propagation algorithm, which is implemented as a C library (see ap.c). The implemented algorithm is described in the paper:
def AffinityPropagation(s, preference, float lam, int max_iterations, int convergence, int noise=1):
"""Affinity propagation clustering algorithm. This function is a Cython wrapper around the Affinity propagation algorithm, which is implemented as a C library (see ap.c). The implemented algorithm is described in the paper:

Clustering by Passing Messages Between Data Points.
Brendan J. Frey and Delbert Dueck, University of Toronto
Science 315, 972–976, February 2007

"""

def run(self, s, preference, float lam, int max_iterations, int convergence, int noise=1):
"""
Run the clustering algorithm.

Parameters
----------

s : encore.utils.TriangularMatrix object
Triangular matrix containing the similarity values for each pair of clustering elements. Notice that the current implementation does not allow for asymmetric values (i.e. similarity(a,b) is assumed to be equal to similarity(b,a))

preference : numpy.array of floats or float
Preference values, which determine the number of clusters. If a single value is given, all the preference values are set to that. Otherwise, the list is used to set the preference values (one value per element, so the list must be of the same size as the number of elements)

lam : float
Floating point value that defines how much damping is applied to the solution at each iteration. Must be ]0,1]

max_iterations : int
Maximum number of iterations

convergence : int
Number of iterations in which the cluster centers must remain the same in order to reach convergence

noise : int
Whether to apply noise to the input s matrix, such that there are no equal values. 1 is for yes, 0 is for no.


Returns
-------

elements : list of int or None
List of cluster-assigned elements, which can be used by encore.utils.ClustersCollection to generate Cluster objects. See these classes for more details.

"""
cdef int cn = s.size
cdef float cpreference = preference
Parameters
----------
s : encore.utils.TriangularMatrix object
Triangular matrix containing the similarity values for each pair of
clustering elements. Notice that the current implementation does not
allow for asymmetric values (i.e. similarity(a,b) is assumed to be
equal to similarity(b,a))
preference : numpy.array of floats or float
Preference values, which determine the number of clusters. If a
single value is given, all the preference values are set to that.
Otherwise, the list is used to set the preference values (one value per
element, so the list must be of the same size as the number of
elements)
lam : float
Floating point value that defines how much damping is applied to the
solution at each iteration. Must be ]0,1]
max_iterations : int
Maximum number of iterations
convergence : int
Number of iterations in which the cluster centers must remain the same
in order to reach convergence
noise : int
Whether to apply noise to the input s matrix, such that there are no equal
values. 1 is for yes, 0 is for no.

Returns
-------
elements : list of int or None
List of cluster-assigned elements, which can be used by
encore.utils.ClustersCollection to generate Cluster objects. See these
classes for more details.

# Assign preference values to diagonal
try:
for i in xrange(s.size):
s[i,i] = <float>preference[i]
except:
pass

if type(preference) == float:
for i in xrange(s.size):
s[i,i] = <float>preference
else:
raise TypeError ("Preference should be of type float")

logging.info("Preference %3.2f: starting Affinity Propagation" % (preference))

# Prepare input and output arrays
cdef numpy.ndarray[numpy.float32_t, ndim=1] matndarray = numpy.ascontiguousarray(s._elements, dtype=numpy.float32)
cdef numpy.ndarray[long, ndim=1] clusters = numpy.zeros((s.size),dtype=long)

# run C module Affinity Propagation
iterations = caffinityprop.CAffinityPropagation( <float*>matndarray.data, cn, lam, max_iterations, convergence, noise, <long*>clusters.data)

# Provide warning in case of lack of convergence
if iterations == 0:
logging.info("Preference %3.2f: could not converge in %d iterations" % (preference, -iterations))
import warnings
warnings.warn("Clustering with preference {0:3.2f} did not fully converge in {1:d} iterations".format(preference, -iterations))

# Find centroids
centroids = numpy.unique(clusters)
for k in numpy.arange(centroids.shape[0]):
ii = numpy.where(clusters == centroids[k])[0]
small_mat = numpy.zeros((ii.shape[0], ii.shape[0]))
for ii1 in numpy.arange(ii.shape[0]):
for ii2 in numpy.arange(ii.shape[0]):
small_mat[ii1,ii2] = s[ ii[ii1], ii[ii2] ]
j = numpy.argmax(numpy.sum(small_mat, axis=0))

centroids[k] = ii[j]

# Similarity to centroids
S_centroids = numpy.zeros((s.size, centroids.shape[0]))
for line in numpy.arange(s.size):
for c in numpy.arange(centroids.shape[0]):
S_centroids[line,c] = s[line, centroids[c]]

# Center values for each observation
c = numpy.argmax(S_centroids, axis=1)

# Centroids should point to themselves
c[centroids] = numpy.arange(centroids.shape[0])

# Assign centroid indices to all observables
clusters = centroids[c]

logging.info("Preference %3.2f: converged in %d iterations" % (preference, iterations))

return clusters
"""
cdef int cn = s.size
cdef float cpreference = preference

# Assign preference values to diagonal
try:
for i in xrange(s.size):
s[i,i] = <float>preference[i]
except:
pass

if type(preference) == float:
for i in xrange(s.size):
s[i,i] = <float>preference
else:
raise TypeError ("Preference should be of type float")

logging.info("Preference %3.2f: starting Affinity Propagation" % (preference))

# Prepare input and output arrays
cdef numpy.ndarray[numpy.float32_t, ndim=1] matndarray = numpy.ascontiguousarray(s._elements, dtype=numpy.float32)
cdef numpy.ndarray[long, ndim=1] clusters = numpy.zeros((s.size),dtype=long)

# run C module Affinity Propagation
iterations = caffinityprop.CAffinityPropagation( <float*>matndarray.data, cn, lam, max_iterations, convergence, noise, <long*>clusters.data)

# Provide warning in case of lack of convergence
if iterations == 0:
logging.info("Preference %3.2f: could not converge in %d iterations" % (preference, -iterations))
import warnings
warnings.warn("Clustering with preference {0:3.2f} did not fully converge in {1:d} iterations".format(preference, -iterations))

# Find centroids
centroids = numpy.unique(clusters)
for k in numpy.arange(centroids.shape[0]):
ii = numpy.where(clusters == centroids[k])[0]
small_mat = numpy.zeros((ii.shape[0], ii.shape[0]))
for ii1 in numpy.arange(ii.shape[0]):
for ii2 in numpy.arange(ii.shape[0]):
small_mat[ii1,ii2] = s[ ii[ii1], ii[ii2] ]
j = numpy.argmax(numpy.sum(small_mat, axis=0))

centroids[k] = ii[j]

# Similarity to centroids
S_centroids = numpy.zeros((s.size, centroids.shape[0]))
for line in numpy.arange(s.size):
for c in numpy.arange(centroids.shape[0]):
S_centroids[line,c] = s[line, centroids[c]]

# Center values for each observation
c = numpy.argmax(S_centroids, axis=1)

# Centroids should point to themselves
c[centroids] = numpy.arange(centroids.shape[0])

# Assign centroid indices to all observables
clusters = centroids[c]

logging.info("Preference %3.2f: converged in %d iterations" % (preference, iterations))

return clusters
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def __call__(self, distance_matrix):

"""
final_stress, coordinates = \
stochasticproxembed.StochasticProximityEmbedding().run(
stochasticproxembed.StochasticProximityEmbedding(
s=distance_matrix,
rco=self.distance_cutoff,
dim=self.dimension,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,78 +36,58 @@ cimport cython


@cython.embedsignature(True)

cdef class StochasticProximityEmbedding:
def StochasticProximityEmbedding(s, double rco, int dim, double maxlam, double minlam, int ncycle, int nstep, int stressfreq):
"""
Stochastic proximity embedding dimensionality reduction algorithm. The
algorithm implemented here is described in this paper:

Dmitrii N. Rassokhin, Dimitris K. Agrafiotis
A modified update rule for stochastic proximity embedding
Journal of Molecular Graphics and Modelling 22 (2003) 133–140
Dmitrii N. Rassokhin, Dimitris K. Agrafiotis
A modified update rule for stochastic proximity embedding
Journal of Molecular Graphics and Modelling 22 (2003) 133–140

This class is a Cython wrapper for a C implementation (see spe.c)
"""


def run(self, s, double rco, int dim, double maxlam, double minlam, int ncycle, int nstep, int stressfreq):
"""
Run stochastic proximity embedding.

Parameters:
----------

s : encore.utils.TriangularMatrix object
Triangular matrix containing the distance values for each pair of
elements in the original space.

rco : float
neighborhood distance cut-off

dim : int
number of dimensions for the embedded space

minlam : float
final learning parameter

maxlam : float
starting learning parameter
Parameters
----------
s : encore.utils.TriangularMatrix object
Triangular matrix containing the distance values for each pair of
elements in the original space.
rco : float
neighborhood distance cut-off
dim : int
number of dimensions for the embedded space
minlam : float
final learning parameter
maxlam : float
starting learning parameter
ncycle : int
number of cycles. Each cycle is composed of nstep steps. At the end
of each cycle, the learning parameter lambda is updated.
nstep : int
number of coordinate update steps for each cycle


Returns
-------
space : (float, numpy.array)
float is the final stress obtained; the array are the coordinates of
the elements in the embedded space
stressfreq : int
calculate and report stress value every stressfreq cycle

ncycle : int
number of cycles. Each cycle is composed of nstep steps. At the end
of each cycle, the learning parameter lambda is updated.

nstep : int
number of coordinate update steps for each cycle



Returns
-------

space : (float, numpy.array)
float is the final stress obtained; the array are the coordinates of
the elements in the embedded space

stressfreq : int
calculate and report stress value every stressfreq cycle


"""

cdef int nelem = s.size
cdef double finalstress = 0.0
"""

logging.info("Starting Stochastic Proximity Embedding")
cdef int nelem = s.size
cdef double finalstress = 0.0

cdef numpy.ndarray[numpy.float64_t, ndim=1] matndarray = numpy.ascontiguousarray(s._elements, dtype=numpy.float64)
cdef numpy.ndarray[numpy.float64_t, ndim=1] d_coords = numpy.zeros((nelem*dim),dtype=numpy.float64)
logging.info("Starting Stochastic Proximity Embedding")

finalstress = cstochasticproxembed.CStochasticProximityEmbedding( <double*>matndarray.data, <double*>d_coords.data, rco, nelem, dim, maxlam, minlam, ncycle, nstep, stressfreq)
cdef numpy.ndarray[numpy.float64_t, ndim=1] matndarray = numpy.ascontiguousarray(s._elements, dtype=numpy.float64)
cdef numpy.ndarray[numpy.float64_t, ndim=1] d_coords = numpy.zeros((nelem*dim),dtype=numpy.float64)

logging.info("Stochastic Proximity Embedding finished. Residual stress: %.3f" % finalstress)
finalstress = cstochasticproxembed.CStochasticProximityEmbedding( <double*>matndarray.data, <double*>d_coords.data, rco, nelem, dim, maxlam, minlam, ncycle, nstep, stressfreq)

return (finalstress, d_coords.reshape((-1,dim)).T)
logging.info("Stochastic Proximity Embedding finished. Residual stress: %.3f" % finalstress)

def __call__(self, *args):
return self.run(*args)
return (finalstress, d_coords.reshape((-1,dim)).T)
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Clustering algorithms

The following clustering algorithms are always available:

.. autoclass:: MDAnalysis.analysis.encore.clustering.affinityprop.AffinityPropagation
.. automodule:: MDAnalysis.analysis.encore.clustering.affinityprop
:members:


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ Dimensionality reduction algorithms
The following dimensionality reduction algorithms are always natively
available:

.. autoclass:: MDAnalysis.analysis.encore.dimensionality_reduction.stochasticproxembed
.. automodule:: MDAnalysis.analysis.encore.dimensionality_reduction.stochasticproxembed
:members: