diff --git a/package/MDAnalysis/analysis/align.py b/package/MDAnalysis/analysis/align.py index 4b1c2c755a0..4c9b2f5908c 100644 --- a/package/MDAnalysis/analysis/align.py +++ b/package/MDAnalysis/analysis/align.py @@ -259,6 +259,7 @@ def rotation_matrix(a, b, weights=None): # so that R acts **to the left** and can be broadcasted; we're saving # one transpose. [orbeckst]) rmsd = qcp.CalcRMSDRotationalMatrix(a.T, b.T, N, rot, weights) + logger.info("qcp: %d", rmsd) return np.matrix(rot.reshape(3, 3)), rmsd diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index e9f26f8730b..aa0a6e34e12 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -23,15 +23,15 @@ :Copyright: GNU Public License v3 The module contains the non-linear dimension reduction method diffusion map. -The diffusion map allows to get a quick estimate of the slowest collective -coordinates for a trajectory. This non-linear dimension reduction method -assumes that the trajectory is long enough to represents a probability -distribution of as protein close to the equilibrium. Further the diffusion map -assumes that the diffusion coefficients are constant. The eigenvectors with the -largest eigenvalues are the slowest collective coordinates. The complexity of -the diffusion map is O(N^2), where N is the number of frames in the trajectory. -Instead of a single trajectory a sample of protein structures can be used. -The sample should be equiblibrated, at least locally. Different weights can be used. +The diffusion map allows to get a quick estimate of the slowest collective +coordinates for a trajectory. This non-linear dimension reduction method +assumes that the trajectory is long enough to represent a probability +distribution of as protein close to the equilibrium. Furthermore, the diffusion map +assumes that the diffusion coefficients are constant. The eigenvectors with the +largest eigenvalues are the slowest collective coordinates. The complexity of +the diffusion map is O(N^3), where N is the number of frames in the trajectory. +Instead of a single trajectory a sample of protein structures can be used. +The sample should be equiblibrated, at least locally. Different weights can be used. The order of the sampled structures in the trajectory is irrelevant. The :ref:`Diffusion-Map-tutorial` shows how to use diffusion map for dimension reduction. @@ -45,9 +45,9 @@ The example uses files provided as part of the MDAnalysis test suite (in the variables :data:`~MDAnalysis.tests.datafiles.PSF` and -:data:`~MDAnalysis.tests.datafiles.DCD`). Notice that these test data -aren't representing a sample from the equilibrium. This violates a basic -assumption of the diffusion map and the results shouldn't be interpreted for this reason. +:data:`~MDAnalysis.tests.datafiles.DCD`). Notice that these test data +aren't representing a sample from the equilibrium. This violates a basic +assumption of the diffusion map and the results shouldn't be interpreted for this reason. This tutorial shows how to use the diffusionmap function. First load all modules and test data :: @@ -56,11 +56,12 @@ >>> import MDAnalysis.analysis.diffusionmap as diffusionmap >>> from MDAnalysis.tests.datafiles import PSF,DCD -In the simplest case, we can simply calculate the diffusion map from +In the simplest case, we can simply calculate the diffusion map from one trajectory :func:`diffusionmap`:: >>> u = MDAnalysis.Universe(PSF,DCD) - >>> eg,ev=diffusionmap.diffusionmap(u) + >>> dmap = diffusionmap.DiffusionMap(u) + >>> eg,ev= diffusionmap.DiffusionMap(u).run() To see how the two slowest collective coordinates how the Other stuff in paper @@ -71,21 +72,21 @@ >>> plt.ylabel("second slowest collective coordinate") >>> plt.show() +Classes +------- -Functions ---------- - -.. autofunction:: diffusionmap +.. autoclass:: Contacts + :members: References --------- If you use this QCP rotation calculation method in a publication, please reference: -..[Lafon1] Coifman, Ronald R., Lafon, Stephane (2006) Diffusion maps. +..[Lafon1] Coifman, Ronald R., Lafon, Stephane (2006) Diffusion maps. Appl. Comput. Harmon. Anal. 21, 5–30. -..[Clementi1] Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. (2013) -Determination of reaction coordinates via locally scaled +..[Clementi1] Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. (2013) +Determination of reaction coordinates via locally scaled diffusion map. Journal of Chemical Physics. @@ -94,9 +95,12 @@ import logging import numpy as np +from deco import * import MDAnalysis.lib.qcprot as qcp from six.moves import range +from .base import AnalysisBase +logger = logging.getLogger("MDAnalysis.analysis.diffusionmap") def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): """Non-linear dimension reduction method diffusion map @@ -107,16 +111,15 @@ def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): Parameters ------------- - *u* - trajectory :class:`~MDAnalysis.core.AtomGroup.Universe` - The trajectory can be a long trajectory - select: str, optional + u : trajectory :class:`~MDAnalysis.core.AtomGroup.Universe` + Remember that eigenvalue decomposition scales at O(n^3) + select: str, optional 1. any valid selection string for - :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` + :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` This selection of atoms is used to calculate the RMSD between different frames. Water should be excluded. - epsilon : float, optional - Specifies the epsilon used for the diffusion map. More information in [1] and [2] - With 'average' the average of the RMSD to the k-nearest-neighbor will be used. + epsilon : float, optional + Specifies the epsilon used for the diffusion map. More information in [1] and [2] + With 'average' the average of the RMSD to the k-nearest-neighbor will be used. k : int, optional specifies the k for the k-nearest-neighbor is average epsilon is used. weight: numpy array, optional @@ -135,7 +138,7 @@ def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): Notes --------------- - + The dimension reduction works in the following way: 1. A RMSD between each every pair of frames is calculated. @@ -143,7 +146,6 @@ def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): 3. The eigenvalues and eigenvectors of the normalized kernel are the output. """ - logger = logging.getLogger('MDAnalysis.analysis.diffusionmap') frames = u.trajectory ref_atoms = u.select_atoms(select) nframes = len(frames) @@ -181,7 +183,7 @@ def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): #fill in symmetric values rmsd_matrix = rmsd_matrix + rmsd_matrix.T - np.diag(rmsd_matrix.diagonal()) - #calculate epsilons + #calculate epsilons if type_epsilon == 'average': for i in range(nframes): #np.argsort(rmsd_matrix[i,:])#[10]] @@ -218,3 +220,134 @@ def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): ev = ev[eg_arg[::-1],:] return eg, ev + +class DiffusionMap(AnalysisBase): + def __init__(self, u, select='all', epsilon='average', k=10, weights=None, + metric=None, start=None, stop=None, step=None): + """ + Parameters + ------------- + u : trajectory `~MDAnalysis.core.AtomGroup.Universe` + The MD Trajectory for dimension reduction, remember that computational + scales at O(n^3). Cost can be reduced by increasing step interval or + specifying start and stop + select: str, optional + 1. any valid selection string for + :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` + This selection of atoms is used to calculate the RMSD between different frames. Water should be excluded. + epsilon : float, optional + Specifies the epsilon used for the diffusion map. More information in [1] and [2] + With 'average' the average of the RMSD to the k-nearest-neighbor will be used. + k : int, optional + specifies the k for the k-nearest-neighbor is average epsilon is used. + weights: list, optional + The list has to have the same length as the trajectory. + With 'None' the weight of each frame of the trajectory will be the same. + + metric : function, optional + Maps two numpy arrays to a float, is positive definite and symmetric. + start : int, optional + First frame of trajectory to analyse, Default: 0 + stop : int, optional + Last frame of trajectory to analyse, Default: -1 + step : int, optional + Step between frames to analyse, Default: 1 + """ + self.u = u + self.atoms = u.select_atoms(select) + self.natoms = self.atoms.n_atoms + self.k = k + frames = u.trajectory + self.epsilon = epsilon + if metric is not None: + self.metric = metric + else: + self.metric = qcp.CalcRMSDRotationalMatrix + + self._setup_frames(frames, start, stop, step) + + if weights is None: + self.weights = np.ones((self.nframes,)) + else: + if weights.shape[0] != self.nframes: + raise ValueError("The weight should have the same length as the trajectroy") + else: + # weights are constructed as relative to the mean + self.weights = np.asarray(weights, dtype=np.float64) / np.mean(weights) + + def _prepare(self): + self.rmsd_matrix = np.zeros((self.nframes,self.nframes)) + + if self.epsilon == 'average': + self.epsilon = np.zeros((self.nframes, ), ) + self.type_epsilon = 'average' + else: + value_epsilon = epsilon + self.epsilon = np.full((nframes, ), value_epsilon) + self.type_epsilon = 'constant' + + self.rot = np.zeros(9) + + #mappable function + def calc_diffusion(self, traj_index): + """Calculates diffusion distance from metric function + rmsd_matrix will be 0's in the lower triangle. + """ + logger.info("calculating rmsd from structure {0} to all".format(traj_index)) + i_ref = np.copy(self.u.trajectory[traj_index].positions-self.atoms.center_of_mass()) + + for j in range(traj_index, self.nframes): + j_ref = np.copy(self.u.trajectory[j].positions-self.atoms.center_of_mass()) + logger.info('_ts.frame {0}'.format(self._ts.frame)) + self.rmsd_matrix[traj_index, j] = self.metric(i_ref.T.astype(np.float64),\ + j_ref.T.astype(np.float64), self.natoms, self.rot, weights=None) + + def _single_frame(self): + logger.info("_ts.frame {0}, numframes{1}".format(self._ts.frame, self.nframes)) + self.calc_diffusion(self._ts.frame) + + def _conclude(self): + + logger.info('rmsd_matrix: {0}'.format(self.rmsd_matrix)) + + self.rmsd_matrix = self.rmsd_matrix + self.rmsd_matrix.T - \ + np.diag(self.rmsd_matrix.diagonal()) + if self.type_epsilon == 'average': + for i in range(self.nframes): + #np.argsort(rmsd_matrix[i,:])#[10]] + self.epsilon[i] = self.rmsd_matrix[i, \ + np.argsort(self.rmsd_matrix[i, :])[self.k]] + + self.epsilon = np.full((self.nframes, ), self.epsilon.mean()) + + logger.info('epsilon: {0}'.format(self.epsilon)) + + self.kernel2 = np.zeros((self.nframes, self.nframes)) + + #possibly mappable + for i in range(self.nframes): + self.kernel2[i, :] = np.exp(-self.rmsd_matrix[i, :]**2/(self.epsilon[i]*self.epsilon[:])) + + p_vector = np.zeros((self.nframes, )) + d_vector = np.zeros((self.nframes, )) + + for i in range(self.nframes): + p_vector[i] = np.dot(self.kernel2[i, :], self.weights) + + self.kernel2 /= np.sqrt(p_vector[:, np.newaxis].dot(p_vector[np.newaxis])) + + for i in range(self.nframes): + d_vector[i] = np.dot(self.kernel2[i, :], self.weights) + + for i in range(self.nframes): + self.kernel2[i, :] = self.kernel2[i, :]*self.weights + + self.kernel2 /= np.sqrt(d_vector[:, np.newaxis].dot(d_vector[np.newaxis])) + + logger.info('kernel: {0}'.format(self.kernel2)) + #eigenvalues and eigenvector are the collective coordinates + eg, ev = np.linalg.eig(self.kernel2) + + eg_arg = np.argsort(eg) + self.eg = eg[eg_arg[::-1]] + self.ev = ev[eg_arg[::-1],:]