From e77350012ea7301e69eacfcb468488fff3a80c39 Mon Sep 17 00:00:00 2001 From: Eugen Hruska Date: Thu, 10 Mar 2016 18:44:18 -0600 Subject: [PATCH] included notes --- package/MDAnalysis/analysis/diffusionmap.py | 179 +++++++++--------- .../analysis/test_diffusionmap.py | 30 +-- 2 files changed, 102 insertions(+), 107 deletions(-) diff --git a/package/MDAnalysis/analysis/diffusionmap.py b/package/MDAnalysis/analysis/diffusionmap.py index e4f3e300b1d..e9f26f8730b 100644 --- a/package/MDAnalysis/analysis/diffusionmap.py +++ b/package/MDAnalysis/analysis/diffusionmap.py @@ -34,15 +34,9 @@ The sample should be equiblibrated, at least locally. Different weights can be used. The order of the sampled structures in the trajectory is irrelevant. -More details how the correst slowest collective coordinates can be calculated.in: -[1] Nadler, B, Lafon, S, Coifman, R. R, & Kevrekidis, I. G. (2013) Diffusion maps, -spectral clustering and reaction coordinates of dynamical systems. Applied and -Computational Harmonic Analysis 21, 113–127. -[2] Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. (2013) Determi- nation -of reaction coordinates via locally scaled diffusion map. Journal of Chemical Physics. - The :ref:`Diffusion-Map-tutorial` shows how to use diffusion map for dimension reduction. +More details about diffusion maps are in [Lafon1]_ and [Clementi1]_. .. _Diffusion-Map-tutorial: @@ -59,18 +53,17 @@ >>> import MDAnalysis >>> import numpy as np - >>> import MDAnalysis.lib.qcprot as qcp >>> import MDAnalysis.analysis.diffusionmap as diffusionmap >>> from MDAnalysis.tests.datafiles import PSF,DCD -In the simplest case, we can simply calculate the diffusion map from one trajectory :func:`diffusionmap`:: +In the simplest case, we can simply calculate the diffusion map from +one trajectory :func:`diffusionmap`:: >>> u = MDAnalysis.Universe(PSF,DCD) >>> eg,ev=diffusionmap.diffusionmap(u) To see how the two slowest collective coordinates how the Other stuff in paper - >>> import matplotlib.pyplot as plt >>> import matplotlib.pyplot as plt >>> plt.scatter(ev[:,1],ev[:,2]) >>> plt.title('diffusion map') @@ -79,137 +72,149 @@ >>> plt.show() -Common usage ------------- - -To **fit a single structure** with :func:`alignto`:: - - >>> ref = Universe(PSF, PDB_small) - >>> mobile = Universe(PSF, DCD) # we use the first frame - >>> alignto(mobile, ref, select="protein and name CA", mass_weighted=True) - -This will change *all* coordinates in *mobile* so that the protein -C-alpha atoms are optimally superimposed (translation and rotation). - - Functions --------- .. autofunction:: diffusionmap +References +--------- -""" - -import os.path -from six.moves import range, zip, zip_longest -import numpy as np -import warnings -import logging +If you use this QCP rotation calculation method in a publication, please +reference: +..[Lafon1] Coifman, Ronald R., Lafon, Stephane (2006) Diffusion maps. +Appl. Comput. Harmon. Anal. 21, 5–30. +..[Clementi1] Rohrdanz, M. A, Zheng, W, Maggioni, M, & Clementi, C. (2013) +Determination of reaction coordinates via locally scaled +diffusion map. Journal of Chemical Physics. -import MDAnalysis.lib.qcprot as qcp -from MDAnalysis.exceptions import SelectionError, SelectionWarning -from MDAnalysis.lib.log import ProgressMeter -import MDAnalysis.analysis.rms as rms +""" -logger = logging.getLogger('MDAnalysis.analysis.diffusionmap') +import logging +import numpy as np +import MDAnalysis.lib.qcprot as qcp +from six.moves import range def diffusionmap(u, select='all', epsilon='average', k=10, weight=None): - """ Non-linear dimension reduction method diffusion map - - The dimension reduction is done in the following way: + """Non-linear dimension reduction method diffusion map - 1. A RMSD between each every pair of frames is calculated. - 2. The normalized kernel is obtain as in [1] and [2]. - 3. The eigenvalues and eigenvectors of the normalized kernel are the output. + diffusionmap(u, select, epsilon, k, weight) - :Arguments: + Dimension reduction with diffusion map of the structures in the universe. + + Parameters + ------------- *u* trajectory :class:`~MDAnalysis.core.AtomGroup.Universe` The trajectory can be a long trajectory - *select* + select: str, optional 1. any valid selection string for :meth:`~MDAnalysis.core.AtomGroup.AtomGroup.select_atoms` This selection of atoms is used to calculate the RMSD between different frames. Water should be excluded. - *epsilon* : float + epsilon : float, optional Specifies the epsilon used for the diffusion map. More information in [1] and [2] With 'average' the average of the RMSD to the k-nearest-neighbor will be used. - *k* : specifies the k for the k-nearest-neighbor is average epsilon is used. - *weight* : None or numpy array of the same length as the trajectory + k : int, optional + specifies the k for the k-nearest-neighbor is average epsilon is used. + weight: numpy array, optional + The numpy array has to have the same length as the trajectory. With 'None' the weight of each frame of the trajectory will be the same. If order of the weights has to be the same as the order of framesin the trajectory. - :Returns: eigenvalues, eigenvectors - the second and higher eigenvectors ev[i+1,:] represent the i-th slowest collective coordinates. + Returns + ---------------- + eigenvalues: numpy array + Eigenvalues of the diffusion map + eigenvectors: numpy array + Eigenvectors of the diffusion map + The second and higher eigenvectors ev[i+1,:] represent the i-th slowest collective coordinates. - """ + Notes + --------------- + + The dimension reduction works in the following way: - frames=u.trajectory + 1. A RMSD between each every pair of frames is calculated. + 2. The normalized kernel is obtain as in [1] and [2]. + 3. The eigenvalues and eigenvectors of the normalized kernel are the output. + + """ + logger = logging.getLogger('MDAnalysis.analysis.diffusionmap') + frames = u.trajectory ref_atoms = u.select_atoms(select) - nframes=len(frames) + nframes = len(frames) natoms = ref_atoms.n_atoms - rmsd_matrix=np.zeros((nframes,nframes), dtype=np.float64) - epsilon=np.zeros((nframes,), dtype=np.float64) - kernel2=np.zeros((nframes,nframes), dtype=np.float64) + rmsd_matrix = np.zeros((nframes, nframes)) + kernel2 = np.zeros((nframes, nframes)) + + if epsilon == 'average': + epsilon = np.zeros((nframes, ), ) + type_epsilon = 'average' + else: + value_epsilon = epsilon + epsilon = np.full((nframes, ), value_epsilon) + type_epsilon = 'constant' + - rmsd = np.zeros((nframes,)) - rot= np.zeros(9, dtype=np.float64) - weight=None - type_epsilon='average' + rot = np.zeros(9) if weight is None: - weights=np.full((nframes,),1) + weights = np.full((nframes, ), 1) else: - if weight.shape[0]!=nframes: - raise ValueError("The weight should have the same length as the trajectroy") - else: - weights=weight + if weight.shape[0] != nframes: + raise ValueError("The weight should have the same length as the trajectroy") + else: + weights = weight for i in range(nframes): - logger.info("calculating rmsd from structure "+str(i)+" to all") - i_ref=np.copy(u.trajectory[i].positions-ref_atoms.center_of_mass()) - for j in range(nframes): - j_ref=np.copy(u.trajectory[j].positions-ref_atoms.center_of_mass()) - rmsd_matrix[i,j] = qcp.CalcRMSDRotationalMatrix(i_ref.T.astype(np.float64), j_ref.T.astype(np.float64), natoms, rot, weight) + logger.info("calculating rmsd from structure {0} to all".format(i)) + i_ref = np.copy(u.trajectory[i].positions-ref_atoms.center_of_mass()) + for j in range(i, nframes): + j_ref = np.copy(u.trajectory[j].positions-ref_atoms.center_of_mass()) + rmsd_matrix[i, j] = qcp.CalcRMSDRotationalMatrix(i_ref.T.astype(np.float64), \ + j_ref.T.astype(np.float64), natoms, rot, weight) + + #fill in symmetric values + rmsd_matrix = rmsd_matrix + rmsd_matrix.T - np.diag(rmsd_matrix.diagonal()) #calculate epsilons - for i in range(nframes): - #np.argsort(rmsd_matrix[i,:])#[10]] - epsilon[i]=rmsd_matrix[i,np.argsort(rmsd_matrix[i,:])[k]] + if type_epsilon == 'average': + for i in range(nframes): + #np.argsort(rmsd_matrix[i,:])#[10]] + epsilon[i] = rmsd_matrix[i, np.argsort(rmsd_matrix[i, :])[k]] + epsilon = np.full((nframes, ), epsilon.mean()) - if epsilon=='average': - epsilon=np.full((nframes,),epsilon.mean()) - else: - epsilon=np.full((nframes,),epsilon) - logger.info("epsilon: "+str(epsilon)) + logger.info('epsilon: {0}'.format(epsilon)) #calculate normalized kernel for i in range(nframes): - kernel2[i,:]=np.exp(-rmsd_matrix[i,:]**2/(epsilon[i]*epsilon[:])) + kernel2[i, :] = np.exp(-rmsd_matrix[i, :]**2/(epsilon[i]*epsilon[:])) - p_vector = np.zeros((nframes,)) - d_vector = np.zeros((nframes,)) + p_vector = np.zeros((nframes, )) + d_vector = np.zeros((nframes, )) for i in range(nframes): - p_vector[i]=np.dot(kernel2[i,:],weights) + p_vector[i] = np.dot(kernel2[i, :], weights) - kernel2 /= np.sqrt(p_vector[:,np.newaxis].dot(p_vector[np.newaxis])) + kernel2 /= np.sqrt(p_vector[:, np.newaxis].dot(p_vector[np.newaxis])) for i in range(nframes): - d_vector[i]=np.dot(kernel2[i,:],weights) + d_vector[i] = np.dot(kernel2[i, :], weights) for i in range(nframes): - kernel2[i,:]= kernel2[i,:]*weights + kernel2[i, :] = kernel2[i, :]*weights - kernel2 /= np.sqrt(d_vector[:,np.newaxis].dot(d_vector[np.newaxis])) + kernel2 /= np.sqrt(d_vector[:, np.newaxis].dot(d_vector[np.newaxis])) #eigenvalues and eigenvector are the collective coordinates - eg, ev=np.linalg.eig(kernel2) + eg, ev = np.linalg.eig(kernel2) - if np.abs(eg[0]-1)>1.0e-14: - raise ValueError("Lowest eigenvalue should be 1 up to numeric precision") + eg_arg = np.argsort(eg) + eg = eg[eg_arg[::-1]] + ev = ev[eg_arg[::-1],:] return eg, ev diff --git a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py index b49e9760e4c..8242d5890e6 100644 --- a/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py +++ b/testsuite/MDAnalysisTests/analysis/test_diffusionmap.py @@ -14,22 +14,12 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # from __future__ import print_function - import MDAnalysis - import MDAnalysis.analysis.diffusionmap as diffusionmap -from MDAnalysis import SelectionError - -from numpy.testing import (TestCase, dec, - assert_almost_equal, assert_raises, assert_equal) -import numpy as np -from nose.plugins.attrib import attr +from numpy.testing import (assert_almost_equal, assert_equal) -import tempdir -from os import path -from MDAnalysisTests.datafiles import PDB,XTC -from MDAnalysisTests import executable_not_found, parser_not_found +from MDAnalysisTests.datafiles import PDB, XTC class TestDiffusionmap(object): @@ -44,20 +34,20 @@ def __init__(self): #assert_almost_equal(self.ev[0,0], .095836037343022831) #faster u = MDAnalysis.Universe(PDB, XTC) - eg,ev=diffusionmap.diffusionmap(u, select='backbone', k=5) - self.eg=eg - self.ev=ev + eg, ev = diffusionmap.diffusionmap(u, select = 'backbone', k = 5) + self.eg = eg + self.ev = ev def test_eg(self): - assert_equal(self.eg.shape, (10,)) - assert_almost_equal(self.eg[0], 1.0) - assert_almost_equal(self.eg[-1],0.03661048812801191) + assert_equal(self.eg.shape, (10, )) + assert_almost_equal(self.eg[0], 1.0, decimal=5) + assert_almost_equal(self.eg[-1], 0.0142, decimal = 3) def test_ev(self): - assert_equal(self.ev.shape, (10,10)) - assert_almost_equal(self.ev[0,0], -0.30796900898350615) + assert_equal(self.ev.shape, (10, 10)) + assert_almost_equal(self.ev[0, 0], -0.3019, decimal = 2)