From 0a24544aed72109ea62b586040d729194070b717 Mon Sep 17 00:00:00 2001 From: mat Date: Fri, 18 Jan 2019 16:21:20 +0000 Subject: [PATCH 01/16] Added skipping frames that are not used in the Survival Probability analysis. Before, all frames were loaded (and selection was always applied). Now, the frames which are not used are not loaded, thus improving the performance. This only happens when the step is larger than the tau_max + 1. Test cases cover the situation where some frames are skipped. The test checks how many times the 'select_atoms' was called. This is to help with the large size MD simulations analysis. --- package/MDAnalysis/analysis/waterdynamics.py | 35 ++++++++++++++++--- .../analysis/test_waterdynamics.py | 14 +++++++- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index baba1de9032..cfd342304d4 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -1273,15 +1273,42 @@ def run(self, tau_max=20, start=0, stop=None, step=1, verbose=False): if tau_max > (stop - start): raise ValueError("Too few frames selected for given tau_max.") - # load all frames to an array of sets + # preload the frames (selected ids) to a list of sets selected_ids = [] - for ts in self.universe.trajectory[start:stop]: - self.print(verbose, "Loading frame:", ts) - selected_ids.append(set(self.universe.select_atoms(self.selection).ids)) + if step <= tau_max + 1: + # load all frames + for ts in self.universe.trajectory[start:stop]: + self.print(verbose, "Loading frame:", ts) + selected_ids.append(set(self.universe.select_atoms(self.selection).ids)) + else: + # skip frames that will not be used + # Example: step 5 and tau 2: L, L, L, S, S, L, L, L, S, S, ... with L = Load, and S = Skip + loaded_counter = 0 + no_of_frames_to_load = tau_max + 1 + skipped = 0 + no_of_frames_to_skip = step - (tau_max + 1) + for ts in self.universe.trajectory[start:stop]: + if skipped == no_of_frames_to_skip: + skipped = 0 + loaded_counter = 0 + + if loaded_counter == no_of_frames_to_load: + self.print(verbose, "Skipping loading frame:", ts) + skipped += 1 + continue + + self.print(verbose, "Loading frame:", ts) + selected_ids.append(set(self.universe.select_atoms(self.selection).ids)) + + loaded_counter += 1 + tau_timeseries = np.arange(1, tau_max + 1) sp_timeseries_data = [[] for _ in range(tau_max)] + # frames not analysed are skipped because they were not loaded + step = tau_max + 1 if step >= (tau_max + 1) else step + for t in range(0, len(selected_ids), step): Nt = len(selected_ids[t]) diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index e78be038ce8..33e74d4dba1 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -31,7 +31,7 @@ import numpy as np from mock import patch from mock import Mock -from numpy.testing import assert_almost_equal +from numpy.testing import assert_almost_equal, assert_equal SELECTION1 = "byres name OH2" SELECTION2 = "byres name P1" @@ -120,3 +120,15 @@ def test_SurvivalProbability_alwaysPresent(universe): sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, start=0, stop=6) assert all(np.equal(sp.sp_timeseries, 1)) + + +def test_SurvivalProbability_stepLargerThanDtmax(universe): + # Testing if the frames are skipped correctly + with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock: + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=2, step=5, stop=9, verbose=True) + assert_equal(sp.sp_timeseries, [1, 1]) + # with tau_max=2 for all the frames we only read 6 of them + # this is because the frames which are not used are skipped, and therefore 'select_atoms' + assert universe.trajectory.n_frames > 6 + assert_equal(select_atoms_mock.call_count, 6) From a0edf640b3ad2622cfcae71630a06e025287b672 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Wed, 23 Jan 2019 18:19:05 +0000 Subject: [PATCH 02/16] Use the same code for loading the frames regardless of whether the step was set up or not. Also, manually moving between the frames rather then using "for _ in trajectory[]". This removed unnecessary variables. --- package/MDAnalysis/analysis/waterdynamics.py | 45 +++++++++----------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index cfd342304d4..311af621e4a 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -1273,36 +1273,33 @@ def run(self, tau_max=20, start=0, stop=None, step=1, verbose=False): if tau_max > (stop - start): raise ValueError("Too few frames selected for given tau_max.") - # preload the frames (selected ids) to a list of sets + # preload the frames (atom IDs) to a list of sets selected_ids = [] - if step <= tau_max + 1: - # load all frames - for ts in self.universe.trajectory[start:stop]: - self.print(verbose, "Loading frame:", ts) - selected_ids.append(set(self.universe.select_atoms(self.selection).ids)) - else: - # skip frames that will not be used - # Example: step 5 and tau 2: L, L, L, S, S, L, L, L, S, S, ... with L = Load, and S = Skip - loaded_counter = 0 - no_of_frames_to_load = tau_max + 1 - skipped = 0 - no_of_frames_to_skip = step - (tau_max + 1) - for ts in self.universe.trajectory[start:stop]: - if skipped == no_of_frames_to_skip: - skipped = 0 - loaded_counter = 0 - if loaded_counter == no_of_frames_to_load: - self.print(verbose, "Skipping loading frame:", ts) - skipped += 1 - continue + # skip frames that will not be used + # Example: step 5 and tau 2: L, L, L, S, S, L, L, L, S, S, ... with L = Load, and S = Skip + frame_loaded_counter = 0 + frames_to_load_no = tau_max + 1 + frames_to_skip_no = max(step - (tau_max + 1), 0) + + frame_no = start + while frame_no < stop: # we have already added 1 to stop, therefore < + if frame_loaded_counter == frames_to_load_no: + self.print(verbose, "Skipping the next %d frames:" % frames_to_skip_no) + frame_no += frames_to_skip_no + frame_loaded_counter = 0 + continue - self.print(verbose, "Loading frame:", ts) - selected_ids.append(set(self.universe.select_atoms(self.selection).ids)) + # update the frame number + self.universe.trajectory[frame_no] - loaded_counter += 1 + self.print(verbose, "Loading frame:", self.universe.trajectory.ts) + selected_ids.append(set(self.universe.select_atoms(self.selection).ids)) + frame_no += 1 + frame_loaded_counter += 1 + ## calculate Survival Probability tau_timeseries = np.arange(1, tau_max + 1) sp_timeseries_data = [[] for _ in range(tau_max)] From a9abc5014089a58d7245fbc1ea2984ab35705d7d Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Wed, 23 Jan 2019 18:20:08 +0000 Subject: [PATCH 03/16] A test case that checks if all frames are loaded. This is the border condition for "tau_max" and "step". --- testsuite/MDAnalysisTests/analysis/test_waterdynamics.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index 33e74d4dba1..4ef6583b6f8 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -132,3 +132,11 @@ def test_SurvivalProbability_stepLargerThanDtmax(universe): # this is because the frames which are not used are skipped, and therefore 'select_atoms' assert universe.trajectory.n_frames > 6 assert_equal(select_atoms_mock.call_count, 6) + + +def test_SurvivalProbability_stepEqualDtMax(universe): + with patch.object(universe, 'select_atoms', return_value=Mock(ids=(1,))) as select_atoms_mock: + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=4, step=5, stop=9, verbose=True) + # all frames from 0, with 9 inclusive + assert_equal(select_atoms_mock.call_count, 10) \ No newline at end of file From 036140a3e67bf5a775a8ff396c67f442e06a9350 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Wed, 23 Jan 2019 18:26:40 +0000 Subject: [PATCH 04/16] Refactoring with the simpler "return_value" when possible with the mock patch. --- .../analysis/test_waterdynamics.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index 4ef6583b6f8..9bac73c785d 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -100,25 +100,23 @@ def test_SurvivalProbability_definedTaus(universe): ids = [(9, 8, 7), (8, 7, 6), (7, 6, 5), (6, 5, 4), (5, 4, 3), (4, 3, 2), (3, 2, 1)] select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=3, start=0, stop=6) + sp.run(tau_max=3, start=0, stop=6, verbose=True) assert_almost_equal(sp.sp_timeseries, [2 / 3.0, 1 / 3.0, 0]) def test_SurvivalProbability_zeroMolecules(universe): - with patch.object(universe, 'select_atoms') as select_atoms_mock: - # no atom IDs found - select_atoms_mock.return_value = Mock(ids=[]) + # no atom IDs found + with patch.object(universe, 'select_atoms', return_value=Mock(ids=[])) as select_atoms_mock: sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=3, start=3, stop=6) + sp.run(tau_max=3, start=3, stop=6, verbose=True) assert all(np.isnan(sp.sp_timeseries)) def test_SurvivalProbability_alwaysPresent(universe): - with patch.object(universe, 'select_atoms') as select_atoms_mock: - # always the same atom IDs found - select_atoms_mock.return_value = Mock(ids=[7, 8]) + # always the same atom IDs found, 7 and 8 + with patch.object(universe, 'select_atoms', return_value=Mock(ids=[7, 8])) as select_atoms_mock: sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=3, start=0, stop=6) + sp.run(tau_max=3, start=0, stop=6, verbose=True) assert all(np.equal(sp.sp_timeseries, 1)) @@ -139,4 +137,4 @@ def test_SurvivalProbability_stepEqualDtMax(universe): sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=4, step=5, stop=9, verbose=True) # all frames from 0, with 9 inclusive - assert_equal(select_atoms_mock.call_count, 10) \ No newline at end of file + assert_equal(select_atoms_mock.call_count, 10) From f45a005546da124089e2987e720201f347f10780 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Wed, 23 Jan 2019 18:28:41 +0000 Subject: [PATCH 05/16] Better "verbose" message - avoid polluting. --- package/MDAnalysis/analysis/waterdynamics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 311af621e4a..8f3274a72cc 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -1284,7 +1284,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, verbose=False): frame_no = start while frame_no < stop: # we have already added 1 to stop, therefore < - if frame_loaded_counter == frames_to_load_no: + if frames_to_skip_no != 0 and frame_loaded_counter == frames_to_load_no: self.print(verbose, "Skipping the next %d frames:" % frames_to_skip_no) frame_no += frames_to_skip_no frame_loaded_counter = 0 From 91037bc3354fa691caab39b0c626418cd09963a2 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Thu, 31 Jan 2019 17:42:00 +0000 Subject: [PATCH 06/16] The SP value of entire residues can be calculated, regardless which atoms are found. --- package/MDAnalysis/analysis/waterdynamics.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 8f3274a72cc..78397570fa3 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -1231,7 +1231,7 @@ def print(self, verbose, *args): elif verbose: print(args) - def run(self, tau_max=20, start=0, stop=None, step=1, verbose=False): + def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, verbose=False): """ Computes and returns the survival probability timeseries @@ -1245,6 +1245,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, verbose=False): Jump every `step`'th frame tau_max : int Survival probability is calculated for the range :math:`1 <= \tau <= tau_max` + residues : Boolean + the selection and analysis will be carried out on the residues (resids) rather than on atoms. + A single atom is sufficient to classify the residue as within the distance. verbose : Boolean Overwrite the constructor's verbosity @@ -1294,12 +1297,17 @@ def run(self, tau_max=20, start=0, stop=None, step=1, verbose=False): self.universe.trajectory[frame_no] self.print(verbose, "Loading frame:", self.universe.trajectory.ts) - selected_ids.append(set(self.universe.select_atoms(self.selection).ids)) + atoms = self.universe.select_atoms(self.selection) + + # SP of residues or of atoms + ids = atoms.residues.resids if residues else atoms.ids + selected_ids.append(set(ids)) frame_no += 1 frame_loaded_counter += 1 ## calculate Survival Probability + # TODO tau_timeseries = np.arange(1, tau_max + 1) sp_timeseries_data = [[] for _ in range(tau_max)] @@ -1319,6 +1327,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, verbose=False): break # ids that survive from t to t + tau and at every frame in between + # TODO intermittend Ntau = len(set.intersection(*selected_ids[t:t + tau + 1])) sp_timeseries_data[tau - 1].append(Ntau / float(Nt)) From f9e829be2c6b3f621baecbc128a4a10fc6983e22 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Thu, 28 Feb 2019 14:35:57 +0000 Subject: [PATCH 07/16] Intermittency added with a simple implementation. Intermittency is taken as a gap: meaning that setting it to the value of 2 means that the atom id 7, when in the sequence 7,X,X,7, where X means absence, will be rewritten to 7,7,7,7. This way the intermittency does not affect the actual routines for SP calculation. The array of IDs should never be big so this should not add any significant computational time. --- package/MDAnalysis/analysis/waterdynamics.py | 49 +++++++++++++++++-- .../analysis/test_waterdynamics.py | 26 ++++++++++ 2 files changed, 70 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 78397570fa3..737c9217da8 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -1231,7 +1231,7 @@ def print(self, verbose, *args): elif verbose: print(args) - def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, verbose=False): + def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, verbose=False, intermittency=0): """ Computes and returns the survival probability timeseries @@ -1248,6 +1248,13 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, verbose=Fa residues : Boolean the selection and analysis will be carried out on the residues (resids) rather than on atoms. A single atom is sufficient to classify the residue as within the distance. + intermittency : int + The number of frames where the selected atom is allowed to leave but be counted as still in the selection. + 0 means it is continuous, which is that for tau_max \tau, the atom survives only if it is present at each + tau :math:`1 <= \tau <= tau_max`. For other values, the data is preprocessed to remove gaps of max size + of the intermittency value. Ie when set to 2, if the atom leaves the selection only for two frames, + it is counted as in. If intermittency is set to the value of \tau-1, the atom has to be present only in the first frame + and in the last frame. verbose : Boolean Overwrite the constructor's verbosity @@ -1306,8 +1313,41 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, verbose=Fa frame_no += 1 frame_loaded_counter += 1 - ## calculate Survival Probability - # TODO + # Pre-process Consecutive Intermittency + print('Original intermittency', selected_ids) + if intermittency > 0: + # Correct the data for the intermittency. If the atom is absent for a number of frames equal or smaller + # than the parameter intermittency, then correct the data and remove the absence. + # This is done by a separate pass over the data. + for i, ids in enumerate(selected_ids): + # look at the inter+2 ahead to see if we need to fill in any gaps + # ie 7,G,G,7 with inter=2 will be replaced by 7,7,7,7 + + # initially update each frame as seen 0 ago (now) + frames_ago_seen = {i: 0 for i in ids} + for j in range(1, intermittency + 2): + for atomid in frames_ago_seen.keys(): + if i + j >= len(selected_ids): + continue + + # if the atom is absent + if not atomid in selected_ids[i + j]: + # increase its absence counter + frames_ago_seen[atomid] += 1 + + # the atom is found + elif atomid in selected_ids[i + j]: + # if the atom was absent before + if frames_ago_seen[atomid] != 0 and frames_ago_seen[atomid] <= intermittency: + # add it to the frames where it was absent and it meets the criteria + for b in range(frames_ago_seen[atomid], 0, -1): + print('adding 9') + selected_ids[i + j - b].add(atomid) + + frames_ago_seen[atomid] = 0 + print('Corrected intermittency', selected_ids) + + # calculate Survival Probability tau_timeseries = np.arange(1, tau_max + 1) sp_timeseries_data = [[] for _ in range(tau_max)] @@ -1326,8 +1366,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, verbose=Fa if t + tau >= len(selected_ids): break - # ids that survive from t to t + tau and at every frame in between - # TODO intermittend + # continuous: IDs that survive from t to t + tau and at every frame in between Ntau = len(set.intersection(*selected_ids[t:t + tau + 1])) sp_timeseries_data[tau - 1].append(Ntau / float(Nt)) diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index 9bac73c785d..0c201fcb6a9 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -138,3 +138,29 @@ def test_SurvivalProbability_stepEqualDtMax(universe): sp.run(tau_max=4, step=5, stop=9, verbose=True) # all frames from 0, with 9 inclusive assert_equal(select_atoms_mock.call_count, 10) + + +def test_SurvivalProbability_intermittency1and2(universe): + """ + Intermittency of 2 means that we still count an atom if it is not present in 2 consecutive frames, but then returns + """ + with patch.object(universe, 'select_atoms') as select_atoms_mock: + ids = [(9, 8), (), (8,), (9,), (8,), (), (9,8), (), (8,), (9,8,)] + select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=3, stop=9, verbose=True, intermittency=2) + print(sp.sp_timeseries) + assert_almost_equal(sp.sp_timeseries, [1, 1, 1]) + + +def test_SurvivalProbability_intermittency2lacking(universe): + """ + Intermittency of 3 is required and not 2 for the atom to be counted. + """ + with patch.object(universe, 'select_atoms') as select_atoms_mock: + ids = [(9,), (), (), (), (9,), (), (), (), (9,)] + select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=3, stop=8, verbose=True, intermittency=2) + print(sp.sp_timeseries) + assert_almost_equal(sp.sp_timeseries, [0, 0, 0]) \ No newline at end of file From 91a1781175e781dc82eaafa9ee2b9f120a967812 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Thu, 28 Feb 2019 15:40:03 +0000 Subject: [PATCH 08/16] Refactored intermittency: fewer nested blocks, giving the user access to the selected IDs, using the selected IDs to verify the behaviour of the intermittency. --- package/MDAnalysis/analysis/waterdynamics.py | 106 ++++++++++-------- .../analysis/test_waterdynamics.py | 3 +- 2 files changed, 61 insertions(+), 48 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 737c9217da8..a4b28f13b18 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -1231,7 +1231,53 @@ def print(self, verbose, *args): elif verbose: print(args) - def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, verbose=False, intermittency=0): + def _correct_intermittency(self, intermittency, selected_ids): + """ + Pre-process Consecutive Intermittency + :param intermittency: the max gap allowed and to be corrected + :param selected_ids: modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency + """ + self.print('Correcting the selected IDs for intermittancy (gaps). ') + if intermittency == 0: + return + + # If the atom is absent for a number of frames equal or smaller + # than the parameter intermittency, then correct the data and remove the absence. + # ie 7,G,G,7 with inter=2 will be replaced by 7,7,7,7, where G=absence + for i, ids in enumerate(selected_ids): + # initially update each frame as seen 0 ago (now) + seen_frames_ago = {i: 0 for i in ids} + for j in range(1, intermittency + 2): + for atomid in seen_frames_ago.keys(): + # run out of atoms + if i + j >= len(selected_ids): + continue + + # if the atom is absent now + if not atomid in selected_ids[i + j]: + # increase its absence counter + seen_frames_ago[atomid] += 1 + continue + + # the atom is found + # it was present in the last frame + if seen_frames_ago[atomid] == 0: + continue + + # we are passed the gap + if seen_frames_ago[atomid] > intermittency: + continue + + # the atom was absent but returned on time (<= intermittency) + # add it to the frames where it was absent. + # Introduce the corrections. + for k in range(seen_frames_ago[atomid], 0, -1): + selected_ids[i + j - k].add(atomid) + + seen_frames_ago[atomid] = 0 + + + def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermittency=0, verbose=False): """ Computes and returns the survival probability timeseries @@ -1249,12 +1295,11 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, verbose=Fa the selection and analysis will be carried out on the residues (resids) rather than on atoms. A single atom is sufficient to classify the residue as within the distance. intermittency : int - The number of frames where the selected atom is allowed to leave but be counted as still in the selection. - 0 means it is continuous, which is that for tau_max \tau, the atom survives only if it is present at each - tau :math:`1 <= \tau <= tau_max`. For other values, the data is preprocessed to remove gaps of max size - of the intermittency value. Ie when set to 2, if the atom leaves the selection only for two frames, - it is counted as in. If intermittency is set to the value of \tau-1, the atom has to be present only in the first frame - and in the last frame. + The maximum number of frames where an atom can leave but count as present if it returns the next frame. + 0 means a continuous survival probability, which is that for tau_max \tau, the atom survives only if it + is present at each tau :math:`1 <= \tau <= tau_max`. For other values, the data is preprocessed to + remove gaps of max size of the intermittency value. Ie when set to 2, if the atom leaves the selection + only for two consecutive frames, it is counted as in. verbose : Boolean Overwrite the constructor's verbosity @@ -1284,7 +1329,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, verbose=Fa raise ValueError("Too few frames selected for given tau_max.") # preload the frames (atom IDs) to a list of sets - selected_ids = [] + self.selected_ids = [] # skip frames that will not be used # Example: step 5 and tau 2: L, L, L, S, S, L, L, L, S, S, ... with L = Load, and S = Skip @@ -1308,44 +1353,13 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, verbose=Fa # SP of residues or of atoms ids = atoms.residues.resids if residues else atoms.ids - selected_ids.append(set(ids)) + self.selected_ids.append(set(ids)) frame_no += 1 frame_loaded_counter += 1 - # Pre-process Consecutive Intermittency - print('Original intermittency', selected_ids) - if intermittency > 0: - # Correct the data for the intermittency. If the atom is absent for a number of frames equal or smaller - # than the parameter intermittency, then correct the data and remove the absence. - # This is done by a separate pass over the data. - for i, ids in enumerate(selected_ids): - # look at the inter+2 ahead to see if we need to fill in any gaps - # ie 7,G,G,7 with inter=2 will be replaced by 7,7,7,7 - - # initially update each frame as seen 0 ago (now) - frames_ago_seen = {i: 0 for i in ids} - for j in range(1, intermittency + 2): - for atomid in frames_ago_seen.keys(): - if i + j >= len(selected_ids): - continue - - # if the atom is absent - if not atomid in selected_ids[i + j]: - # increase its absence counter - frames_ago_seen[atomid] += 1 - - # the atom is found - elif atomid in selected_ids[i + j]: - # if the atom was absent before - if frames_ago_seen[atomid] != 0 and frames_ago_seen[atomid] <= intermittency: - # add it to the frames where it was absent and it meets the criteria - for b in range(frames_ago_seen[atomid], 0, -1): - print('adding 9') - selected_ids[i + j - b].add(atomid) - - frames_ago_seen[atomid] = 0 - print('Corrected intermittency', selected_ids) + # correct the dataset for gaps (intermittency) + self._correct_intermittency(intermittency, self.selected_ids) # calculate Survival Probability tau_timeseries = np.arange(1, tau_max + 1) @@ -1354,8 +1368,8 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, verbose=Fa # frames not analysed are skipped because they were not loaded step = tau_max + 1 if step >= (tau_max + 1) else step - for t in range(0, len(selected_ids), step): - Nt = len(selected_ids[t]) + for t in range(0, len(self.selected_ids), step): + Nt = len(self.selected_ids[t]) if Nt == 0: self.print(verbose, @@ -1363,11 +1377,11 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, verbose=Fa continue for tau in tau_timeseries: - if t + tau >= len(selected_ids): + if t + tau >= len(self.selected_ids): break # continuous: IDs that survive from t to t + tau and at every frame in between - Ntau = len(set.intersection(*selected_ids[t:t + tau + 1])) + Ntau = len(set.intersection(*self.selected_ids[t:t + tau + 1])) sp_timeseries_data[tau - 1].append(Ntau / float(Nt)) # user can investigate the distribution and sample size diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index 0c201fcb6a9..46043580cf8 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -149,7 +149,7 @@ def test_SurvivalProbability_intermittency1and2(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, stop=9, verbose=True, intermittency=2) - print(sp.sp_timeseries) + assert all((x == set([9, 8]) for x in sp.selected_ids)) assert_almost_equal(sp.sp_timeseries, [1, 1, 1]) @@ -162,5 +162,4 @@ def test_SurvivalProbability_intermittency2lacking(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, stop=8, verbose=True, intermittency=2) - print(sp.sp_timeseries) assert_almost_equal(sp.sp_timeseries, [0, 0, 0]) \ No newline at end of file From 4dde01f6240877b7bf297b7b2c6aec86781521f8 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Fri, 1 Mar 2019 12:56:27 +0000 Subject: [PATCH 09/16] Minor updates to the documentation. --- package/MDAnalysis/analysis/waterdynamics.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index a4b28f13b18..58614592944 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -276,10 +276,10 @@ SurvivalProbability ~~~~~~~~~~~~~~~~~~~ -Analyzing survival probability (SP) :class:`SurvivalProbability` for water -molecules. In this case we are analyzing how long water molecules remain in a -sphere of radius 12.3 centered in the geometrical center of resid 42, 26, 34 -and 80. A slow decay of SP means a long permanence time of water molecules in +Analyzing survival probability (SP) :class:`SurvivalProbability` of molecules. +In this case we are analyzing how long water molecules remain in a +sphere of radius 12.3 centered in the geometrical center of resid 42 and 26. +A slow decay of SP means a long permanence time of water molecules in the zone, on the other hand, a fast decay means a short permanence time:: import MDAnalysis @@ -287,7 +287,7 @@ import matplotlib.pyplot as plt universe = MDAnalysis.Universe(pdb, trajectory) - selection = "byres name OH2 and sphzone 12.3 (resid 42 or resid 26 or resid 34 or resid 80) " + selection = "byres name OH2 and sphzone 12.3 (resid 42 or resid 26) " sp = SP(universe, selection, verbose=True) sp.run(start=0, stop=100, tau_max=20) tau_timeseries = sp.tau_timeseries @@ -378,13 +378,13 @@ SurvivalProbability ~~~~~~~~~~~~~~~~~~~ -Survival Probability (SP) computes two lists: a list of taus (:attr:`SurvivalProbability.tau_timeseries`) and a list of their corresponding mean survival -probabilities (:attr:`SurvivalProbability.sp_timeseries`). Additionally, a list :attr:`SurvivalProbability.sp_timeseries_data` is provided which contains -a list of SPs for each tau, which can be used to compute their distribution, etc. +Survival Probability (SP) computes two lists: a list of taus (:attr:`SurvivalProbability.tau_timeseries`) and a list of + the corresponding survival probabilities (:attr:`SurvivalProbability.sp_timeseries`). results = [ tau1, tau2, ..., tau_n ], [ sp_tau1, sp_tau2, ..., sp_tau_n] -Additionally, for each +Additionally, a list :attr:`SurvivalProbability.sp_timeseries_data` is provided which contains +a not averaged list of SPs for each tau, which can be used to compute their distribution, etc. Classes -------- From ba79bcfff9a6d41c816d72d9b27517fac6e0fb55 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Fri, 8 Mar 2019 14:30:14 +0000 Subject: [PATCH 10/16] New test cases for the step+intermittency relation. - not finished --- .../analysis/test_waterdynamics.py | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index 46043580cf8..e621d0462ed 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -162,4 +162,34 @@ def test_SurvivalProbability_intermittency2lacking(universe): select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") sp.run(tau_max=3, stop=8, verbose=True, intermittency=2) - assert_almost_equal(sp.sp_timeseries, [0, 0, 0]) \ No newline at end of file + assert_almost_equal(sp.sp_timeseries, [0, 0, 0]) + + +def test_SurvivalProbability_intermittency1_step5_noSkipping(universe): + """ + Step means skipping frames is tau_max + 2 * intermittency + 1 < step. + No frames should be skipped. + """ + with patch.object(universe, 'select_atoms') as select_atoms_mock: + ids = [(2, 3), (3,), (2, 3), (3,), (2,), (3,), (2, 3), (3,), (2, 3), (2, 3), (2, 3)] + select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=2, stop=10, verbose=True, intermittency=1, step=5) + assert all((x == set([2, 3]) for x in sp.selected_ids)) + assert_almost_equal(sp.sp_timeseries, [1, 1]) + +def test_SurvivalProbability_intermittency1_step5_Skipping(universe): + """ + Step means skipping frames is tau_max + 2 * intermittency + 1 < step. + One frame will be skipped. + """ + with patch.object(universe, 'select_atoms') as select_atoms_mock: + ids = [(1,), (), (1,), (), (1,), (), (1,), (), (1), (1), ()] + select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set + sp = waterdynamics.SurvivalProbability(universe, "") + sp.run(tau_max=1, stop=10, verbose=True, intermittency=1, step=5) + assert all((x == set([1]) for x in sp.selected_ids)) + assert(len(sp.selected_ids) == len(ids) - 1) + assert_almost_equal(sp.sp_timeseries, [1]) + + From ff4d266f7604c9d632f3c2aa6b081a5583a92e63 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Fri, 8 Mar 2019 17:37:51 +0000 Subject: [PATCH 11/16] The relationship between the intermittency and the window "step" parameter is complex and was accounted for here, together with new test cases. If necessary, we load extra frames for each window, to account for the borders: for the first and last frame in a window. --- package/MDAnalysis/analysis/waterdynamics.py | 29 +++++++++++++------ .../analysis/test_waterdynamics.py | 17 ++++++----- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 58614592944..cd896181226 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -1288,7 +1288,8 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte stop : int Zero-based index of the last frame to be analysed (inclusive) step : int - Jump every `step`'th frame + Jump every `step`'th frame. This is compatible but independant of the taus used, and it is good to consider + using the step as tau_max to remove the overlap. Note that step and taus are consistent with intermittency. tau_max : int Survival probability is calculated for the range :math:`1 <= \tau <= tau_max` residues : Boolean @@ -1332,17 +1333,26 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte self.selected_ids = [] # skip frames that will not be used - # Example: step 5 and tau 2: L, L, L, S, S, L, L, L, S, S, ... with L = Load, and S = Skip + # Example: step 5 and tau 2: LLLSS LLLSS, ... where L = Load, and S = Skip + # Intermittency means that we have to load the extra frames to know if the atom is actually missing. + # Say step=5 and tau=1, intermittency=0: LLSSS LLSSS + # Say step=5 and tau=1, intermittency=1: LLLSL LLLSL frame_loaded_counter = 0 - frames_to_load_no = tau_max + 1 - frames_to_skip_no = max(step - (tau_max + 1), 0) + # only for the first window (frames before t are not used) + frames_per_window = tau_max + 1 + intermittency + # This number will apply after the first windows was loaded + frames_per_window_subsequent = (tau_max + 1) + (2 * intermittency) + num_frames_to_skip = max(step - frames_per_window_subsequent, 0) frame_no = start while frame_no < stop: # we have already added 1 to stop, therefore < - if frames_to_skip_no != 0 and frame_loaded_counter == frames_to_load_no: - self.print(verbose, "Skipping the next %d frames:" % frames_to_skip_no) - frame_no += frames_to_skip_no + if num_frames_to_skip != 0 and frame_loaded_counter == frames_per_window: + self.print(verbose, "Skipping the next %d frames:" % num_frames_to_skip) + frame_no += num_frames_to_skip frame_loaded_counter = 0 + # Correct the number of frames to be loaded after the first window (which starts at t=0, and + # intermittency does not apply to the frames before) + frames_per_window = frames_per_window_subsequent continue # update the frame number @@ -1365,8 +1375,9 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte tau_timeseries = np.arange(1, tau_max + 1) sp_timeseries_data = [[] for _ in range(tau_max)] - # frames not analysed are skipped because they were not loaded - step = tau_max + 1 if step >= (tau_max + 1) else step + # adjust step for the frames that were not loaded + # todo + step = step - num_frames_to_skip for t in range(0, len(self.selected_ids), step): Nt = len(self.selected_ids[t]) diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index e621d0462ed..9ab2e680153 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -167,29 +167,30 @@ def test_SurvivalProbability_intermittency2lacking(universe): def test_SurvivalProbability_intermittency1_step5_noSkipping(universe): """ - Step means skipping frames is tau_max + 2 * intermittency + 1 < step. + Step leads to skipping frames if (tau_max + 1) + (intermittency * 2) < step. No frames should be skipped. """ with patch.object(universe, 'select_atoms') as select_atoms_mock: - ids = [(2, 3), (3,), (2, 3), (3,), (2,), (3,), (2, 3), (3,), (2, 3), (2, 3), (2, 3)] + ids = [(2, 3), (3,), (2, 3), (3,), (2,), (3,), (2, 3), (3,), (2, 3), (2, 3)] select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=2, stop=10, verbose=True, intermittency=1, step=5) + sp.run(tau_max=2, stop=9, verbose=True, intermittency=1, step=5) assert all((x == set([2, 3]) for x in sp.selected_ids)) assert_almost_equal(sp.sp_timeseries, [1, 1]) def test_SurvivalProbability_intermittency1_step5_Skipping(universe): """ - Step means skipping frames is tau_max + 2 * intermittency + 1 < step. - One frame will be skipped. + Step leads to skipping frames if (tau_max + 1) * (intermittency * 2) < step. + In this case one frame will be skipped per window. """ with patch.object(universe, 'select_atoms') as select_atoms_mock: - ids = [(1,), (), (1,), (), (1,), (), (1,), (), (1), (1), ()] + ids = [(1,), (), (1,), (), (1,), (), (1,), (), (1,), (1,)] + beforepopsing = len(ids) - 2 select_atoms_mock.side_effect = lambda selection: Mock(ids=ids.pop()) # atom IDs fed set by set sp = waterdynamics.SurvivalProbability(universe, "") - sp.run(tau_max=1, stop=10, verbose=True, intermittency=1, step=5) + sp.run(tau_max=1, stop=9, verbose=True, intermittency=1, step=5) assert all((x == set([1]) for x in sp.selected_ids)) - assert(len(sp.selected_ids) == len(ids) - 1) + assert len(sp.selected_ids) == beforepopsing assert_almost_equal(sp.sp_timeseries, [1]) From 417d229214735ccbb9f9a10b05616b0e98ddc35b Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Fri, 8 Mar 2019 18:27:30 +0000 Subject: [PATCH 12/16] Corrected documentation / proofreading. --- package/MDAnalysis/analysis/waterdynamics.py | 60 ++++++++++--------- .../analysis/test_waterdynamics.py | 9 ++- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index cd896181226..c334a77d3a9 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -297,6 +297,10 @@ for tau, sp in zip(tau_timeseries, sp_timeseries): print("{time} {sp}".format(time=tau, sp=sp)) + # SP is not calculated at tau=0, but if you would like to plot SP=1 at tau=0: + tau_timeseries.insert(0, 0) + sp_timeseries.insert(1, 0) + # plot plt.xlabel('Time') plt.ylabel('SP') @@ -383,8 +387,8 @@ results = [ tau1, tau2, ..., tau_n ], [ sp_tau1, sp_tau2, ..., sp_tau_n] -Additionally, a list :attr:`SurvivalProbability.sp_timeseries_data` is provided which contains -a not averaged list of SPs for each tau, which can be used to compute their distribution, etc. +Additionally, a list :attr:`SurvivalProbability.sp_timeseries_data`, is provided which contains +a list of all SPs calculated for each tau. This can be used to compute the distribution or time dependence etc. of SP. Classes -------- @@ -1176,11 +1180,10 @@ def run(self, **kwargs): class SurvivalProbability(object): - r"""Survival probability analysis + r"""Survival Probability (SP) analysis - Function to evaluate the Survival Probability (SP). The SP gives the - probability for a group of particles to remain in certain region. The SP is - given by: + The SP gives the probability for a group of particles to remain in certain region. + The SP is given by: .. math:: P(\tau) = \frac1T \sum_{t=1}^T \frac{N(t,t+\tau)}{N(t)} @@ -1196,9 +1199,8 @@ class SurvivalProbability(object): Universe object selection : str Selection string; any selection is allowed. With this selection you - define the region/zone where to analyze, e.g.: "resname SOL and around 5 (resname LIPID)" - and "resname ION and around 10 (resid 20)" (see `SP-examples`_ ) - verbose : Boolean + define the region/zone where to analyze, e.g.: "resname SOL and around 5 (resid 10)" (see `SP-examples`_ ) + verbose : Boolean, optional If True, prints progress and comments to the console. @@ -1243,7 +1245,7 @@ def _correct_intermittency(self, intermittency, selected_ids): # If the atom is absent for a number of frames equal or smaller # than the parameter intermittency, then correct the data and remove the absence. - # ie 7,G,G,7 with inter=2 will be replaced by 7,7,7,7, where G=absence + # ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence for i, ids in enumerate(selected_ids): # initially update each frame as seen 0 ago (now) seen_frames_ago = {i: 0 for i in ids} @@ -1279,30 +1281,30 @@ def _correct_intermittency(self, intermittency, selected_ids): def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermittency=0, verbose=False): """ - Computes and returns the survival probability timeseries + Computes and returns the Survival Probability (SP) timeseries Parameters ---------- - start : int + start : int, optional Zero-based index of the first frame to be analysed - stop : int + stop : int, optional Zero-based index of the last frame to be analysed (inclusive) - step : int + step : int, optional Jump every `step`'th frame. This is compatible but independant of the taus used, and it is good to consider using the step as tau_max to remove the overlap. Note that step and taus are consistent with intermittency. - tau_max : int + tau_max : int, optional Survival probability is calculated for the range :math:`1 <= \tau <= tau_max` - residues : Boolean - the selection and analysis will be carried out on the residues (resids) rather than on atoms. + residues : Boolean, optional + If true, the analysis will be carried out on the residues (.resids) rather than on atom (.ids). A single atom is sufficient to classify the residue as within the distance. - intermittency : int - The maximum number of frames where an atom can leave but count as present if it returns the next frame. - 0 means a continuous survival probability, which is that for tau_max \tau, the atom survives only if it - is present at each tau :math:`1 <= \tau <= tau_max`. For other values, the data is preprocessed to - remove gaps of max size of the intermittency value. Ie when set to 2, if the atom leaves the selection - only for two consecutive frames, it is counted as in. - verbose : Boolean - Overwrite the constructor's verbosity + intermittency : int, optional + The maximum number of consecutive frames for which an atom can leave but be counted as present if it returns + at the next frame. An intermittency of `0` is equivalent to a continuous survival probability, which does + not allow for the leaving and returning of atoms. For example, for `intermittency=2', any given atom may + leave a region of interest for up to two consecutive frames yet be treated as being present at all frames. + The default is continuous (0). + verbose : Boolean, optional + Print the progress to the console Returns ------- @@ -1375,11 +1377,11 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte tau_timeseries = np.arange(1, tau_max + 1) sp_timeseries_data = [[] for _ in range(tau_max)] - # adjust step for the frames that were not loaded - # todo - step = step - num_frames_to_skip + # adjust for the frames that were not loaded (step>tau_max + 1), + # and for extra frames that were loaded (intermittency) + window_jump = step - num_frames_to_skip - for t in range(0, len(self.selected_ids), step): + for t in range(0, len(self.selected_ids), window_jump): Nt = len(self.selected_ids[t]) if Nt == 0: diff --git a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py index 9ab2e680153..e6dc29b0f35 100644 --- a/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py +++ b/testsuite/MDAnalysisTests/analysis/test_waterdynamics.py @@ -142,7 +142,8 @@ def test_SurvivalProbability_stepEqualDtMax(universe): def test_SurvivalProbability_intermittency1and2(universe): """ - Intermittency of 2 means that we still count an atom if it is not present in 2 consecutive frames, but then returns + Intermittency of 2 means that we still count an atom if it is not present for up to 2 consecutive frames, + but then returns at the following step. """ with patch.object(universe, 'select_atoms') as select_atoms_mock: ids = [(9, 8), (), (8,), (9,), (8,), (), (9,8), (), (8,), (9,8,)] @@ -155,7 +156,7 @@ def test_SurvivalProbability_intermittency1and2(universe): def test_SurvivalProbability_intermittency2lacking(universe): """ - Intermittency of 3 is required and not 2 for the atom to be counted. + If an atom is not present for more than 2 consecutive frames, it is considered to have left the region. """ with patch.object(universe, 'select_atoms') as select_atoms_mock: ids = [(9,), (), (), (), (9,), (), (), (), (9,)] @@ -191,6 +192,4 @@ def test_SurvivalProbability_intermittency1_step5_Skipping(universe): sp.run(tau_max=1, stop=9, verbose=True, intermittency=1, step=5) assert all((x == set([1]) for x in sp.selected_ids)) assert len(sp.selected_ids) == beforepopsing - assert_almost_equal(sp.sp_timeseries, [1]) - - + assert_almost_equal(sp.sp_timeseries, [1]) \ No newline at end of file From 35769843b6864f51e4b53bb8b0417a2101cd9409 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Fri, 8 Mar 2019 18:59:31 +0000 Subject: [PATCH 13/16] An example covering the case with multiple references for the SP analysis. This requires several runs and averaging. --- package/MDAnalysis/analysis/waterdynamics.py | 33 ++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index c334a77d3a9..0c0130cec6e 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -308,6 +308,39 @@ plt.plot(tau_timeseries, sp_timeseries) plt.show() +Another example applies to the situation where you work with many different "residues". +Here we calculate the SP of a potassium ion around each lipid in a membrane and +average the results. In this example, if the SP analysis were run without treating each lipid +separately, potassium ions may hop from one lipid to another and still be counted as remaining +in the specified region. That is, the survival probability of the potassium ion around the +entire membrane will be calculated. + +Note, for this example, it is advisable to use .Universe(in_memory=True) to ensure that the +simulation does not loaded into RAM memory once per lipid:: + + import MDAnalysis + from MDAnalysis.analysis.waterdynamics import SurvivalProbability as SP + import numpy as np + + u = MDAnalysis.Universe("md.gro", "md100ns.xtc", in_memory=True) + lipids = u.select_atoms('resname LIPIDS') + joined_sp_timeseries = [[] for _ in range(20)] + for lipid in lipids.residues: + print("Lipid ID: %d" % lipid.resid) + + selection = "resname POTASSIUM and around 3.5 (resid %d and name O13 O14) " % lipid.resid + sp = SP(u, selection, verbose=True) + sp.run(tau_max=20) + + # Raw SP points for each tau: + for sps, new_sps in zip(joined_sp_timeseries, sp.sp_timeseries_data): + sps.extend(new_sps) + + # average all SP datapoints + sp_data = [np.mean(sp) for sp in joined_sp_timeseries] + + for tau, sp in zip(range(1, tau_max + 1), sp_data): + print("{time} {sp}".format(time=tau, sp=sp)) .. _Output: From b67a7daeada6f5a0ec5fed8487a5336f48ae35bc Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Wed, 3 Apr 2019 11:39:34 +0100 Subject: [PATCH 14/16] Logging changes made regarding Survival Probability --- package/CHANGELOG | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index a283ff2fed5..697e87a884a 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -15,7 +15,7 @@ The rules for this file: ------------------------------------------------------------------------------ mm/dd/yy micaela-matta, xiki-tempula, zemanj, mattwthompson, orbeckst, aliehlen, dpadula85, jbarnoud, manuel.nuno.melo, richardjgowers, mattwthompson, - ayushsuhane, picocentauri, NinadBhat + ayushsuhane, picocentauri, NinadBhat, bieniekmateusz, p-j-smith * 0.20.0 @@ -49,6 +49,8 @@ Enhancements distance_array. (Issue #2103, PR #2209) * updated analysis.distances.contact_matrix to use capped_distance. (Issue #2102, PR #2215) + * survival probability additions: residues, intermittency, step with performance, + (PR #2226) Changes * added official support for Python 3.7 (PR #1963) From 970e05b48bfe7b8194148ccbb1a785a36abe8a42 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Wed, 3 Apr 2019 15:07:28 +0100 Subject: [PATCH 15/16] Corrections from PR #2226 --- package/MDAnalysis/analysis/waterdynamics.py | 53 ++++++++++---------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 0c0130cec6e..20e2b09eab8 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -315,14 +315,14 @@ in the specified region. That is, the survival probability of the potassium ion around the entire membrane will be calculated. -Note, for this example, it is advisable to use .Universe(in_memory=True) to ensure that the -simulation does not loaded into RAM memory once per lipid:: +Note, for this example, it is advisable to use `Universe(in_memory=True)` to ensure that the +simulation is not being reloaded into memory for each lipid:: - import MDAnalysis + import MDAnalysis as mda from MDAnalysis.analysis.waterdynamics import SurvivalProbability as SP import numpy as np - u = MDAnalysis.Universe("md.gro", "md100ns.xtc", in_memory=True) + u = mda.Universe("md.gro", "md100ns.xtc", in_memory=True) lipids = u.select_atoms('resname LIPIDS') joined_sp_timeseries = [[] for _ in range(20)] for lipid in lipids.residues: @@ -416,12 +416,12 @@ ~~~~~~~~~~~~~~~~~~~ Survival Probability (SP) computes two lists: a list of taus (:attr:`SurvivalProbability.tau_timeseries`) and a list of - the corresponding survival probabilities (:attr:`SurvivalProbability.sp_timeseries`). +the corresponding survival probabilities (:attr:`SurvivalProbability.sp_timeseries`). results = [ tau1, tau2, ..., tau_n ], [ sp_tau1, sp_tau2, ..., sp_tau_n] Additionally, a list :attr:`SurvivalProbability.sp_timeseries_data`, is provided which contains -a list of all SPs calculated for each tau. This can be used to compute the distribution or time dependence etc. of SP. +a list of all SPs calculated for each tau. This can be used to compute the distribution or time dependence of SP, etc. Classes -------- @@ -1213,17 +1213,16 @@ def run(self, **kwargs): class SurvivalProbability(object): - r"""Survival Probability (SP) analysis - - The SP gives the probability for a group of particles to remain in certain region. + r""" + Survival Probability (SP) gives the probability for a group of particles to remain in a certain region. The SP is given by: .. math:: P(\tau) = \frac1T \sum_{t=1}^T \frac{N(t,t+\tau)}{N(t)} where :math:`T` is the maximum time of simulation, :math:`\tau` is the - timestep, :math:`N(t)` the number of particles at time t, and - :math:`N(t, t+\tau)` is the number of particles at every frame from t to `\tau`. + timestep, :math:`N(t)` the number of particles at time :math:`t`, and + :math:`N(t, t+\tau)` is the number of particles at every frame from :math:`t` to `\tau`. Parameters @@ -1232,9 +1231,9 @@ class SurvivalProbability(object): Universe object selection : str Selection string; any selection is allowed. With this selection you - define the region/zone where to analyze, e.g.: "resname SOL and around 5 (resid 10)" (see `SP-examples`_ ) + define the region/zone where to analyze, e.g.: "resname SOL and around 5 (resid 10)". See `SP-examples`_. verbose : Boolean, optional - If True, prints progress and comments to the console. + When True, prints progress and comments to the console. .. versionadded:: 0.11.0 @@ -1268,7 +1267,11 @@ def print(self, verbose, *args): def _correct_intermittency(self, intermittency, selected_ids): """ - Pre-process Consecutive Intermittency + Pre-process Consecutive Intermittency with a single pass over the data. + If an atom is absent for a number of frames equal or smaller + than the parameter intermittency, then correct the data and remove the absence. + ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence + :param intermittency: the max gap allowed and to be corrected :param selected_ids: modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency """ @@ -1276,15 +1279,12 @@ def _correct_intermittency(self, intermittency, selected_ids): if intermittency == 0: return - # If the atom is absent for a number of frames equal or smaller - # than the parameter intermittency, then correct the data and remove the absence. - # ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence for i, ids in enumerate(selected_ids): # initially update each frame as seen 0 ago (now) seen_frames_ago = {i: 0 for i in ids} for j in range(1, intermittency + 2): for atomid in seen_frames_ago.keys(): - # run out of atoms + # no more frames if i + j >= len(selected_ids): continue @@ -1295,15 +1295,15 @@ def _correct_intermittency(self, intermittency, selected_ids): continue # the atom is found - # it was present in the last frame if seen_frames_ago[atomid] == 0: + # the atom was present in the last frame continue - # we are passed the gap + # it was absent more times than allowed if seen_frames_ago[atomid] > intermittency: continue - # the atom was absent but returned on time (<= intermittency) + # the atom was absent but returned (within <= intermittency) # add it to the frames where it was absent. # Introduce the corrections. for k in range(seen_frames_ago[atomid], 0, -1): @@ -1323,17 +1323,18 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte stop : int, optional Zero-based index of the last frame to be analysed (inclusive) step : int, optional - Jump every `step`'th frame. This is compatible but independant of the taus used, and it is good to consider - using the step as tau_max to remove the overlap. Note that step and taus are consistent with intermittency. + Jump every `step`-th frame. This is compatible but independant of the taus used, and it is good to consider + using the `step` equal to `tau_max` to remove the overlap. + Note that `step` and `tau_max` work consistently with intermittency. tau_max : int, optional - Survival probability is calculated for the range :math:`1 <= \tau <= tau_max` + Survival probability is calculated for the range 1 <= `tau` <= `tau_max` residues : Boolean, optional If true, the analysis will be carried out on the residues (.resids) rather than on atom (.ids). A single atom is sufficient to classify the residue as within the distance. intermittency : int, optional The maximum number of consecutive frames for which an atom can leave but be counted as present if it returns at the next frame. An intermittency of `0` is equivalent to a continuous survival probability, which does - not allow for the leaving and returning of atoms. For example, for `intermittency=2', any given atom may + not allow for the leaving and returning of atoms. For example, for `intermittency=2`, any given atom may leave a region of interest for up to two consecutive frames yet be treated as being present at all frames. The default is continuous (0). verbose : Boolean, optional @@ -1342,7 +1343,7 @@ def run(self, tau_max=20, start=0, stop=None, step=1, residues=False, intermitte Returns ------- tau_timeseries : list - tau from 1 to tau_max. Saved in the field tau_timeseries. + tau from 1 to `tau_max`. Saved in the field tau_timeseries. sp_timeseries : list survival probability for each value of `tau`. Saved in the field sp_timeseries. """ From aaf30bc9ad61c19470e3c21530524f093f29b779 Mon Sep 17 00:00:00 2001 From: Mateusz Bieniek Date: Fri, 5 Apr 2019 10:14:44 +0100 Subject: [PATCH 16/16] Numpy documentation style: PR #2226 --- package/MDAnalysis/analysis/waterdynamics.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/package/MDAnalysis/analysis/waterdynamics.py b/package/MDAnalysis/analysis/waterdynamics.py index 20e2b09eab8..a0f3b8cd379 100644 --- a/package/MDAnalysis/analysis/waterdynamics.py +++ b/package/MDAnalysis/analysis/waterdynamics.py @@ -1269,11 +1269,15 @@ def _correct_intermittency(self, intermittency, selected_ids): """ Pre-process Consecutive Intermittency with a single pass over the data. If an atom is absent for a number of frames equal or smaller - than the parameter intermittency, then correct the data and remove the absence. + than the `intermittency`, then correct the data and remove the absence. ie 7,A,A,7 with intermittency=2 will be replaced by 7,7,7,7, where A=absence - :param intermittency: the max gap allowed and to be corrected - :param selected_ids: modifies the selecteded IDs in place by adding atoms which left for <= :param intermittency + Parameters + ---------- + intermittency : int + the max gap allowed and to be corrected + selected_ids: list of ids + modifies the selecteded IDs in place by adding atoms which left for <= `intermittency` """ self.print('Correcting the selected IDs for intermittancy (gaps). ') if intermittency == 0: