From 474be5bbe32270bb9ddf02dc3cab74d3c1312c5e Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Mon, 7 Oct 2024 06:17:42 +0200 Subject: [PATCH] Implementation of Parallelization to analysis.hydrogenbonds.hbond_analysis (#4718) - Fixes #4664 - Added parallelization backend support to the class HydrogenBondAnalysis in hbond_analysis.py - Moved setting up of donors and acceptors from _prepare() to __init__() (needed to make parallel processing work) - Addition of parallelization tests in test_hydrogenbonds_analysis.py and fixtures in conftest.py - Updated Changelog --------- Co-authored-by: Yuxuan Zhuang Co-authored-by: Oliver Beckstein --- package/CHANGELOG | 3 +- .../analysis/hydrogenbonds/hbond_analysis.py | 36 +++++++++----- .../MDAnalysisTests/analysis/conftest.py | 10 ++++ .../analysis/test_hydrogenbonds_analysis.py | 48 ++++++++++--------- 4 files changed, 62 insertions(+), 35 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 62ab8a04630..ec5fe28ecad 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -62,7 +62,8 @@ Enhancements * explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680) * enables parallelization for analysis.bat.BAT (Issue #4663) * enable parallelization for analysis.dihedrals.{Dihedral,Ramachandran,Janin} - (Issue #4673) + (Issue #4673) + * Enables parallelization for analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis (Issue #4664) * Improve error message for `AtomGroup.unwrap()` when bonds are not present.(Issue #4436, PR #4642) * Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP) * Added a tqdm progress bar for `MDAnalysis.analysis.pca.PCA.transform()` diff --git a/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py index e0597e2c3d2..3bf9d5c27a9 100644 --- 
a/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py @@ -241,7 +241,7 @@ import numpy as np -from ..base import AnalysisBase, Results +from ..base import AnalysisBase, Results, ResultsGroup from MDAnalysis.lib.distances import capped_distance, calc_angles from MDAnalysis.lib.correlations import autocorrelation, correct_intermittency from MDAnalysis.exceptions import NoDataError @@ -267,6 +267,12 @@ class HydrogenBondAnalysis(AnalysisBase): Perform an analysis of hydrogen bonds in a Universe. """ + _analysis_algorithm_is_parallelizable = True + + @classmethod + def get_supported_backends(cls): + return ('serial', 'multiprocessing', 'dask',) + def __init__(self, universe, donors_sel=None, hydrogens_sel=None, acceptors_sel=None, between=None, d_h_cutoff=1.2, @@ -335,7 +341,9 @@ def __init__(self, universe, .. versionchanged:: 2.4.0 Added use of atom types in selection strings for hydrogen atoms, bond donors, or bond acceptors - + .. versionchanged:: 2.8.0 + Introduced :meth:`get_supported_backends` allowing for parallel execution on + :mod:`multiprocessing` and :mod:`dask` backends. 
""" self.u = universe @@ -383,6 +391,17 @@ def __init__(self, universe, self.results = Results() self.results.hbonds = None + # Set atom selections if they have not been provided + if self.acceptors_sel is None: + self.acceptors_sel = self.guess_acceptors() + if self.hydrogens_sel is None: + self.hydrogens_sel = self.guess_hydrogens() + + # Select atom groups + self._acceptors = self.u.select_atoms(self.acceptors_sel, + updating=self.update_selections) + self._donors, self._hydrogens = self._get_dh_pairs() + def guess_hydrogens(self, select='all', max_mass=1.1, @@ -699,16 +718,6 @@ def _filter_atoms(self, donors, acceptors): def _prepare(self): self.results.hbonds = [[], [], [], [], [], []] - # Set atom selections if they have not been provided - if self.acceptors_sel is None: - self.acceptors_sel = self.guess_acceptors() - if self.hydrogens_sel is None: - self.hydrogens_sel = self.guess_hydrogens() - - # Select atom groups - self._acceptors = self.u.select_atoms(self.acceptors_sel, - updating=self.update_selections) - self._donors, self._hydrogens = self._get_dh_pairs() def _single_frame(self): @@ -788,6 +797,9 @@ def _conclude(self): self.results.hbonds = np.asarray(self.results.hbonds).T + def _get_aggregator(self): + return ResultsGroup(lookup={'hbonds': ResultsGroup.ndarray_hstack}) + @property def hbonds(self): wmsg = ("The `hbonds` attribute was deprecated in MDAnalysis 2.0.0 " diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index 5c6157d3bb6..b5fe975dcd8 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -10,8 +10,12 @@ from MDAnalysis.analysis.dihedrals import Dihedral, Ramachandran, Janin from MDAnalysis.analysis.bat import BAT from MDAnalysis.analysis.gnm import GNMAnalysis +from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import ( + HydrogenBondAnalysis, +) from MDAnalysis.lib.util import is_installed + def 
params_for_cls(cls, exclude: list[str] = None): """ This part contains fixtures for simultaneous testing @@ -122,3 +126,9 @@ def client_GNMAnalysis(request): @pytest.fixture(scope='module', params=params_for_cls(BAT)) def client_BAT(request): return request.param + +# MDAnalysis.analysis.hydrogenbonds + +@pytest.fixture(scope='module', params=params_for_cls(HydrogenBondAnalysis)) +def client_HydrogenBondAnalysis(request): + return request.param \ No newline at end of file diff --git a/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py b/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py index b8ea644fc4a..503560a648f 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py +++ b/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py @@ -53,9 +53,9 @@ def universe(): } @pytest.fixture(scope='class') - def h(self, universe): + def h(self, universe, client_HydrogenBondAnalysis): h = HydrogenBondAnalysis(universe, **self.kwargs) - h.run() + h.run(**client_HydrogenBondAnalysis) return h def test_hbond_analysis(self, h): @@ -181,12 +181,12 @@ def universe(): @staticmethod @pytest.fixture(scope='class') - def hydrogen_bonds(universe): + def hydrogen_bonds(universe, client_HydrogenBondAnalysis): h = HydrogenBondAnalysis( universe, **TestHydrogenBondAnalysisIdeal.kwargs ) - h.run() + h.run(**client_HydrogenBondAnalysis) return h def test_count_by_type(self, hydrogen_bonds): @@ -208,9 +208,12 @@ def test_no_bond_info_exception(self, universe): 'd_h_a_angle_cutoff': 120.0 } + u = universe.copy() + n_residues = 2 + u.add_TopologyAttr('mass', [15.999, 1.008, 1.008] * n_residues) + u.add_TopologyAttr('charge', [-1.04, 0.52, 0.52] * n_residues) with pytest.raises(NoDataError, match="no bond information"): - h = HydrogenBondAnalysis(universe, **kwargs) - h._get_dh_pairs() + h = HydrogenBondAnalysis(u, **kwargs) def test_no_bond_donor_sel(self, universe): @@ -263,10 +266,11 @@ def test_no_attr_hbonds(self, universe): 
with pytest.raises(NoDataError, match=".hbonds attribute is None"): hbonds.lifetime(tau_max=2, intermittency=1) - def test_logging_step_not_1(self, universe, caplog): + def test_logging_step_not_1(self, universe, caplog, + client_HydrogenBondAnalysis): hbonds = HydrogenBondAnalysis(universe, **self.kwargs) # using step 2 - hbonds.run(step=2) + hbonds.run(**client_HydrogenBondAnalysis, step=2) caplog.set_level(logging.WARNING) hbonds.lifetime(tau_max=2, intermittency=1) @@ -342,12 +346,12 @@ def universe(): @staticmethod @pytest.fixture(scope='class') - def hydrogen_bonds(universe): + def hydrogen_bonds(universe, client_HydrogenBondAnalysis): h = HydrogenBondAnalysis( universe, **TestHydrogenBondAnalysisNoRes.kwargs ) - h.run() + h.run(**client_HydrogenBondAnalysis) return h def test_no_hydrogen_bonds(self, universe): @@ -441,10 +445,10 @@ def universe(): return u - def test_between_all(self, universe): + def test_between_all(self, universe, client_HydrogenBondAnalysis): # don't specify groups between which to find hydrogen bonds hbonds = HydrogenBondAnalysis(universe, between=None, **self.kwargs) - hbonds.run() + hbonds.run(**client_HydrogenBondAnalysis) # indices of [donor, hydrogen, acceptor] for each hydrogen bond expected_hbond_indices = [ @@ -457,14 +461,14 @@ def test_between_all(self, universe): expected_hbond_indices) assert_allclose(hbonds.results.hbonds[:, 4], expected_hbond_distances) - def test_between_PW(self, universe): + def test_between_PW(self, universe, client_HydrogenBondAnalysis): # Find only protein-water hydrogen bonds hbonds = HydrogenBondAnalysis( universe, between=["resname PROT", "resname SOL"], **self.kwargs ) - hbonds.run() + hbonds.run(**client_HydrogenBondAnalysis) # indices of [donor, hydrogen, acceptor] for each hydrogen bond expected_hbond_indices = [ @@ -475,7 +479,7 @@ def test_between_PW(self, universe): expected_hbond_indices) assert_allclose(hbonds.results.hbonds[:, 4], expected_hbond_distances) - def test_between_PW_PP(self, 
universe): + def test_between_PW_PP(self, universe, client_HydrogenBondAnalysis): # Find protein-water and protein-protein hydrogen bonds (not # water-water) hbonds = HydrogenBondAnalysis( @@ -486,7 +490,7 @@ def test_between_PW_PP(self, universe): ], **self.kwargs ) - hbonds.run() + hbonds.run(**client_HydrogenBondAnalysis) # indices of [donor, hydrogen, acceptor] for each hydrogen bond expected_hbond_indices = [ @@ -512,7 +516,7 @@ class TestHydrogenBondAnalysisTIP3P_GuessAcceptors_GuessHydrogens_UseTopology_(T 'd_h_a_angle_cutoff': 120.0 } - def test_no_hydrogens(self, universe): + def test_no_hydrogens(self, universe, client_HydrogenBondAnalysis): # If no hydrogens are identified at a given frame, check an # empty donor atom group is created test_kwargs = TestHydrogenBondAnalysisTIP3P.kwargs.copy() @@ -520,7 +524,7 @@ def test_no_hydrogens(self, universe): test_kwargs['hydrogens_sel'] = "name H" # no atoms have name H h = HydrogenBondAnalysis(universe, **test_kwargs) - h.run() + h.run(**client_HydrogenBondAnalysis) assert h._hydrogens.n_atoms == 0 assert h._donors.n_atoms == 0 @@ -629,9 +633,9 @@ def universe(): } @pytest.fixture(scope='class') - def h(self, universe): + def h(self, universe, client_HydrogenBondAnalysis): h = HydrogenBondAnalysis(universe, **self.kwargs) - h.run(start=1, step=2) + h.run(**client_HydrogenBondAnalysis, start=1, step=2) return h def test_hbond_analysis(self, h): @@ -690,11 +694,11 @@ def test_empty_sel(self, universe, seltype): with pytest.warns(UserWarning, match=self.msg.format(seltype)): HydrogenBondAnalysis(universe, **sel_kwarg) - def test_hbond_analysis(self, universe): + def test_hbond_analysis(self, universe, client_HydrogenBondAnalysis): h = HydrogenBondAnalysis(universe, donors_sel=' ', hydrogens_sel=' ', acceptors_sel=' ') - h.run() + h.run(**client_HydrogenBondAnalysis) assert h.donors_sel == '' assert h.hydrogens_sel == ''