Skip to content

Commit

Permalink
Implementation of Parallelization to analysis.hydrogenbonds.hbond_ana…
Browse files Browse the repository at this point in the history
…lysis (#4718)

- Fixes #4664 
- Added parallelization backend support to the class HydrogenBondAnalysis in hbond_analysis.py
- Moved setting up of donors and acceptors from _prepare() to __init__() (needed to make
  parallel processing work)
- Addition of parallelization tests in test_hydrogenbonds_analysis.py and fixtures in conftest.py
- Updated Changelog

---------

Co-authored-by: Yuxuan Zhuang <[email protected]>
Co-authored-by: Oliver Beckstein <[email protected]>
  • Loading branch information
3 people authored Oct 7, 2024
1 parent 650747d commit 474be5b
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 35 deletions.
3 changes: 2 additions & 1 deletion package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ Enhancements
* explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680)
* enables parallelization for analysis.bat.BAT (Issue #4663)
* enable parallelization for analysis.dihedrals.{Dihedral,Ramachandran,Janin}
(Issue #4673)
(Issue #4673)
* Enables parallelization for analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis (Issue #4664)
* Improve error message for `AtomGroup.unwrap()` when bonds are not present. (Issue #4436, PR #4642)
* Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP)
* Added a tqdm progress bar for `MDAnalysis.analysis.pca.PCA.transform()`
Expand Down
36 changes: 24 additions & 12 deletions package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@

import numpy as np

from ..base import AnalysisBase, Results
from ..base import AnalysisBase, Results, ResultsGroup
from MDAnalysis.lib.distances import capped_distance, calc_angles
from MDAnalysis.lib.correlations import autocorrelation, correct_intermittency
from MDAnalysis.exceptions import NoDataError
Expand All @@ -267,6 +267,12 @@ class HydrogenBondAnalysis(AnalysisBase):
Perform an analysis of hydrogen bonds in a Universe.
"""

_analysis_algorithm_is_parallelizable = True

# Class-level query for the execution backends this analysis may be run with.
# Takes no arguments besides the class and reads no instance state.
@classmethod
def get_supported_backends(cls):
# Returns a tuple of three backend names: 'serial', 'multiprocessing', 'dask'.
return ('serial', 'multiprocessing', 'dask',)

def __init__(self, universe,
donors_sel=None, hydrogens_sel=None, acceptors_sel=None,
between=None, d_h_cutoff=1.2,
Expand Down Expand Up @@ -335,7 +341,9 @@ def __init__(self, universe,
.. versionchanged:: 2.4.0
Added use of atom types in selection strings for hydrogen atoms,
bond donors, or bond acceptors
.. versionchanged:: 2.8.0
Introduced :meth:`get_supported_backends` allowing for parallel execution on
:mod:`multiprocessing` and :mod:`dask` backends.
"""

self.u = universe
Expand Down Expand Up @@ -383,6 +391,17 @@ def __init__(self, universe,
self.results = Results()
self.results.hbonds = None

# Set atom selections if they have not been provided
if self.acceptors_sel is None:
self.acceptors_sel = self.guess_acceptors()
if self.hydrogens_sel is None:
self.hydrogens_sel = self.guess_hydrogens()

# Select atom groups
self._acceptors = self.u.select_atoms(self.acceptors_sel,
updating=self.update_selections)
self._donors, self._hydrogens = self._get_dh_pairs()

def guess_hydrogens(self,
select='all',
max_mass=1.1,
Expand Down Expand Up @@ -699,16 +718,6 @@ def _filter_atoms(self, donors, acceptors):
def _prepare(self):
self.results.hbonds = [[], [], [], [], [], []]

# Set atom selections if they have not been provided
if self.acceptors_sel is None:
self.acceptors_sel = self.guess_acceptors()
if self.hydrogens_sel is None:
self.hydrogens_sel = self.guess_hydrogens()

# Select atom groups
self._acceptors = self.u.select_atoms(self.acceptors_sel,
updating=self.update_selections)
self._donors, self._hydrogens = self._get_dh_pairs()

def _single_frame(self):

Expand Down Expand Up @@ -788,6 +797,9 @@ def _conclude(self):

self.results.hbonds = np.asarray(self.results.hbonds).T

# Builds a ResultsGroup whose lookup table maps the 'hbonds' result key to
# ResultsGroup.ndarray_hstack; no other result keys are registered.
# NOTE(review): presumably consumed by the parallel machinery in AnalysisBase
# to merge per-worker `results.hbonds` before _conclude runs — confirm
# against the ResultsGroup documentation.
def _get_aggregator(self):
return ResultsGroup(lookup={'hbonds': ResultsGroup.ndarray_hstack})

@property
def hbonds(self):
wmsg = ("The `hbonds` attribute was deprecated in MDAnalysis 2.0.0 "
Expand Down
10 changes: 10 additions & 0 deletions testsuite/MDAnalysisTests/analysis/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@
from MDAnalysis.analysis.dihedrals import Dihedral, Ramachandran, Janin
from MDAnalysis.analysis.bat import BAT
from MDAnalysis.analysis.gnm import GNMAnalysis
from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import (
HydrogenBondAnalysis,
)
from MDAnalysis.lib.util import is_installed


def params_for_cls(cls, exclude: list[str] = None):
"""
This part contains fixtures for simultaneous testing
Expand Down Expand Up @@ -122,3 +126,9 @@ def client_GNMAnalysis(request):
# Module-scoped pytest fixture parametrized with params_for_cls(BAT).
# Each requesting test is run once per parameter produced by that call.
@pytest.fixture(scope='module', params=params_for_cls(BAT))
def client_BAT(request):
# Hand the current parameter to the test unchanged.
return request.param

# MDAnalysis.analysis.hydrogenbonds

# Module-scoped pytest fixture parametrized with
# params_for_cls(HydrogenBondAnalysis).
# The hydrogen-bond tests unpack the value as keyword arguments, i.e.
# h.run(**client_HydrogenBondAnalysis).
@pytest.fixture(scope='module', params=params_for_cls(HydrogenBondAnalysis))
def client_HydrogenBondAnalysis(request):
# Hand the current parameter to the test unchanged.
return request.param
48 changes: 26 additions & 22 deletions testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ def universe():
}

@pytest.fixture(scope='class')
def h(self, universe):
def h(self, universe, client_HydrogenBondAnalysis):
h = HydrogenBondAnalysis(universe, **self.kwargs)
h.run()
h.run(**client_HydrogenBondAnalysis)
return h

def test_hbond_analysis(self, h):
Expand Down Expand Up @@ -181,12 +181,12 @@ def universe():

@staticmethod
@pytest.fixture(scope='class')
def hydrogen_bonds(universe):
def hydrogen_bonds(universe, client_HydrogenBondAnalysis):
h = HydrogenBondAnalysis(
universe,
**TestHydrogenBondAnalysisIdeal.kwargs
)
h.run()
h.run(**client_HydrogenBondAnalysis)
return h

def test_count_by_type(self, hydrogen_bonds):
Expand All @@ -208,9 +208,12 @@ def test_no_bond_info_exception(self, universe):
'd_h_a_angle_cutoff': 120.0
}

u = universe.copy()
n_residues = 2
u.add_TopologyAttr('mass', [15.999, 1.008, 1.008] * n_residues)
u.add_TopologyAttr('charge', [-1.04, 0.52, 0.52] * n_residues)
with pytest.raises(NoDataError, match="no bond information"):
h = HydrogenBondAnalysis(universe, **kwargs)
h._get_dh_pairs()
h = HydrogenBondAnalysis(u, **kwargs)

def test_no_bond_donor_sel(self, universe):

Expand Down Expand Up @@ -263,10 +266,11 @@ def test_no_attr_hbonds(self, universe):
with pytest.raises(NoDataError, match=".hbonds attribute is None"):
hbonds.lifetime(tau_max=2, intermittency=1)

def test_logging_step_not_1(self, universe, caplog):
def test_logging_step_not_1(self, universe, caplog,
client_HydrogenBondAnalysis):
hbonds = HydrogenBondAnalysis(universe, **self.kwargs)
# using step 2
hbonds.run(step=2)
hbonds.run(**client_HydrogenBondAnalysis, step=2)

caplog.set_level(logging.WARNING)
hbonds.lifetime(tau_max=2, intermittency=1)
Expand Down Expand Up @@ -342,12 +346,12 @@ def universe():

@staticmethod
@pytest.fixture(scope='class')
def hydrogen_bonds(universe):
def hydrogen_bonds(universe, client_HydrogenBondAnalysis):
h = HydrogenBondAnalysis(
universe,
**TestHydrogenBondAnalysisNoRes.kwargs
)
h.run()
h.run(**client_HydrogenBondAnalysis)
return h

def test_no_hydrogen_bonds(self, universe):
Expand Down Expand Up @@ -441,10 +445,10 @@ def universe():

return u

def test_between_all(self, universe):
def test_between_all(self, universe, client_HydrogenBondAnalysis):
# don't specify groups between which to find hydrogen bonds
hbonds = HydrogenBondAnalysis(universe, between=None, **self.kwargs)
hbonds.run()
hbonds.run(**client_HydrogenBondAnalysis)

# indices of [donor, hydrogen, acceptor] for each hydrogen bond
expected_hbond_indices = [
Expand All @@ -457,14 +461,14 @@ def test_between_all(self, universe):
expected_hbond_indices)
assert_allclose(hbonds.results.hbonds[:, 4], expected_hbond_distances)

def test_between_PW(self, universe):
def test_between_PW(self, universe, client_HydrogenBondAnalysis):
# Find only protein-water hydrogen bonds
hbonds = HydrogenBondAnalysis(
universe,
between=["resname PROT", "resname SOL"],
**self.kwargs
)
hbonds.run()
hbonds.run(**client_HydrogenBondAnalysis)

# indices of [donor, hydrogen, acceptor] for each hydrogen bond
expected_hbond_indices = [
Expand All @@ -475,7 +479,7 @@ def test_between_PW(self, universe):
expected_hbond_indices)
assert_allclose(hbonds.results.hbonds[:, 4], expected_hbond_distances)

def test_between_PW_PP(self, universe):
def test_between_PW_PP(self, universe, client_HydrogenBondAnalysis):
# Find protein-water and protein-protein hydrogen bonds (not
# water-water)
hbonds = HydrogenBondAnalysis(
Expand All @@ -486,7 +490,7 @@ def test_between_PW_PP(self, universe):
],
**self.kwargs
)
hbonds.run()
hbonds.run(**client_HydrogenBondAnalysis)

# indices of [donor, hydrogen, acceptor] for each hydrogen bond
expected_hbond_indices = [
Expand All @@ -512,15 +516,15 @@ class TestHydrogenBondAnalysisTIP3P_GuessAcceptors_GuessHydrogens_UseTopology_(T
'd_h_a_angle_cutoff': 120.0
}

def test_no_hydrogens(self, universe):
def test_no_hydrogens(self, universe, client_HydrogenBondAnalysis):
# If no hydrogens are identified at a given frame, check an
# empty donor atom group is created
test_kwargs = TestHydrogenBondAnalysisTIP3P.kwargs.copy()
test_kwargs['donors_sel'] = None # use topology to find pairs
test_kwargs['hydrogens_sel'] = "name H" # no atoms have name H

h = HydrogenBondAnalysis(universe, **test_kwargs)
h.run()
h.run(**client_HydrogenBondAnalysis)

assert h._hydrogens.n_atoms == 0
assert h._donors.n_atoms == 0
Expand Down Expand Up @@ -629,9 +633,9 @@ def universe():
}

@pytest.fixture(scope='class')
def h(self, universe):
def h(self, universe, client_HydrogenBondAnalysis):
h = HydrogenBondAnalysis(universe, **self.kwargs)
h.run(start=1, step=2)
h.run(**client_HydrogenBondAnalysis, start=1, step=2)
return h

def test_hbond_analysis(self, h):
Expand Down Expand Up @@ -690,11 +694,11 @@ def test_empty_sel(self, universe, seltype):
with pytest.warns(UserWarning, match=self.msg.format(seltype)):
HydrogenBondAnalysis(universe, **sel_kwarg)

def test_hbond_analysis(self, universe):
def test_hbond_analysis(self, universe, client_HydrogenBondAnalysis):

h = HydrogenBondAnalysis(universe, donors_sel=' ', hydrogens_sel=' ',
acceptors_sel=' ')
h.run()
h.run(**client_HydrogenBondAnalysis)

assert h.donors_sel == ''
assert h.hydrogens_sel == ''
Expand Down

0 comments on commit 474be5b

Please sign in to comment.