From 319b6676a533d6609661b0ce22e88df212fc9060 Mon Sep 17 00:00:00 2001 From: Yuxuan Zhuang Date: Mon, 9 Sep 2024 21:04:49 -0700 Subject: [PATCH 1/2] Remove GSD from optional test dependencies (#4707) * Fixed high dimensional GroupBase indexing. * fixed pep8 issues * Removed sanitisation * Fix #4687 -- rdkit values in azure CI (#4688) * Investigate rdkit issue * Update azure-pipelines.yml * fix numpy 2.0 import block * fix imports * mark analysis.pca.PCA as not parallelizable (#4684) - fix #4680 - PCA explicitly marked as not parallelizable (at least not with simple split-apply-combine) - add tests - update CHANGELOG * disable gsd * disable gsd in azure * reduce timeout and set logical * fix azure * restore timeout to 200 --------- Co-authored-by: Matthew Davies <128810112+MattTDavies@users.noreply.github.com> Co-authored-by: Irfan Alibay Co-authored-by: Oliver Beckstein --- .github/workflows/gh-ci.yaml | 7 +++++-- azure-pipelines.yml | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index 471e3bd20de..8a45aec650f 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -85,6 +85,8 @@ jobs: with: micromamba: true full-deps: ${{ matrix.full-deps }} + # disable GSD because it occasionally introduce hanging in testing #4209 + gsd: '' # in most cases will just default to empty, i.e. pick up max version from other deps numpy: ${{ matrix.numpy }} extra-pip-deps: ${{ matrix.extra-pip-deps }} @@ -113,7 +115,7 @@ jobs: PYTEST_FLAGS="${PYTEST_FLAGS} --cov-config=.coveragerc --cov=MDAnalysis --cov-report=xml" fi echo $PYTEST_FLAGS - pytest -n auto --timeout=200 testsuite/MDAnalysisTests $PYTEST_FLAGS + pytest -n logical --timeout=200 testsuite/MDAnalysisTests $PYTEST_FLAGS - name: run_asv if: contains(matrix.name, 'asv_check') @@ -161,6 +163,7 @@ jobs: with: micromamba: true full-deps: true + gsd: '' extra-pip-deps: "docutils sphinx-sitemap sphinxcontrib-bibtex pybtex pybtex-docutils" extra-conda-deps: "mdanalysis-sphinx-theme>=1.3.0" @@ -281,4 +284,4 @@ jobs: - name: run tests working-directory: ./dist - run: python -m pytest --timeout=200 -n auto --pyargs MDAnalysisTests + run: python -m pytest --timeout=200 -n logical --pyargs MDAnalysisTests diff --git a/azure-pipelines.yml b/azure-pipelines.yml index cace2be35ba..250f3ba63b5 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -105,7 +105,6 @@ jobs: biopython "chemfiles>=0.10,<0.10.4" duecredit - "gsd>3.0.0" joblib GridDataFormats mmtf-python @@ -114,6 +113,8 @@ jobs: pytng>=0.2.3 rdkit>=2020.03.1 tidynamics>=1.0.0 + # remove from azure to avoid test hanging #4707 + # "gsd>3.0.0" displayName: 'Install additional dependencies for 64-bit tests' condition: and(succeeded(), eq(variables['PYTHON_ARCH'], 'x64')) - script: >- @@ -128,7 +129,7 @@ jobs: displayName: 'Check installed packages' - powershell: | cd testsuite - pytest MDAnalysisTests --disable-pytest-warnings -n auto --timeout=200 -rsx --cov=MDAnalysis + pytest MDAnalysisTests --disable-pytest-warnings -n logical --timeout=200 -rsx --cov=MDAnalysis displayName: 'Run MDAnalysis Test Suite' - script: | curl -s https://codecov.io/bash | bash From 4fafd51de84d5b89be0559a412acefde0040847c Mon Sep 17 00:00:00 2001 From: Valerij Talagayev <82884038+talagayev@users.noreply.github.com> Date: Sat, 14 Sep 2024 02:19:44 +0200 Subject: [PATCH 2/2] Implementation of Parallelization to analysis.bat (#4693) * Fixes #4663 * Changes made in this Pull Request: - parallelization of the class BAT in analysis.bat.py - addition of parallelization tests (including fixtures in analysis/conftest.py) - update docs for analysis class - update of CHANGELOG --- package/CHANGELOG | 1 + package/MDAnalysis/analysis/bat.py | 29 ++++++++++++++++--- .../MDAnalysisTests/analysis/conftest.py | 6 ++++ .../MDAnalysisTests/analysis/test_bat.py | 12 ++++---- 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index aecde6e6468..d799b52e4e0 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -58,6 +58,7 @@ Enhancements (Issue #4158, PR #4304) * Enables parallelization for analysis.gnm.GNMAnalysis (Issue #4672) * explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680) + * enables parallelization for analysis.bat.BAT (Issue #4663) * Improve error message for `AtomGroup.unwrap()` when bonds are not present.(Issue #4436, PR #4642) * Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP) * Added a tqdm progress bar for `MDAnalysis.analysis.pca.PCA.transform()` diff --git a/package/MDAnalysis/analysis/bat.py b/package/MDAnalysis/analysis/bat.py index 0c3e15a32e9..5186cb6c882 100644 --- a/package/MDAnalysis/analysis/bat.py +++ b/package/MDAnalysis/analysis/bat.py @@ -164,7 +164,7 @@ class to calculate dihedral angles for a given set of atoms or residues import copy import MDAnalysis as mda -from .base import AnalysisBase +from .base import AnalysisBase, ResultsGroup from MDAnalysis.lib.distances import calc_bonds, calc_angles, calc_dihedrals from MDAnalysis.lib.mdamath import make_whole @@ -253,10 +253,28 @@ class BAT(AnalysisBase): Bond-Angle-Torsions (BAT) internal coordinates will be computed for the group of atoms and all frame in the trajectory belonging to `ag`. + .. versionchanged:: 2.8.0 + Enabled **parallel execution** with the ``multiprocessing`` and ``dask`` + backends; use the new method :meth:`get_supported_backends` to see all + supported backends. + """ - @due.dcite(Doi("10.1002/jcc.26036"), - description="Bond-Angle-Torsions Coordinate Transformation", - path="MDAnalysis.analysis.bat.BAT") + _analysis_algorithm_is_parallelizable = True + + @classmethod + def get_supported_backends(cls): + return ( + "serial", + "multiprocessing", + "dask", + ) + + @due.dcite( + Doi("10.1002/jcc.26036"), + description="Bond-Angle-Torsions Coordinate Transformation", + path="MDAnalysis.analysis.bat.BAT", + ) + def __init__(self, ag, initial_atom=None, filename=None, **kwargs): r"""Parameters ---------- @@ -558,3 +576,6 @@ def Cartesian(self, bat_frame): def atoms(self): """The atomgroup for which BAT are computed (read-only property)""" return self._ag + + def _get_aggregator(self): + return ResultsGroup(lookup={'bat': ResultsGroup.ndarray_vstack}) diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py index 75d62284b7b..f53cdcfaac1 100644 --- a/testsuite/MDAnalysisTests/analysis/conftest.py +++ b/testsuite/MDAnalysisTests/analysis/conftest.py @@ -7,6 +7,7 @@ OldAPIAnalysis, ) from MDAnalysis.analysis.rms import RMSD, RMSF +from MDAnalysis.analysis.bat import BAT from MDAnalysis.lib.util import is_installed from MDAnalysis.analysis.gnm import GNMAnalysis @@ -93,3 +94,8 @@ def client_RMSF(request): @pytest.fixture(scope='module', params=params_for_cls(GNMAnalysis)) def client_GNMAnalysis(request): return request.param + + +@pytest.fixture(scope='module', params=params_for_cls(BAT)) +def client_BAT(request): + return request.param \ No newline at end of file diff --git a/testsuite/MDAnalysisTests/analysis/test_bat.py b/testsuite/MDAnalysisTests/analysis/test_bat.py index c80353baff6..5fb2603df62 100644 --- a/testsuite/MDAnalysisTests/analysis/test_bat.py +++ b/testsuite/MDAnalysisTests/analysis/test_bat.py @@ -41,16 +41,16 @@ def selected_residues(self): return ag @pytest.fixture() - def bat(self, selected_residues): + def bat(self, selected_residues, client_BAT): R = BAT(selected_residues) - R.run() + R.run(**client_BAT) return R.results.bat @pytest.fixture - def bat_npz(self, tmpdir, selected_residues): + def bat_npz(self, tmpdir, selected_residues, client_BAT): filename = str(tmpdir / 'test_bat_IO.npy') R = BAT(selected_residues) - R.run() + R.run(**client_BAT) R.save(filename) return filename @@ -73,8 +73,8 @@ def test_bat_coordinates(self, bat): atol=1.5e-5, err_msg="error: BAT coordinates should match test values") - def test_bat_coordinates_single_frame(self, selected_residues): - bat = BAT(selected_residues).run(start=1, stop=2).results.bat + def test_bat_coordinates_single_frame(self, selected_residues, client_BAT): + bat = BAT(selected_residues).run(start=1, stop=2, **client_BAT).results.bat test_bat = [np.load(BATArray)[1]] assert_allclose( bat,