From f955922966c31a7c1922f1f4a9e83770709557ca Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Mon, 17 Feb 2020 23:35:12 -0800 Subject: [PATCH] add new density.DensityAnalysis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - close #2502 - DensityAnalysis is based on @nawtrey's pmda.density.DensityAnalysis - added tests (based on the Test_density_from_universe): both give the same answer as they pass the same tests - update docs (also write ångström consistently throughout) - update CHANGELOG --- package/CHANGELOG | 3 +- package/MDAnalysis/analysis/density.py | 298 ++++++++++++++++-- .../MDAnalysisTests/analysis/test_density.py | 112 ++++++- 3 files changed, 379 insertions(+), 34 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 33a44f7cc7c..feb3eb8cfb7 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -15,7 +15,7 @@ The rules for this file: ------------------------------------------------------------------------------ mm/dd/yy richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira, PicoCentauri, davidercruz, jbarnoud, RMeli, IAlibay, mtiberti, CCook96, - Yuan-Yu, xiki-tempula + Yuan-Yu, xiki-tempula, orbeckst * 0.21.0 @@ -56,6 +56,7 @@ Fixes * Added parmed to setup.py Enhancements + * Added new density.DensityAnalysis (Issue #2502) * Added coordinate reader and writer for NAMD binary coordinate format (PR #2485) * Improved ClusterCollection and Cluster string representations (Issue #2464) * XYZ parser store elements attribute (#2420) and XYZ write uses the elements diff --git a/package/MDAnalysis/analysis/density.py b/package/MDAnalysis/analysis/density.py index c1c1a489614..b35c9413af3 100644 --- a/package/MDAnalysis/analysis/density.py +++ b/package/MDAnalysis/analysis/density.py @@ -89,12 +89,20 @@ Creating densities ------------------ -The following functions take trajectory or coordinate data and generate a -:class:`Density` object. +The :class:`DensityAnalysis` class generates a :class:`Density` from an +atomgroup. Its function equivalent :func:`density_from_Universe` is deprecated. + +:func:`density_from_PDB` generates a pseudo-density from a PDB file by +replacing each atom with a Gaussian density with a width that is computed from +the crystallographic temperature factors (B-factor) with :func:`Bfactor2RMSF`. + +.. autoclass:: DensityAnalysis + :members: -.. autoclass:; DensityAnalysis .. autofunction:: density_from_Universe + .. autofunction:: density_from_PDB + .. autofunction:: Bfactor2RMSF @@ -118,39 +126,23 @@ .. autofunction:: notwithin_coordinates_factory + .. rubric:: Footnotes .. [#pbc] Making molecules whole can be accomplished with the :meth:`MDAnalysis.core.groups.AtomGroup.wrap` of - :attr:`Universe.atoms` (use ``compound="fragments"``). - - When using, for instance, the Gromacs_ command `gmx trjconv`_ - - .. code-block:: bash - - gmx trjconv -pbc mol -center -ur compact - - one can make the molecules whole ``-pbc whole``, center it on a group - (``-center``), and also pack all molecules in a compact unitcell - representation, which can be useful for density generation. + :attr:`Universe.atoms` (use ``compound="fragments"``). or the + PBC-wrapping transformations in + :mod:`MDAnalysis.transformations.wrap`. .. [#fit] Superposition can be performed with - :class:`MDAnalysis.analysis.align.AlignTraj`. - - The Gromacs_ command `gmx trjconv`_ - - .. code-block:: bash + :class:`MDAnalysis.analysis.align.AlignTraj` or the fitting + transformations in :mod:`MDAnalysis.transformations.fit`. - gmx trjconv -fit rot+trans - - will also accomplish such a superposition. Note that the fitting has - to be done in a *separate* step from the treatment of the periodic - boundaries [#pbc]_. - -.. [#testraj] Note that the trajectory in the example (`XTC`) is *not* properly - made whole and fitted to a reference structure; these steps were - omitted to clearly show the steps necessary for the actual density - calculation. +.. [#testraj] Note that the trajectory in the example (`XTC`) is *not* + properly made whole and fitted to a reference structure; + these steps were omitted to clearly show the steps necessary + for the actual density calculation. .. Links .. ----- @@ -187,10 +179,256 @@ from MDAnalysis.lib.log import ProgressMeter from MDAnalysis.lib.util import deprecate +from .base import AnalysisBase + import logging logger = logging.getLogger("MDAnalysis.analysis.density") +class DensityAnalysis(AnalysisBase): + r"""Volumetric density analysis. + + The trajectory is read, frame by frame, and the atoms in `atomgroup` are + histogrammed on a 3D grid with spacing `delta`. + + Parameters + ---------- + atomgroup : AtomGroup or UpdatingAtomGroup + Group of atoms (such as all the water oxygen atoms) being analyzed. + This can be an :class:`~MDAnalysis.core.groups.UpdatingAtomGroup` for + selections that change every time step. + delta : float (optional) + Bin size for the density grid in ångström (same in x,y,z). + Slice the trajectory as "trajectory[start:stop:step]"; default + is to read the whole trajectory. + metadata : dict (optional) + `dict` of additional data to be saved with the object; the meta + data are passed through as they are. + padding : float (optional) + Increase histogram dimensions by padding (on top of initial box + size) in ångström. Padding is ignored when setting a user defined + grid. + parameters : dict (optional) + `dict` with some special parameters for :class:`Density` (see docs) + gridcenter : numpy ndarray, float32 (optional) + 3 element numpy array detailing the x, y and z coordinates of the + center of a user defined grid box in ångström. + xdim : float (optional) + User defined x dimension box edge in ångström; ignored if + gridcenter is "None". + ydim : float (optional) + User defined y dimension box edge in ångström; ignored if + gridcenter is "None". + zdim : float (optional) + User defined z dimension box edge in ångström; ignored if + gridcenter is "None". + + See Also + -------- + pmda.density.DensityAnalysis for a parallel version + + Notes + ----- + Normal :class:`AtomGroup` instances represent a static selection of + atoms. If you want *dynamically changing selections* (such as "name OW and + around 4.0 (protein and not name H*)", i.e., the water oxygen atoms that + are within 4 Å of the protein heavy atoms) then create an + :class:`~MDAnalysis.core.groups.UpdatingAtomGroup` (see Examples). + + + Examples + -------- + A common use case is to analyze the solvent density around a protein of + interest. The density is calculated with :class:`DensityAnalysis` in the + fixed coordinate system of the simulation unit cell. It is therefore + necessary to orient and fix the protein with respect to the box coordinate + system. In practice this means centering and superimposing the protein, + frame by frame, on a reference structure and translating and rotating all + other components of the simulation with the protein. In this way, the + solvent will appear in the reference frame of the protein. + + An input trajectory must + + 1. have been centered on the protein of interest; + 2. have all molecules made whole that have been broken across periodic + boundaries [#pbc]_; + 3. have the solvent molecules remapped so that they are closest to the + solute (this is important when using triclinic unit cells such as + a dodecahedron or a truncated octahedron) [#pbc]_; + 4. have a fixed frame of reference; for instance, by superimposing a + protein on a reference structure so that one can study the solvent + density around it [#fit]_. + + To generate the density of water molecules around a protein (assuming that + the trajectory is already appropriately treated for periodic boundary + artifacts and is suitably superimposed to provide a fixed reference frame) + [#testraj]_, first create the :class:`DensityAnalysis` object by + supplying an AtomGroup, then use the :meth:`run` method:: + + from MDAnalysis.analysis import density + u = Universe(TPR, XTC) + ow = u.select_atoms("name OW") + D = density.DensityAnalysis(ow, delta=1.0) + D.run() + D.convert_density('TIP4P') + + The positions of all water oxygens are histogrammed on a grid with spacing + *delta* = 1 Å. Initially the density is measured in inverse cubic + ångström. With the :meth:`Density.convert_density` method, + the units of measurement are changed. In the example we are now measuring + the density relative to the literature value of the TIP4P water model at + ambient conditions (see the values in :data:`MDAnalysis.units.water` for + details). In particular, the density is stored as a NumPy array in + :attr:`grid`, which can be processed in any manner. Results are available + through the :attr:`density` attribute, which has the :attr:`grid` + attribute that contains the histogrammed density data. + :attr:`DensityAnalysis.density` is a :class:`gridData.core.Grid` object. + In particular, its contents can be `exported to different formats + `_. + For example, to `write a DX file + `_ + ``water.dx`` that can be read with VMD, PyMOL, or Chimera:: + + D.density.export("water.dx", type="double") + + Basic use for creating a water density (just using the water oxygen + atoms "OW"):: + + D = DensityAnalysis(universe.select_atoms('name OW')).run() + + If you are only interested in water within a certain region, e.g., within + a vicinity around a binding site, you can use a selection that updates + every step by using an :class:`~MDAnalysis.core.groups.UpdatingAtomGroup`:: + + near_waters = universe.select_atoms('name OW and around 5 (resid 156 157 305)', + updating=True) + D_site = DensityAnalysis(near_waters).run() + + If you are interested in explicitly setting a grid box of a given edge size + and origin, you can use the `gridcenter` and `xdim`/`ydim`/`zdim` + arguments. For example to plot the density of waters within 5 Å of a + ligand (in this case the ligand has been assigned the residue name "LIG") + in a cubic grid with 20 Å edges which is centered on the center of mass + (COM) of the ligand:: + + # Create a selection based on the ligand + ligand_selection = universe.select_atoms("resname LIG") + + # Extract the COM of the ligand + ligand_COM = ligand_selection.center_of_mass() + + # Create a density of waters on a cubic grid centered on the ligand COM + # In this case, we update the atom selection as shown above. + D_water = DensityAnalysis(universe.select_atoms('name OW and around 5 resname LIG', + updating=True), + delta=1.0, + gridcenter=ligand_COM, + xdim=20, ydim=20, zdim=20) + + (It should be noted that the `padding` keyword is not used when a user + defined grid is assigned). + + + .. versionadded:: 1.0.0 + + """ + def __init__(self, atomgroup, delta=1.0, + metadata=None, padding=2.0, + parameters=None, gridcenter=None, + xdim=None, ydim=None, zdim=None): + u = atomgroup.universe + super(DensityAnalysis, self).__init__(u.trajectory) + self._atomgroup = atomgroup + self._delta = delta + self._metadata = metadata + self._padding = padding + self._parameters = parameters + self._gridcenter = gridcenter + self._xdim = xdim + self._ydim = ydim + self._zdim = zdim + + def _prepare(self): + coord = self._atomgroup.positions + if self._gridcenter is not None: + # Issue 2372: padding is ignored, defaults to 2.0 therefore warn + if self._padding > 0: + msg = ("Box padding (currently set at {0}) " + "is not used in user defined grids.".format(self._padding)) + warnings.warn(msg) + logger.warning(msg) + # Generate a copy of smin/smax from coords to later check if the + # defined box might be too small for the selection + smin = np.min(coord, axis=0) + smax = np.max(coord, axis=0) + # Overwrite smin/smax with user defined values + smin, smax = _set_user_grid(self._gridcenter, self._xdim, + self._ydim, self._zdim, smin, smax) + else: + # Make the box bigger to avoid as much as possible 'outlier'. This + # is important if the sites are defined at a high density: in this + # case the bulk regions don't have to be close to 1 * n0 but can + # be less. It's much more difficult to deal with outliers. The + # ideal solution would use images: implement 'looking across the + # periodic boundaries' but that gets complicated when the box + # rotates due to RMS fitting. + smin = np.min(coord, axis=0) - self._padding + smax = np.max(coord, axis=0) + self._padding + BINS = fixedwidth_bins(self._delta, smin, smax) + arange = np.transpose(np.vstack((BINS['min'], BINS['max']))) + bins = BINS['Nbins'] + # create empty grid with the right dimensions (and get the edges) + grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins, + range=arange, normed=False) + grid *= 0.0 + self._grid = grid + self._edges = edges + self._arange = arange + self._bins = bins + #: density as a :class:`Density` + self.density = None + + def _single_frame(self): + h, _ = np.histogramdd(self._atomgroup.positions, + bins=self._bins, range=self._arange, + normed=False) + # manually reduce (not yet part of AnalysisBase but exists in pmda.parallel.AnalysisBase) + self._reduce(self._grid, h) + + def _conclude(self): + # average: + self._grid /= float(self.n_frames) + + metadata = self._metadata if self._metadata is not None else {} + metadata['topology'] = metadata['psf'] = self._atomgroup.universe.filename + metadata['trajectory'] = metadata['dcd'] = self._trajectory.filename + metadata['n_frames'] = self.n_frames + metadata['totaltime'] = self._trajectory.totaltime + metadata['dt'] = self._trajectory.dt + metadata['time_unit'] = "ps" + parameters = self._parameters if self._parameters is not None else {} + parameters['isDensity'] = False # must override + density = Density(grid=self._grid, edges=self._edges, + units={'length': "Angstrom"}, + parameters=parameters, + metadata=metadata) + density.make_density() + self.density = density + + @staticmethod + def _reduce(res, result_single_frame): + """'accumulate' action for a time series + + If `res` is a numpy array, the `result_single_frame` is added to it + element-wise. If `res` and `result_single_frame` are lists then + `result_single_frame` is appended to `res`. + """ + if isinstance(res, list) and len(res) == 0: + res = result_single_frame + else: + res += result_single_frame + return res + class Density(Grid): r"""Class representing a density on a regular cartesian grid. @@ -311,7 +549,7 @@ class Density(Grid): D.convert_density('water') After the final step, ``D`` will contain a density on a grid measured in - Ångstrom, with the density values itself measured relative to the + ångström, with the density values itself measured relative to the density of water. :class:`Density` objects can be algebraically manipulated (added, diff --git a/testsuite/MDAnalysisTests/analysis/test_density.py b/testsuite/MDAnalysisTests/analysis/test_density.py index c40a91c786f..eb0ddcb5044 100644 --- a/testsuite/MDAnalysisTests/analysis/test_density.py +++ b/testsuite/MDAnalysisTests/analysis/test_density.py @@ -118,8 +118,7 @@ def test_export_types(self, D, dxtype, tmpdir, outfile="density.dx"): data = dx.components['data'] assert data.type == dxtype - -class Test_density_from_Universe(object): +class DensityParameters(object): topology = TPR trajectory = XTC delta = 2.0 @@ -152,6 +151,114 @@ class Test_density_from_Universe(object): def universe(self): return mda.Universe(self.topology, self.trajectory) +class TestDensityAnalysis(DensityParameters): + def check_DensityAnalysis(self, ag, ref_meandensity, + tmpdir, runargs=None, **kwargs): + runargs = runargs if runargs else {} + with tmpdir.as_cwd(): + D = density.DensityAnalysis(ag, delta=self.delta, **kwargs).run(**runargs) + assert_almost_equal(D.density.grid.mean(), ref_meandensity, + err_msg="mean density does not match") + D.density.export(self.outfile) + + D2 = density.Density(self.outfile) + assert_almost_equal(D.density.grid, D2.grid, decimal=self.precision, + err_msg="DX export failed: different grid sizes") + + @pytest.mark.parametrize("mode", ("static", "dynamic")) + def test_run(self, mode, universe, tmpdir): + updating = (mode == "dynamic") + self.check_DensityAnalysis( + universe.select_atoms(self.selections[mode], updating=updating), + self.references[mode]['meandensity'], + tmpdir=tmpdir + ) + + def test_sliced(self, universe, tmpdir): + self.check_DensityAnalysis( + universe.select_atoms(self.selections['static']), + self.references['static_sliced']['meandensity'], + tmpdir=tmpdir, + runargs=dict(start=1, stop=-1, step=2), + ) + + def test_userdefn_eqbox(self, universe, tmpdir): + with warnings.catch_warnings(): + # Do not need to see UserWarning that box is too small + warnings.simplefilter("ignore") + self.check_DensityAnalysis( + universe.select_atoms(self.selections['static']), + self.references['static_defined']['meandensity'], + tmpdir=tmpdir, + gridcenter=self.gridcenters['static_defined'], + xdim=10.0, + ydim=10.0, + zdim=10.0, + ) + + def test_userdefn_neqbox(self, universe, tmpdir): + self.check_DensityAnalysis( + universe.select_atoms(self.selections['static']), + self.references['static_defined_unequal']['meandensity'], + tmpdir=tmpdir, + gridcenter=self.gridcenters['static_defined'], + xdim=10.0, + ydim=15.0, + zdim=20.0, + ) + + def test_userdefn_boxshape(self, universe): + D = density.DensityAnalysis( + universe.select_atoms(self.selections['static']), + delta=1.0, xdim=8.0, ydim=12.0, zdim=17.0, + gridcenter=self.gridcenters['static_defined']).run() + assert D.density.grid.shape == (8, 12, 17) + + def test_warn_userdefn_padding(self, universe): + regex = ("Box padding \(currently set at 1\.0\) is not used " + "in user defined grids\.") + with pytest.warns(UserWarning, match=regex): + D = density.DensityAnalysis( + universe.select_atoms(self.selections['static']), + delta=self.delta, xdim=100.0, ydim=100.0, zdim=100.0, padding=1.0, + gridcenter=self.gridcenters['static_defined']).run(step=5) + + def test_warn_userdefn_smallgrid(self, universe): + regex = ("Atom selection does not fit grid --- " + "you may want to define a larger box") + with pytest.warns(UserWarning, match=regex): + D = density.density_from_Universe( + universe, select=self.selections['static'], + delta=self.delta, xdim=1.0, ydim=2.0, zdim=2.0, padding=0.0, + gridcenter=self.gridcenters['static_defined']) + + def test_ValueError_userdefn_gridcenter_shape(self, universe): + # Test len(gridcenter) != 3 + with pytest.raises(ValueError): + D = density.DensityAnalysis( + universe.select_atoms(self.selections['static']), + delta=self.delta, xdim=10.0, ydim=10.0, zdim=10.0, + gridcenter=self.gridcenters['error1']).run() + + def test_ValueError_userdefn_gridcenter_type(self, universe): + # Test gridcenter includes non-numeric strings + with pytest.raises(ValueError): + D = density.DensityAnalysis( + universe.select_atoms(self.selections['static']), + delta=self.delta, xdim=10.0, ydim=10.0, zdim=10.0, + gridcenter=self.gridcenters['error2']).run() + + def test_ValueError_userdefn_xdim_type(self, universe): + # Test xdim != int or float + with pytest.raises(ValueError): + D = density.DensityAnalysis( + universe.select_atoms(self.selections['static']), + delta=self.delta, xdim="MDAnalysis", ydim=10.0, zdim=10.0, + gridcenter=self.gridcenters['static_defined']).run() + + +# remove in 2.0.0 +class Test_density_from_Universe(DensityParameters): def check_density_from_Universe(self, atomselection, ref_meandensity, universe, tmpdir, **kwargs): with tmpdir.as_cwd(): @@ -279,7 +386,6 @@ def test_density_from_Universe_Deprecation_warning(self, universe): universe, select=self.selections['static'], delta=self.delta) - class TestNotWithin(object): # tests notwithin_coordinates_factory # only checks that KDTree and distance_array give same results